VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 25348

Last change on this file since 25348 was 24452, checked in by vboxsync, 15 years ago

iprt/asm.h: Added ASMIsAmdCpuEx and ASMIsAmdCpu.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 172.0 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 if we're using the Microsoft compiler with _MSC_VER >= 1400,
 * i.e. a version for which <intrin.h> and the compiler intrinsics are used.
 * Otherwise defined as 0.
 */
41
/* Solaris 10 header ugliness: make sure no macro called 'u' survives past
   this point (undefining a name that is not a macro is harmless). */
#undef u
46
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
 /* Emit the intrinsics at all optimization levels. */
 /* Barriers, CPU identification, interrupt flag, TSC and MSRs. */
# pragma intrinsic(_ReadWriteBarrier, __cpuid, _enable, _disable)
# pragma intrinsic(__rdtsc, __readmsr, __writemsr)
 /* Port output. */
# pragma intrinsic(__outbyte, __outbytestring)
# pragma intrinsic(__outword, __outwordstring)
# pragma intrinsic(__outdword, __outdwordstring)
 /* Port input. */
# pragma intrinsic(__inbyte, __inbytestring)
# pragma intrinsic(__inword, __inwordstring)
# pragma intrinsic(__indword, __indwordstring)
 /* TLB / cache maintenance and string stores. */
# pragma intrinsic(__invlpg, __wbinvd)
# pragma intrinsic(__stosd, __stosw, __stosb)
 /* Control and debug registers. */
# pragma intrinsic(__readcr0, __readcr2, __readcr3, __readcr4)
# pragma intrinsic(__writecr0, __writecr3, __writecr4)
# pragma intrinsic(__readdr, __writedr)
 /* Bit scanning, bit testing and byte swapping. */
# pragma intrinsic(_BitScanForward, _BitScanReverse)
# pragma intrinsic(_bittest, _bittestandset, _bittestandreset, _bittestandcomplement)
# pragma intrinsic(_byteswap_ushort, _byteswap_ulong)
 /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset, _interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd, _InterlockedOr)
# pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange, _InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange, _InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
 /* Intrinsics only requested for AMD64 targets. */
#  pragma intrinsic(_mm_mfence, _mm_sfence, _mm_lfence)
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8, __writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
/* Everything else does not use the intrinsics. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
118
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.
 *
 * Always defined: 1 when compiling with GCC 4.3.x for 32-bit x86 (__i386__),
 * 0 in all other cases.
 *
 * @remarks The value is decided by the preprocessor here and the macro expands
 *          to a plain integer constant.  It must not expand to an expression
 *          containing the 'defined' operator: producing 'defined' via macro
 *          expansion inside an \#if is undefined behaviour, and such a macro
 *          could not be used in ordinary C expressions either.
 */
#ifdef __GNUC__
# if __GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__)
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# else
#  define RT_INLINE_ASM_GCC_4_3_X_X86 0
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
128
129
130
131/** @defgroup grp_asm ASM - Assembly Routines
132 * @ingroup grp_rt
133 *
 * @remarks The difference between ordered and unordered atomic operations is that
 * the former will complete outstanding reads and writes before continuing
 * while the latter don't make any promises about the order. Ordered
 * operations don't, it seems, make any 100% promise with respect to whether
 * the operation will complete before any subsequent memory access.
 * (please, correct if wrong.)
140 *
141 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
142 * are unordered (note the Uo).
143 *
144 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
145 * or even optimize assembler instructions away. For instance, in the following code
146 * the second rdmsr instruction is optimized away because gcc treats that instruction
147 * as deterministic:
148 *
149 * @code
 * static inline uint64_t rdmsr_low(int idx)
 * {
 *     uint32_t low;
 *     __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
155 * ...
156 * uint32_t msr1 = rdmsr_low(1);
157 * foo(msr1);
158 * msr1 = rdmsr_low(1);
159 * bar(msr1);
160 * @endcode
161 *
162 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
163 * use the result of the first call as input parameter for bar() as well. For rdmsr this
164 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
165 * machine status information in general.
166 *
167 * @{
168 */
169
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler cannot do inline assembly, in which case the
 * ASM* functions are implemented in an external .asm file instead.
 * Set to 0 when inline assembly is available.
 *
 * @remark Only the Microsoft compiler targeting AMD64 is treated as lacking
 *         inline assembly here.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
182
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler takes GNU style inline assembly, which is assumed
 * for everything but the Microsoft compiler.  Set to 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
191
192
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - byte packed image of the interrupt descriptor table register as
 * stored by ASMGetIDTR and loaded by ASMSetIDTR.
 */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
#pragma pack()
/** IDTR */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR. */
typedef RTIDTR *PRTIDTR;
204
#pragma pack(1)
/**
 * GDTR - byte packed image of the global descriptor table register as
 * stored by ASMGetGDTR.
 */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
#pragma pack()
/** GDTR */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR. */
typedef RTGDTR *PRTGDTR;
215
216
/** @def ASMReturnAddress
 * Evaluates to the return address of the current function or method, i.e. the
 * location in the caller that execution continues at after it returns.
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif !defined(__GNUC__) && !defined(DOXYGEN_RUNNING)
# error "Unsupported compiler."
#else
# define ASMReturnAddress() __builtin_return_address(0)
#endif
232
233
234/**
235 * Gets the content of the IDTR CPU register.
236 * @param pIdtr Where to store the IDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
240#else
241DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 sidt [rax]
251# else
252 mov eax, [pIdtr]
253 sidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Sets the content of the IDTR CPU register.
263 * @param pIdtr Where to load the IDTR contents from
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
267#else
268DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pIdtr]
277 lidt [rax]
278# else
279 mov eax, [pIdtr]
280 lidt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287
288/**
289 * Gets the content of the GDTR CPU register.
290 * @param pGdtr Where to store the GDTR contents.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
294#else
295DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
296{
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
299# else
300 __asm
301 {
302# ifdef RT_ARCH_AMD64
303 mov rax, [pGdtr]
304 sgdt [rax]
305# else
306 mov eax, [pGdtr]
307 sgdt [eax]
308# endif
309 }
310# endif
311}
312#endif
313
314/**
315 * Get the cs register.
316 * @returns cs.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetCS(void);
320#else
321DECLINLINE(RTSEL) ASMGetCS(void)
322{
323 RTSEL SelCS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
326# else
327 __asm
328 {
329 mov ax, cs
330 mov [SelCS], ax
331 }
332# endif
333 return SelCS;
334}
335#endif
336
337
338/**
339 * Get the DS register.
340 * @returns DS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetDS(void);
344#else
345DECLINLINE(RTSEL) ASMGetDS(void)
346{
347 RTSEL SelDS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
350# else
351 __asm
352 {
353 mov ax, ds
354 mov [SelDS], ax
355 }
356# endif
357 return SelDS;
358}
359#endif
360
361
362/**
363 * Get the ES register.
364 * @returns ES.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetES(void);
368#else
369DECLINLINE(RTSEL) ASMGetES(void)
370{
371 RTSEL SelES;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
374# else
375 __asm
376 {
377 mov ax, es
378 mov [SelES], ax
379 }
380# endif
381 return SelES;
382}
383#endif
384
385
386/**
387 * Get the FS register.
388 * @returns FS.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetFS(void);
392#else
393DECLINLINE(RTSEL) ASMGetFS(void)
394{
395 RTSEL SelFS;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
398# else
399 __asm
400 {
401 mov ax, fs
402 mov [SelFS], ax
403 }
404# endif
405 return SelFS;
406}
407# endif
408
409
410/**
411 * Get the GS register.
412 * @returns GS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTSEL) ASMGetGS(void);
416#else
417DECLINLINE(RTSEL) ASMGetGS(void)
418{
419 RTSEL SelGS;
420# if RT_INLINE_ASM_GNU_STYLE
421 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
422# else
423 __asm
424 {
425 mov ax, gs
426 mov [SelGS], ax
427 }
428# endif
429 return SelGS;
430}
431#endif
432
433
434/**
435 * Get the SS register.
436 * @returns SS.
437 */
438#if RT_INLINE_ASM_EXTERNAL
439DECLASM(RTSEL) ASMGetSS(void);
440#else
441DECLINLINE(RTSEL) ASMGetSS(void)
442{
443 RTSEL SelSS;
444# if RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
446# else
447 __asm
448 {
449 mov ax, ss
450 mov [SelSS], ax
451 }
452# endif
453 return SelSS;
454}
455#endif
456
457
458/**
459 * Get the TR register.
460 * @returns TR.
461 */
462#if RT_INLINE_ASM_EXTERNAL
463DECLASM(RTSEL) ASMGetTR(void);
464#else
465DECLINLINE(RTSEL) ASMGetTR(void)
466{
467 RTSEL SelTR;
468# if RT_INLINE_ASM_GNU_STYLE
469 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
470# else
471 __asm
472 {
473 str ax
474 mov [SelTR], ax
475 }
476# endif
477 return SelTR;
478}
479#endif
480
481
482/**
483 * Get the [RE]FLAGS register.
484 * @returns [RE]FLAGS.
485 */
486#if RT_INLINE_ASM_EXTERNAL
487DECLASM(RTCCUINTREG) ASMGetFlags(void);
488#else
489DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
490{
491 RTCCUINTREG uFlags;
492# if RT_INLINE_ASM_GNU_STYLE
493# ifdef RT_ARCH_AMD64
494 __asm__ __volatile__("pushfq\n\t"
495 "popq %0\n\t"
496 : "=r" (uFlags));
497# else
498 __asm__ __volatile__("pushfl\n\t"
499 "popl %0\n\t"
500 : "=r" (uFlags));
501# endif
502# else
503 __asm
504 {
505# ifdef RT_ARCH_AMD64
506 pushfq
507 pop [uFlags]
508# else
509 pushfd
510 pop [uFlags]
511# endif
512 }
513# endif
514 return uFlags;
515}
516#endif
517
518
519/**
520 * Set the [RE]FLAGS register.
521 * @param uFlags The new [RE]FLAGS value.
522 */
523#if RT_INLINE_ASM_EXTERNAL
524DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
525#else
526DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
527{
528# if RT_INLINE_ASM_GNU_STYLE
529# ifdef RT_ARCH_AMD64
530 __asm__ __volatile__("pushq %0\n\t"
531 "popfq\n\t"
532 : : "g" (uFlags));
533# else
534 __asm__ __volatile__("pushl %0\n\t"
535 "popfl\n\t"
536 : : "g" (uFlags));
537# endif
538# else
539 __asm
540 {
541# ifdef RT_ARCH_AMD64
542 push [uFlags]
543 popfq
544# else
545 push [uFlags]
546 popfd
547# endif
548 }
549# endif
550}
551#endif
552
553
554/**
555 * Gets the content of the CPU timestamp counter register.
556 *
557 * @returns TSC.
558 */
559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
560DECLASM(uint64_t) ASMReadTSC(void);
561#else
562DECLINLINE(uint64_t) ASMReadTSC(void)
563{
564 RTUINT64U u;
565# if RT_INLINE_ASM_GNU_STYLE
566 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
567# else
568# if RT_INLINE_ASM_USES_INTRIN
569 u.u = __rdtsc();
570# else
571 __asm
572 {
573 rdtsc
574 mov [u.s.Lo], eax
575 mov [u.s.Hi], edx
576 }
577# endif
578# endif
579 return u.u;
580}
581#endif
582
583
584/**
585 * Performs the cpuid instruction returning all registers.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvEAX Where to store eax.
589 * @param pvEBX Where to store ebx.
590 * @param pvECX Where to store ecx.
591 * @param pvEDX Where to store edx.
592 * @remark We're using void pointers to ease the use of special bitfield structures and such.
593 */
594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
595DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
596#else
597DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
598{
599# if RT_INLINE_ASM_GNU_STYLE
600# ifdef RT_ARCH_AMD64
601 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
602 __asm__ ("cpuid\n\t"
603 : "=a" (uRAX),
604 "=b" (uRBX),
605 "=c" (uRCX),
606 "=d" (uRDX)
607 : "0" (uOperator));
608 *(uint32_t *)pvEAX = (uint32_t)uRAX;
609 *(uint32_t *)pvEBX = (uint32_t)uRBX;
610 *(uint32_t *)pvECX = (uint32_t)uRCX;
611 *(uint32_t *)pvEDX = (uint32_t)uRDX;
612# else
613 __asm__ ("xchgl %%ebx, %1\n\t"
614 "cpuid\n\t"
615 "xchgl %%ebx, %1\n\t"
616 : "=a" (*(uint32_t *)pvEAX),
617 "=r" (*(uint32_t *)pvEBX),
618 "=c" (*(uint32_t *)pvECX),
619 "=d" (*(uint32_t *)pvEDX)
620 : "0" (uOperator));
621# endif
622
623# elif RT_INLINE_ASM_USES_INTRIN
624 int aInfo[4];
625 __cpuid(aInfo, uOperator);
626 *(uint32_t *)pvEAX = aInfo[0];
627 *(uint32_t *)pvEBX = aInfo[1];
628 *(uint32_t *)pvECX = aInfo[2];
629 *(uint32_t *)pvEDX = aInfo[3];
630
631# else
632 uint32_t uEAX;
633 uint32_t uEBX;
634 uint32_t uECX;
635 uint32_t uEDX;
636 __asm
637 {
638 push ebx
639 mov eax, [uOperator]
640 cpuid
641 mov [uEAX], eax
642 mov [uEBX], ebx
643 mov [uECX], ecx
644 mov [uEDX], edx
645 pop ebx
646 }
647 *(uint32_t *)pvEAX = uEAX;
648 *(uint32_t *)pvEBX = uEBX;
649 *(uint32_t *)pvECX = uECX;
650 *(uint32_t *)pvEDX = uEDX;
651# endif
652}
653#endif
654
655
656/**
657 * Performs the cpuid instruction returning all registers.
658 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
659 *
660 * @param uOperator CPUID operation (eax).
661 * @param uIdxECX ecx index
662 * @param pvEAX Where to store eax.
663 * @param pvEBX Where to store ebx.
664 * @param pvECX Where to store ecx.
665 * @param pvEDX Where to store edx.
666 * @remark We're using void pointers to ease the use of special bitfield structures and such.
667 */
668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
669DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
670#else
671DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
672{
673# if RT_INLINE_ASM_GNU_STYLE
674# ifdef RT_ARCH_AMD64
675 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
676 __asm__ ("cpuid\n\t"
677 : "=a" (uRAX),
678 "=b" (uRBX),
679 "=c" (uRCX),
680 "=d" (uRDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683 *(uint32_t *)pvEAX = (uint32_t)uRAX;
684 *(uint32_t *)pvEBX = (uint32_t)uRBX;
685 *(uint32_t *)pvECX = (uint32_t)uRCX;
686 *(uint32_t *)pvEDX = (uint32_t)uRDX;
687# else
688 __asm__ ("xchgl %%ebx, %1\n\t"
689 "cpuid\n\t"
690 "xchgl %%ebx, %1\n\t"
691 : "=a" (*(uint32_t *)pvEAX),
692 "=r" (*(uint32_t *)pvEBX),
693 "=c" (*(uint32_t *)pvECX),
694 "=d" (*(uint32_t *)pvEDX)
695 : "0" (uOperator),
696 "2" (uIdxECX));
697# endif
698
699# elif RT_INLINE_ASM_USES_INTRIN
700 int aInfo[4];
701 /* ??? another intrinsic ??? */
702 __cpuid(aInfo, uOperator);
703 *(uint32_t *)pvEAX = aInfo[0];
704 *(uint32_t *)pvEBX = aInfo[1];
705 *(uint32_t *)pvECX = aInfo[2];
706 *(uint32_t *)pvEDX = aInfo[3];
707
708# else
709 uint32_t uEAX;
710 uint32_t uEBX;
711 uint32_t uECX;
712 uint32_t uEDX;
713 __asm
714 {
715 push ebx
716 mov eax, [uOperator]
717 mov ecx, [uIdxECX]
718 cpuid
719 mov [uEAX], eax
720 mov [uEBX], ebx
721 mov [uECX], ecx
722 mov [uEDX], edx
723 pop ebx
724 }
725 *(uint32_t *)pvEAX = uEAX;
726 *(uint32_t *)pvEBX = uEBX;
727 *(uint32_t *)pvECX = uECX;
728 *(uint32_t *)pvEDX = uEDX;
729# endif
730}
731#endif
732
733
734/**
735 * Performs the cpuid instruction returning ecx and edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @param pvECX Where to store ecx.
739 * @param pvEDX Where to store edx.
740 * @remark We're using void pointers to ease the use of special bitfield structures and such.
741 */
742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
743DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
744#else
745DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
746{
747 uint32_t uEBX;
748 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
749}
750#endif
751
752
753/**
754 * Performs the cpuid instruction returning edx.
755 *
756 * @param uOperator CPUID operation (eax).
757 * @returns EDX after cpuid operation.
758 */
759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
760DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
761#else
762DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
763{
764 RTCCUINTREG xDX;
765# if RT_INLINE_ASM_GNU_STYLE
766# ifdef RT_ARCH_AMD64
767 RTCCUINTREG uSpill;
768 __asm__ ("cpuid"
769 : "=a" (uSpill),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "rbx", "rcx");
773# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
774 __asm__ ("push %%ebx\n\t"
775 "cpuid\n\t"
776 "pop %%ebx\n\t"
777 : "=a" (uOperator),
778 "=d" (xDX)
779 : "0" (uOperator)
780 : "ecx");
781# else
782 __asm__ ("cpuid"
783 : "=a" (uOperator),
784 "=d" (xDX)
785 : "0" (uOperator)
786 : "ebx", "ecx");
787# endif
788
789# elif RT_INLINE_ASM_USES_INTRIN
790 int aInfo[4];
791 __cpuid(aInfo, uOperator);
792 xDX = aInfo[3];
793
794# else
795 __asm
796 {
797 push ebx
798 mov eax, [uOperator]
799 cpuid
800 mov [xDX], edx
801 pop ebx
802 }
803# endif
804 return (uint32_t)xDX;
805}
806#endif
807
808
809/**
810 * Performs the cpuid instruction returning ecx.
811 *
812 * @param uOperator CPUID operation (eax).
813 * @returns ECX after cpuid operation.
814 */
815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
816DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
817#else
818DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
819{
820 RTCCUINTREG xCX;
821# if RT_INLINE_ASM_GNU_STYLE
822# ifdef RT_ARCH_AMD64
823 RTCCUINTREG uSpill;
824 __asm__ ("cpuid"
825 : "=a" (uSpill),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "rbx", "rdx");
829# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
830 __asm__ ("push %%ebx\n\t"
831 "cpuid\n\t"
832 "pop %%ebx\n\t"
833 : "=a" (uOperator),
834 "=c" (xCX)
835 : "0" (uOperator)
836 : "edx");
837# else
838 __asm__ ("cpuid"
839 : "=a" (uOperator),
840 "=c" (xCX)
841 : "0" (uOperator)
842 : "ebx", "edx");
843
844# endif
845
846# elif RT_INLINE_ASM_USES_INTRIN
847 int aInfo[4];
848 __cpuid(aInfo, uOperator);
849 xCX = aInfo[2];
850
851# else
852 __asm
853 {
854 push ebx
855 mov eax, [uOperator]
856 cpuid
857 mov [xCX], ecx
858 pop ebx
859 }
860# endif
861 return (uint32_t)xCX;
862}
863#endif
864
865
866/**
867 * Checks if the current CPU supports CPUID.
868 *
869 * @returns true if CPUID is supported.
870 */
871DECLINLINE(bool) ASMHasCpuId(void)
872{
873#ifdef RT_ARCH_AMD64
874 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
875#else /* !RT_ARCH_AMD64 */
876 bool fRet = false;
877# if RT_INLINE_ASM_GNU_STYLE
878 uint32_t u1;
879 uint32_t u2;
880 __asm__ ("pushf\n\t"
881 "pop %1\n\t"
882 "mov %1, %2\n\t"
883 "xorl $0x200000, %1\n\t"
884 "push %1\n\t"
885 "popf\n\t"
886 "pushf\n\t"
887 "pop %1\n\t"
888 "cmpl %1, %2\n\t"
889 "setne %0\n\t"
890 "push %2\n\t"
891 "popf\n\t"
892 : "=m" (fRet), "=r" (u1), "=r" (u2));
893# else
894 __asm
895 {
896 pushfd
897 pop eax
898 mov ebx, eax
899 xor eax, 0200000h
900 push eax
901 popfd
902 pushfd
903 pop eax
904 cmp eax, ebx
905 setne fRet
906 push ebx
907 popfd
908 }
909# endif
910 return fRet;
911#endif /* !RT_ARCH_AMD64 */
912}
913
914
915/**
916 * Gets the APIC ID of the current CPU.
917 *
918 * @returns the APIC ID.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(uint8_t) ASMGetApicId(void);
922#else
923DECLINLINE(uint8_t) ASMGetApicId(void)
924{
925 RTCCUINTREG xBX;
926# if RT_INLINE_ASM_GNU_STYLE
927# ifdef RT_ARCH_AMD64
928 RTCCUINTREG uSpill;
929 __asm__ ("cpuid"
930 : "=a" (uSpill),
931 "=b" (xBX)
932 : "0" (1)
933 : "rcx", "rdx");
934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
935 RTCCUINTREG uSpill;
936 __asm__ ("mov %%ebx,%1\n\t"
937 "cpuid\n\t"
938 "xchgl %%ebx,%1\n\t"
939 : "=a" (uSpill),
940 "=r" (xBX)
941 : "0" (1)
942 : "ecx", "edx");
943# else
944 RTCCUINTREG uSpill;
945 __asm__ ("cpuid"
946 : "=a" (uSpill),
947 "=b" (xBX)
948 : "0" (1)
949 : "ecx", "edx");
950# endif
951
952# elif RT_INLINE_ASM_USES_INTRIN
953 int aInfo[4];
954 __cpuid(aInfo, 1);
955 xBX = aInfo[1];
956
957# else
958 __asm
959 {
960 push ebx
961 mov eax, 1
962 cpuid
963 mov [xBX], ebx
964 pop ebx
965 }
966# endif
967 return (uint8_t)(xBX >> 24);
968}
969#endif
970
971
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output, i.e.
 * whether the three registers spell out "GenuineIntel".
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    if (uEBX != UINT32_C(0x756e6547))   /* "Genu" */
        return false;
    if (uEDX != UINT32_C(0x49656e69))   /* "ineI" */
        return false;
    return uECX == UINT32_C(0x6c65746e); /* "ntel" */
}
986
987
988/**
989 * Tests if this is a genuine Intel CPU.
990 *
991 * @returns true/false.
992 * @remarks ASSUMES that cpuid is supported by the CPU.
993 */
994DECLINLINE(bool) ASMIsIntelCpu(void)
995{
996 uint32_t uEAX, uEBX, uECX, uEDX;
997 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
998 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
999}
1000
1001
/**
 * Tests if it is an authentic AMD CPU based on the ASMCpuId(0) output, i.e.
 * whether the three registers spell out "AuthenticAMD".
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    if (uEBX != UINT32_C(0x68747541))   /* "Auth" */
        return false;
    if (uEDX != UINT32_C(0x69746e65))   /* "enti" */
        return false;
    return uECX == UINT32_C(0x444d4163); /* "cAMD" */
}
1016
1017
1018/**
1019 * Tests if this is an authentic AMD CPU.
1020 *
1021 * @returns true/false.
1022 * @remarks ASSUMES that cpuid is supported by the CPU.
1023 */
1024DECLINLINE(bool) ASMIsAmdCpu(void)
1025{
1026 uint32_t uEAX, uEBX, uECX, uEDX;
1027 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1028 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
1029}
1030
1031
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family is EAX bits 11:8.  When it reads 0xf, the extended family
 * field (EAX bits 27:20, eight bits wide) is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    /* Use the full 8-bit extended family field; a 7-bit mask would drop bit 27. */
    return ((uEAX >> 20) & 0xff) + 0xf;
}
1044
1045
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The extended model bits (EAX bits 19:16) are merged in as the high nibble
 * when the base family (EAX bits 11:8) is 0xf or 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf || uBaseFamily == 0x6)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1059
1060
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The extended model bits (EAX bits 19:16) are merged in as the high nibble
 * only when the base family (EAX bits 11:8) is 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1074
1075
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The extended model bits (EAX bits 19:16) are merged in as the high nibble
 * when the base family (EAX bits 11:8) is 0xf, and for Intel CPUs also when
 * it is 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (   uBaseFamily == 0xf
        || (uBaseFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1089
1090
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * @returns Stepping (the low four bits of EAX).
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const fSteppingMask = 0xf;
    return uEAX & fSteppingMask;
}
1101
1102
1103/**
1104 * Get cr0.
1105 * @returns cr0.
1106 */
1107#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1108DECLASM(RTCCUINTREG) ASMGetCR0(void);
1109#else
1110DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1111{
1112 RTCCUINTREG uCR0;
1113# if RT_INLINE_ASM_USES_INTRIN
1114 uCR0 = __readcr0();
1115
1116# elif RT_INLINE_ASM_GNU_STYLE
1117# ifdef RT_ARCH_AMD64
1118 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1119# else
1120 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1121# endif
1122# else
1123 __asm
1124 {
1125# ifdef RT_ARCH_AMD64
1126 mov rax, cr0
1127 mov [uCR0], rax
1128# else
1129 mov eax, cr0
1130 mov [uCR0], eax
1131# endif
1132 }
1133# endif
1134 return uCR0;
1135}
1136#endif
1137
1138
1139/**
1140 * Sets the CR0 register.
1141 * @param uCR0 The new CR0 value.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1145#else
1146DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1147{
1148# if RT_INLINE_ASM_USES_INTRIN
1149 __writecr0(uCR0);
1150
1151# elif RT_INLINE_ASM_GNU_STYLE
1152# ifdef RT_ARCH_AMD64
1153 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1154# else
1155 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1156# endif
1157# else
1158 __asm
1159 {
1160# ifdef RT_ARCH_AMD64
1161 mov rax, [uCR0]
1162 mov cr0, rax
1163# else
1164 mov eax, [uCR0]
1165 mov cr0, eax
1166# endif
1167 }
1168# endif
1169}
1170#endif
1171
1172
1173/**
1174 * Get cr2.
1175 * @returns cr2.
1176 */
1177#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1178DECLASM(RTCCUINTREG) ASMGetCR2(void);
1179#else
1180DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1181{
1182 RTCCUINTREG uCR2;
1183# if RT_INLINE_ASM_USES_INTRIN
1184 uCR2 = __readcr2();
1185
1186# elif RT_INLINE_ASM_GNU_STYLE
1187# ifdef RT_ARCH_AMD64
1188 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1189# else
1190 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1191# endif
1192# else
1193 __asm
1194 {
1195# ifdef RT_ARCH_AMD64
1196 mov rax, cr2
1197 mov [uCR2], rax
1198# else
1199 mov eax, cr2
1200 mov [uCR2], eax
1201# endif
1202 }
1203# endif
1204 return uCR2;
1205}
1206#endif
1207
1208
1209/**
1210 * Sets the CR2 register.
1211 * @param uCR2 The new CR0 value.
1212 */
1213#if RT_INLINE_ASM_EXTERNAL
1214DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1215#else
1216DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1217{
1218# if RT_INLINE_ASM_GNU_STYLE
1219# ifdef RT_ARCH_AMD64
1220 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1221# else
1222 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1223# endif
1224# else
1225 __asm
1226 {
1227# ifdef RT_ARCH_AMD64
1228 mov rax, [uCR2]
1229 mov cr2, rax
1230# else
1231 mov eax, [uCR2]
1232 mov cr2, eax
1233# endif
1234 }
1235# endif
1236}
1237#endif
1238
1239
1240/**
1241 * Get cr3.
1242 * @returns cr3.
1243 */
1244#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1245DECLASM(RTCCUINTREG) ASMGetCR3(void);
1246#else
1247DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1248{
1249 RTCCUINTREG uCR3;
1250# if RT_INLINE_ASM_USES_INTRIN
1251 uCR3 = __readcr3();
1252
1253# elif RT_INLINE_ASM_GNU_STYLE
1254# ifdef RT_ARCH_AMD64
1255 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1256# else
1257 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1258# endif
1259# else
1260 __asm
1261 {
1262# ifdef RT_ARCH_AMD64
1263 mov rax, cr3
1264 mov [uCR3], rax
1265# else
1266 mov eax, cr3
1267 mov [uCR3], eax
1268# endif
1269 }
1270# endif
1271 return uCR3;
1272}
1273#endif
1274
1275
1276/**
1277 * Sets the CR3 register.
1278 *
1279 * @param uCR3 New CR3 value.
1280 */
1281#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1282DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1283#else
1284DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1285{
1286# if RT_INLINE_ASM_USES_INTRIN
1287 __writecr3(uCR3);
1288
1289# elif RT_INLINE_ASM_GNU_STYLE
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1292# else
1293 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1294# endif
1295# else
1296 __asm
1297 {
1298# ifdef RT_ARCH_AMD64
1299 mov rax, [uCR3]
1300 mov cr3, rax
1301# else
1302 mov eax, [uCR3]
1303 mov cr3, eax
1304# endif
1305 }
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Reloads the CR3 register.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(void) ASMReloadCR3(void);
1316#else
1317DECLINLINE(void) ASMReloadCR3(void)
1318{
1319# if RT_INLINE_ASM_USES_INTRIN
1320 __writecr3(__readcr3());
1321
1322# elif RT_INLINE_ASM_GNU_STYLE
1323 RTCCUINTREG u;
1324# ifdef RT_ARCH_AMD64
1325 __asm__ __volatile__("movq %%cr3, %0\n\t"
1326 "movq %0, %%cr3\n\t"
1327 : "=r" (u));
1328# else
1329 __asm__ __volatile__("movl %%cr3, %0\n\t"
1330 "movl %0, %%cr3\n\t"
1331 : "=r" (u));
1332# endif
1333# else
1334 __asm
1335 {
1336# ifdef RT_ARCH_AMD64
1337 mov rax, cr3
1338 mov cr3, rax
1339# else
1340 mov eax, cr3
1341 mov cr3, eax
1342# endif
1343 }
1344# endif
1345}
1346#endif
1347
1348
1349/**
1350 * Get cr4.
1351 * @returns cr4.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(RTCCUINTREG) ASMGetCR4(void);
1355#else
1356DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1357{
1358 RTCCUINTREG uCR4;
1359# if RT_INLINE_ASM_USES_INTRIN
1360 uCR4 = __readcr4();
1361
1362# elif RT_INLINE_ASM_GNU_STYLE
1363# ifdef RT_ARCH_AMD64
1364 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1365# else
1366 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1367# endif
1368# else
1369 __asm
1370 {
1371# ifdef RT_ARCH_AMD64
1372 mov rax, cr4
1373 mov [uCR4], rax
1374# else
1375 push eax /* just in case */
1376 /*mov eax, cr4*/
1377 _emit 0x0f
1378 _emit 0x20
1379 _emit 0xe0
1380 mov [uCR4], eax
1381 pop eax
1382# endif
1383 }
1384# endif
1385 return uCR4;
1386}
1387#endif
1388
1389
1390/**
1391 * Sets the CR4 register.
1392 *
1393 * @param uCR4 New CR4 value.
1394 */
1395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1396DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1397#else
1398DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1399{
1400# if RT_INLINE_ASM_USES_INTRIN
1401 __writecr4(uCR4);
1402
1403# elif RT_INLINE_ASM_GNU_STYLE
1404# ifdef RT_ARCH_AMD64
1405 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1406# else
1407 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1408# endif
1409# else
1410 __asm
1411 {
1412# ifdef RT_ARCH_AMD64
1413 mov rax, [uCR4]
1414 mov cr4, rax
1415# else
1416 mov eax, [uCR4]
1417 _emit 0x0F
1418 _emit 0x22
1419 _emit 0xE0 /* mov cr4, eax */
1420# endif
1421 }
1422# endif
1423}
1424#endif
1425
1426
1427/**
1428 * Get cr8.
1429 * @returns cr8.
1430 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1433DECLASM(RTCCUINTREG) ASMGetCR8(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1436{
1437# ifdef RT_ARCH_AMD64
1438 RTCCUINTREG uCR8;
1439# if RT_INLINE_ASM_USES_INTRIN
1440 uCR8 = __readcr8();
1441
1442# elif RT_INLINE_ASM_GNU_STYLE
1443 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1444# else
1445 __asm
1446 {
1447 mov rax, cr8
1448 mov [uCR8], rax
1449 }
1450# endif
1451 return uCR8;
1452# else /* !RT_ARCH_AMD64 */
1453 return 0;
1454# endif /* !RT_ARCH_AMD64 */
1455}
1456#endif
1457
1458
1459/**
1460 * Enables interrupts (EFLAGS.IF).
1461 */
1462#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1463DECLASM(void) ASMIntEnable(void);
1464#else
1465DECLINLINE(void) ASMIntEnable(void)
1466{
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm("sti\n");
1469# elif RT_INLINE_ASM_USES_INTRIN
1470 _enable();
1471# else
1472 __asm sti
1473# endif
1474}
1475#endif
1476
1477
1478/**
1479 * Disables interrupts (!EFLAGS.IF).
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(void) ASMIntDisable(void);
1483#else
1484DECLINLINE(void) ASMIntDisable(void)
1485{
1486# if RT_INLINE_ASM_GNU_STYLE
1487 __asm("cli\n");
1488# elif RT_INLINE_ASM_USES_INTRIN
1489 _disable();
1490# else
1491 __asm cli
1492# endif
1493}
1494#endif
1495
1496
1497/**
1498 * Disables interrupts and returns previous xFLAGS.
1499 */
1500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1501DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1502#else
1503DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1504{
1505 RTCCUINTREG xFlags;
1506# if RT_INLINE_ASM_GNU_STYLE
1507# ifdef RT_ARCH_AMD64
1508 __asm__ __volatile__("pushfq\n\t"
1509 "cli\n\t"
1510 "popq %0\n\t"
1511 : "=r" (xFlags));
1512# else
1513 __asm__ __volatile__("pushfl\n\t"
1514 "cli\n\t"
1515 "popl %0\n\t"
1516 : "=r" (xFlags));
1517# endif
1518# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1519 xFlags = ASMGetFlags();
1520 _disable();
1521# else
1522 __asm {
1523 pushfd
1524 cli
1525 pop [xFlags]
1526 }
1527# endif
1528 return xFlags;
1529}
1530#endif
1531
1532
1533/**
1534 * Are interrupts enabled?
1535 *
1536 * @returns true / false.
1537 */
1538DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1539{
1540 RTCCUINTREG uFlags = ASMGetFlags();
1541 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1542}
1543
1544
1545/**
1546 * Halts the CPU until interrupted.
1547 */
1548#if RT_INLINE_ASM_EXTERNAL
1549DECLASM(void) ASMHalt(void);
1550#else
1551DECLINLINE(void) ASMHalt(void)
1552{
1553# if RT_INLINE_ASM_GNU_STYLE
1554 __asm__ __volatile__("hlt\n\t");
1555# else
1556 __asm {
1557 hlt
1558 }
1559# endif
1560}
1561#endif
1562
1563
1564/**
1565 * The PAUSE variant of NOP for helping hyperthreaded CPUs detecing spin locks.
1566 */
1567#if RT_INLINE_ASM_EXTERNAL
1568DECLASM(void) ASMNopPause(void);
1569#else
1570DECLINLINE(void) ASMNopPause(void)
1571{
1572# if RT_INLINE_ASM_GNU_STYLE
1573 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1574# else
1575 __asm {
1576 _emit 0f3h
1577 _emit 090h
1578 }
1579# endif
1580}
1581#endif
1582
1583
1584/**
1585 * Reads a machine specific register.
1586 *
1587 * @returns Register content.
1588 * @param uRegister Register to read.
1589 */
1590#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1591DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1592#else
1593DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1594{
1595 RTUINT64U u;
1596# if RT_INLINE_ASM_GNU_STYLE
1597 __asm__ __volatile__("rdmsr\n\t"
1598 : "=a" (u.s.Lo),
1599 "=d" (u.s.Hi)
1600 : "c" (uRegister));
1601
1602# elif RT_INLINE_ASM_USES_INTRIN
1603 u.u = __readmsr(uRegister);
1604
1605# else
1606 __asm
1607 {
1608 mov ecx, [uRegister]
1609 rdmsr
1610 mov [u.s.Lo], eax
1611 mov [u.s.Hi], edx
1612 }
1613# endif
1614
1615 return u.u;
1616}
1617#endif
1618
1619
1620/**
1621 * Writes a machine specific register.
1622 *
1623 * @returns Register content.
1624 * @param uRegister Register to write to.
1625 * @param u64Val Value to write.
1626 */
1627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1628DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1629#else
1630DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1631{
1632 RTUINT64U u;
1633
1634 u.u = u64Val;
1635# if RT_INLINE_ASM_GNU_STYLE
1636 __asm__ __volatile__("wrmsr\n\t"
1637 ::"a" (u.s.Lo),
1638 "d" (u.s.Hi),
1639 "c" (uRegister));
1640
1641# elif RT_INLINE_ASM_USES_INTRIN
1642 __writemsr(uRegister, u.u);
1643
1644# else
1645 __asm
1646 {
1647 mov ecx, [uRegister]
1648 mov edx, [u.s.Hi]
1649 mov eax, [u.s.Lo]
1650 wrmsr
1651 }
1652# endif
1653}
1654#endif
1655
1656
1657/**
1658 * Reads low part of a machine specific register.
1659 *
1660 * @returns Register content.
1661 * @param uRegister Register to read.
1662 */
1663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1664DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1665#else
1666DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1667{
1668 uint32_t u32;
1669# if RT_INLINE_ASM_GNU_STYLE
1670 __asm__ __volatile__("rdmsr\n\t"
1671 : "=a" (u32)
1672 : "c" (uRegister)
1673 : "edx");
1674
1675# elif RT_INLINE_ASM_USES_INTRIN
1676 u32 = (uint32_t)__readmsr(uRegister);
1677
1678#else
1679 __asm
1680 {
1681 mov ecx, [uRegister]
1682 rdmsr
1683 mov [u32], eax
1684 }
1685# endif
1686
1687 return u32;
1688}
1689#endif
1690
1691
1692/**
1693 * Reads high part of a machine specific register.
1694 *
1695 * @returns Register content.
1696 * @param uRegister Register to read.
1697 */
1698#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1699DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1700#else
1701DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1702{
1703 uint32_t u32;
1704# if RT_INLINE_ASM_GNU_STYLE
1705 __asm__ __volatile__("rdmsr\n\t"
1706 : "=d" (u32)
1707 : "c" (uRegister)
1708 : "eax");
1709
1710# elif RT_INLINE_ASM_USES_INTRIN
1711 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1712
1713# else
1714 __asm
1715 {
1716 mov ecx, [uRegister]
1717 rdmsr
1718 mov [u32], edx
1719 }
1720# endif
1721
1722 return u32;
1723}
1724#endif
1725
1726
1727/**
1728 * Gets dr0.
1729 *
1730 * @returns dr0.
1731 */
1732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1733DECLASM(RTCCUINTREG) ASMGetDR0(void);
1734#else
1735DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1736{
1737 RTCCUINTREG uDR0;
1738# if RT_INLINE_ASM_USES_INTRIN
1739 uDR0 = __readdr(0);
1740# elif RT_INLINE_ASM_GNU_STYLE
1741# ifdef RT_ARCH_AMD64
1742 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1743# else
1744 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1745# endif
1746# else
1747 __asm
1748 {
1749# ifdef RT_ARCH_AMD64
1750 mov rax, dr0
1751 mov [uDR0], rax
1752# else
1753 mov eax, dr0
1754 mov [uDR0], eax
1755# endif
1756 }
1757# endif
1758 return uDR0;
1759}
1760#endif
1761
1762
1763/**
1764 * Gets dr1.
1765 *
1766 * @returns dr1.
1767 */
1768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1769DECLASM(RTCCUINTREG) ASMGetDR1(void);
1770#else
1771DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1772{
1773 RTCCUINTREG uDR1;
1774# if RT_INLINE_ASM_USES_INTRIN
1775 uDR1 = __readdr(1);
1776# elif RT_INLINE_ASM_GNU_STYLE
1777# ifdef RT_ARCH_AMD64
1778 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1779# else
1780 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1781# endif
1782# else
1783 __asm
1784 {
1785# ifdef RT_ARCH_AMD64
1786 mov rax, dr1
1787 mov [uDR1], rax
1788# else
1789 mov eax, dr1
1790 mov [uDR1], eax
1791# endif
1792 }
1793# endif
1794 return uDR1;
1795}
1796#endif
1797
1798
1799/**
1800 * Gets dr2.
1801 *
1802 * @returns dr2.
1803 */
1804#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1805DECLASM(RTCCUINTREG) ASMGetDR2(void);
1806#else
1807DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1808{
1809 RTCCUINTREG uDR2;
1810# if RT_INLINE_ASM_USES_INTRIN
1811 uDR2 = __readdr(2);
1812# elif RT_INLINE_ASM_GNU_STYLE
1813# ifdef RT_ARCH_AMD64
1814 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1815# else
1816 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1817# endif
1818# else
1819 __asm
1820 {
1821# ifdef RT_ARCH_AMD64
1822 mov rax, dr2
1823 mov [uDR2], rax
1824# else
1825 mov eax, dr2
1826 mov [uDR2], eax
1827# endif
1828 }
1829# endif
1830 return uDR2;
1831}
1832#endif
1833
1834
1835/**
1836 * Gets dr3.
1837 *
1838 * @returns dr3.
1839 */
1840#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1841DECLASM(RTCCUINTREG) ASMGetDR3(void);
1842#else
1843DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1844{
1845 RTCCUINTREG uDR3;
1846# if RT_INLINE_ASM_USES_INTRIN
1847 uDR3 = __readdr(3);
1848# elif RT_INLINE_ASM_GNU_STYLE
1849# ifdef RT_ARCH_AMD64
1850 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1851# else
1852 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1853# endif
1854# else
1855 __asm
1856 {
1857# ifdef RT_ARCH_AMD64
1858 mov rax, dr3
1859 mov [uDR3], rax
1860# else
1861 mov eax, dr3
1862 mov [uDR3], eax
1863# endif
1864 }
1865# endif
1866 return uDR3;
1867}
1868#endif
1869
1870
1871/**
1872 * Gets dr6.
1873 *
1874 * @returns dr6.
1875 */
1876#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1877DECLASM(RTCCUINTREG) ASMGetDR6(void);
1878#else
1879DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1880{
1881 RTCCUINTREG uDR6;
1882# if RT_INLINE_ASM_USES_INTRIN
1883 uDR6 = __readdr(6);
1884# elif RT_INLINE_ASM_GNU_STYLE
1885# ifdef RT_ARCH_AMD64
1886 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1887# else
1888 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1889# endif
1890# else
1891 __asm
1892 {
1893# ifdef RT_ARCH_AMD64
1894 mov rax, dr6
1895 mov [uDR6], rax
1896# else
1897 mov eax, dr6
1898 mov [uDR6], eax
1899# endif
1900 }
1901# endif
1902 return uDR6;
1903}
1904#endif
1905
1906
1907/**
1908 * Reads and clears DR6.
1909 *
1910 * @returns DR6.
1911 */
1912#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1913DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1914#else
1915DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1916{
1917 RTCCUINTREG uDR6;
1918# if RT_INLINE_ASM_USES_INTRIN
1919 uDR6 = __readdr(6);
1920 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1921# elif RT_INLINE_ASM_GNU_STYLE
1922 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1923# ifdef RT_ARCH_AMD64
1924 __asm__ __volatile__("movq %%dr6, %0\n\t"
1925 "movq %1, %%dr6\n\t"
1926 : "=r" (uDR6)
1927 : "r" (uNewValue));
1928# else
1929 __asm__ __volatile__("movl %%dr6, %0\n\t"
1930 "movl %1, %%dr6\n\t"
1931 : "=r" (uDR6)
1932 : "r" (uNewValue));
1933# endif
1934# else
1935 __asm
1936 {
1937# ifdef RT_ARCH_AMD64
1938 mov rax, dr6
1939 mov [uDR6], rax
1940 mov rcx, rax
1941 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1942 mov dr6, rcx
1943# else
1944 mov eax, dr6
1945 mov [uDR6], eax
1946 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1947 mov dr6, ecx
1948# endif
1949 }
1950# endif
1951 return uDR6;
1952}
1953#endif
1954
1955
1956/**
1957 * Gets dr7.
1958 *
1959 * @returns dr7.
1960 */
1961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1962DECLASM(RTCCUINTREG) ASMGetDR7(void);
1963#else
1964DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1965{
1966 RTCCUINTREG uDR7;
1967# if RT_INLINE_ASM_USES_INTRIN
1968 uDR7 = __readdr(7);
1969# elif RT_INLINE_ASM_GNU_STYLE
1970# ifdef RT_ARCH_AMD64
1971 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1972# else
1973 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1974# endif
1975# else
1976 __asm
1977 {
1978# ifdef RT_ARCH_AMD64
1979 mov rax, dr7
1980 mov [uDR7], rax
1981# else
1982 mov eax, dr7
1983 mov [uDR7], eax
1984# endif
1985 }
1986# endif
1987 return uDR7;
1988}
1989#endif
1990
1991
1992/**
1993 * Sets dr0.
1994 *
1995 * @param uDRVal Debug register value to write
1996 */
1997#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1998DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1999#else
2000DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
2001{
2002# if RT_INLINE_ASM_USES_INTRIN
2003 __writedr(0, uDRVal);
2004# elif RT_INLINE_ASM_GNU_STYLE
2005# ifdef RT_ARCH_AMD64
2006 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
2007# else
2008 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
2009# endif
2010# else
2011 __asm
2012 {
2013# ifdef RT_ARCH_AMD64
2014 mov rax, [uDRVal]
2015 mov dr0, rax
2016# else
2017 mov eax, [uDRVal]
2018 mov dr0, eax
2019# endif
2020 }
2021# endif
2022}
2023#endif
2024
2025
2026/**
2027 * Sets dr1.
2028 *
2029 * @param uDRVal Debug register value to write
2030 */
2031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2032DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2033#else
2034DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2035{
2036# if RT_INLINE_ASM_USES_INTRIN
2037 __writedr(1, uDRVal);
2038# elif RT_INLINE_ASM_GNU_STYLE
2039# ifdef RT_ARCH_AMD64
2040 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2041# else
2042 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2043# endif
2044# else
2045 __asm
2046 {
2047# ifdef RT_ARCH_AMD64
2048 mov rax, [uDRVal]
2049 mov dr1, rax
2050# else
2051 mov eax, [uDRVal]
2052 mov dr1, eax
2053# endif
2054 }
2055# endif
2056}
2057#endif
2058
2059
2060/**
2061 * Sets dr2.
2062 *
2063 * @param uDRVal Debug register value to write
2064 */
2065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2066DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2067#else
2068DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2069{
2070# if RT_INLINE_ASM_USES_INTRIN
2071 __writedr(2, uDRVal);
2072# elif RT_INLINE_ASM_GNU_STYLE
2073# ifdef RT_ARCH_AMD64
2074 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2075# else
2076 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2077# endif
2078# else
2079 __asm
2080 {
2081# ifdef RT_ARCH_AMD64
2082 mov rax, [uDRVal]
2083 mov dr2, rax
2084# else
2085 mov eax, [uDRVal]
2086 mov dr2, eax
2087# endif
2088 }
2089# endif
2090}
2091#endif
2092
2093
2094/**
2095 * Sets dr3.
2096 *
2097 * @param uDRVal Debug register value to write
2098 */
2099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2100DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2101#else
2102DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2103{
2104# if RT_INLINE_ASM_USES_INTRIN
2105 __writedr(3, uDRVal);
2106# elif RT_INLINE_ASM_GNU_STYLE
2107# ifdef RT_ARCH_AMD64
2108 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2109# else
2110 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2111# endif
2112# else
2113 __asm
2114 {
2115# ifdef RT_ARCH_AMD64
2116 mov rax, [uDRVal]
2117 mov dr3, rax
2118# else
2119 mov eax, [uDRVal]
2120 mov dr3, eax
2121# endif
2122 }
2123# endif
2124}
2125#endif
2126
2127
2128/**
2129 * Sets dr6.
2130 *
2131 * @param uDRVal Debug register value to write
2132 */
2133#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2134DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2135#else
2136DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2137{
2138# if RT_INLINE_ASM_USES_INTRIN
2139 __writedr(6, uDRVal);
2140# elif RT_INLINE_ASM_GNU_STYLE
2141# ifdef RT_ARCH_AMD64
2142 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2143# else
2144 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2145# endif
2146# else
2147 __asm
2148 {
2149# ifdef RT_ARCH_AMD64
2150 mov rax, [uDRVal]
2151 mov dr6, rax
2152# else
2153 mov eax, [uDRVal]
2154 mov dr6, eax
2155# endif
2156 }
2157# endif
2158}
2159#endif
2160
2161
2162/**
2163 * Sets dr7.
2164 *
2165 * @param uDRVal Debug register value to write
2166 */
2167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2168DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2169#else
2170DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2171{
2172# if RT_INLINE_ASM_USES_INTRIN
2173 __writedr(7, uDRVal);
2174# elif RT_INLINE_ASM_GNU_STYLE
2175# ifdef RT_ARCH_AMD64
2176 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2177# else
2178 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2179# endif
2180# else
2181 __asm
2182 {
2183# ifdef RT_ARCH_AMD64
2184 mov rax, [uDRVal]
2185 mov dr7, rax
2186# else
2187 mov eax, [uDRVal]
2188 mov dr7, eax
2189# endif
2190 }
2191# endif
2192}
2193#endif
2194
2195
2196/**
2197 * Compiler memory barrier.
2198 *
2199 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2200 * values or any outstanding writes when returning from this function.
2201 *
2202 * This function must be used if non-volatile data is modified by a
2203 * device or the VMM. Typical cases are port access, MMIO access,
2204 * trapping instruction, etc.
2205 */
2206#if RT_INLINE_ASM_GNU_STYLE
2207# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2208#elif RT_INLINE_ASM_USES_INTRIN
2209# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2210#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2211DECLINLINE(void) ASMCompilerBarrier(void)
2212{
2213 __asm
2214 {
2215 }
2216}
2217#endif
2218
2219
2220/**
2221 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2222 *
2223 * @param Port I/O port to write to.
2224 * @param u8 8-bit integer to write.
2225 */
2226#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2227DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2228#else
2229DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2230{
2231# if RT_INLINE_ASM_GNU_STYLE
2232 __asm__ __volatile__("outb %b1, %w0\n\t"
2233 :: "Nd" (Port),
2234 "a" (u8));
2235
2236# elif RT_INLINE_ASM_USES_INTRIN
2237 __outbyte(Port, u8);
2238
2239# else
2240 __asm
2241 {
2242 mov dx, [Port]
2243 mov al, [u8]
2244 out dx, al
2245 }
2246# endif
2247}
2248#endif
2249
2250
2251/**
2252 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2253 *
2254 * @returns 8-bit integer.
2255 * @param Port I/O port to read from.
2256 */
2257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2258DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2259#else
2260DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2261{
2262 uint8_t u8;
2263# if RT_INLINE_ASM_GNU_STYLE
2264 __asm__ __volatile__("inb %w1, %b0\n\t"
2265 : "=a" (u8)
2266 : "Nd" (Port));
2267
2268# elif RT_INLINE_ASM_USES_INTRIN
2269 u8 = __inbyte(Port);
2270
2271# else
2272 __asm
2273 {
2274 mov dx, [Port]
2275 in al, dx
2276 mov [u8], al
2277 }
2278# endif
2279 return u8;
2280}
2281#endif
2282
2283
2284/**
2285 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2286 *
2287 * @param Port I/O port to write to.
2288 * @param u16 16-bit integer to write.
2289 */
2290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2291DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2292#else
2293DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2294{
2295# if RT_INLINE_ASM_GNU_STYLE
2296 __asm__ __volatile__("outw %w1, %w0\n\t"
2297 :: "Nd" (Port),
2298 "a" (u16));
2299
2300# elif RT_INLINE_ASM_USES_INTRIN
2301 __outword(Port, u16);
2302
2303# else
2304 __asm
2305 {
2306 mov dx, [Port]
2307 mov ax, [u16]
2308 out dx, ax
2309 }
2310# endif
2311}
2312#endif
2313
2314
2315/**
2316 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2317 *
2318 * @returns 16-bit integer.
2319 * @param Port I/O port to read from.
2320 */
2321#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2322DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2323#else
2324DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2325{
2326 uint16_t u16;
2327# if RT_INLINE_ASM_GNU_STYLE
2328 __asm__ __volatile__("inw %w1, %w0\n\t"
2329 : "=a" (u16)
2330 : "Nd" (Port));
2331
2332# elif RT_INLINE_ASM_USES_INTRIN
2333 u16 = __inword(Port);
2334
2335# else
2336 __asm
2337 {
2338 mov dx, [Port]
2339 in ax, dx
2340 mov [u16], ax
2341 }
2342# endif
2343 return u16;
2344}
2345#endif
2346
2347
2348/**
2349 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2350 *
2351 * @param Port I/O port to write to.
2352 * @param u32 32-bit integer to write.
2353 */
2354#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2355DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2356#else
2357DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2358{
2359# if RT_INLINE_ASM_GNU_STYLE
2360 __asm__ __volatile__("outl %1, %w0\n\t"
2361 :: "Nd" (Port),
2362 "a" (u32));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outdword(Port, u32);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov eax, [u32]
2372 out dx, eax
2373 }
2374# endif
2375}
2376#endif
2377
2378
2379/**
2380 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2381 *
2382 * @returns 32-bit integer.
2383 * @param Port I/O port to read from.
2384 */
2385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2386DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2387#else
2388DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2389{
2390 uint32_t u32;
2391# if RT_INLINE_ASM_GNU_STYLE
2392 __asm__ __volatile__("inl %w1, %0\n\t"
2393 : "=a" (u32)
2394 : "Nd" (Port));
2395
2396# elif RT_INLINE_ASM_USES_INTRIN
2397 u32 = __indword(Port);
2398
2399# else
2400 __asm
2401 {
2402 mov dx, [Port]
2403 in eax, dx
2404 mov [u32], eax
2405 }
2406# endif
2407 return u32;
2408}
2409#endif
2410
2411
2412/**
2413 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2414 *
2415 * @param Port I/O port to write to.
2416 * @param pau8 Pointer to the string buffer.
2417 * @param c The number of items to write.
2418 */
2419#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2420DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2421#else
2422DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2423{
2424# if RT_INLINE_ASM_GNU_STYLE
2425 __asm__ __volatile__("rep; outsb\n\t"
2426 : "+S" (pau8),
2427 "+c" (c)
2428 : "d" (Port));
2429
2430# elif RT_INLINE_ASM_USES_INTRIN
2431 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2432
2433# else
2434 __asm
2435 {
2436 mov dx, [Port]
2437 mov ecx, [c]
2438 mov eax, [pau8]
2439 xchg esi, eax
2440 rep outsb
2441 xchg esi, eax
2442 }
2443# endif
2444}
2445#endif
2446
2447
2448/**
2449 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2450 *
2451 * @param Port I/O port to read from.
2452 * @param pau8 Pointer to the string buffer (output).
2453 * @param c The number of items to read.
2454 */
2455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2456DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2457#else
2458DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2459{
2460# if RT_INLINE_ASM_GNU_STYLE
2461 __asm__ __volatile__("rep; insb\n\t"
2462 : "+D" (pau8),
2463 "+c" (c)
2464 : "d" (Port));
2465
2466# elif RT_INLINE_ASM_USES_INTRIN
2467 __inbytestring(Port, pau8, (unsigned long)c);
2468
2469# else
2470 __asm
2471 {
2472 mov dx, [Port]
2473 mov ecx, [c]
2474 mov eax, [pau8]
2475 xchg edi, eax
2476 rep insb
2477 xchg edi, eax
2478 }
2479# endif
2480}
2481#endif
2482
2483
2484/**
2485 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2486 *
2487 * @param Port I/O port to write to.
2488 * @param pau16 Pointer to the string buffer.
2489 * @param c The number of items to write.
2490 */
2491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2492DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2493#else
2494DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2495{
2496# if RT_INLINE_ASM_GNU_STYLE
2497 __asm__ __volatile__("rep; outsw\n\t"
2498 : "+S" (pau16),
2499 "+c" (c)
2500 : "d" (Port));
2501
2502# elif RT_INLINE_ASM_USES_INTRIN
2503 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2504
2505# else
2506 __asm
2507 {
2508 mov dx, [Port]
2509 mov ecx, [c]
2510 mov eax, [pau16]
2511 xchg esi, eax
2512 rep outsw
2513 xchg esi, eax
2514 }
2515# endif
2516}
2517#endif
2518
2519
2520/**
2521 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2522 *
2523 * @param Port I/O port to read from.
2524 * @param pau16 Pointer to the string buffer (output).
2525 * @param c The number of items to read.
2526 */
2527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2528DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2529#else
2530DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2531{
2532# if RT_INLINE_ASM_GNU_STYLE
2533 __asm__ __volatile__("rep; insw\n\t"
2534 : "+D" (pau16),
2535 "+c" (c)
2536 : "d" (Port));
2537
2538# elif RT_INLINE_ASM_USES_INTRIN
2539 __inwordstring(Port, pau16, (unsigned long)c);
2540
2541# else
2542 __asm
2543 {
2544 mov dx, [Port]
2545 mov ecx, [c]
2546 mov eax, [pau16]
2547 xchg edi, eax
2548 rep insw
2549 xchg edi, eax
2550 }
2551# endif
2552}
2553#endif
2554
2555
2556/**
2557 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2558 *
2559 * @param Port I/O port to write to.
2560 * @param pau32 Pointer to the string buffer.
2561 * @param c The number of items to write.
2562 */
2563#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2564DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2565#else
2566DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2567{
2568# if RT_INLINE_ASM_GNU_STYLE
2569 __asm__ __volatile__("rep; outsl\n\t"
2570 : "+S" (pau32),
2571 "+c" (c)
2572 : "d" (Port));
2573
2574# elif RT_INLINE_ASM_USES_INTRIN
2575 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2576
2577# else
2578 __asm
2579 {
2580 mov dx, [Port]
2581 mov ecx, [c]
2582 mov eax, [pau32]
2583 xchg esi, eax
2584 rep outsd
2585 xchg esi, eax
2586 }
2587# endif
2588}
2589#endif
2590
2591
2592/**
2593 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2594 *
2595 * @param Port I/O port to read from.
2596 * @param pau32 Pointer to the string buffer (output).
2597 * @param c The number of items to read.
2598 */
2599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2600DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2601#else
2602DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2603{
2604# if RT_INLINE_ASM_GNU_STYLE
2605 __asm__ __volatile__("rep; insl\n\t"
2606 : "+D" (pau32),
2607 "+c" (c)
2608 : "d" (Port));
2609
2610# elif RT_INLINE_ASM_USES_INTRIN
2611 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2612
2613# else
2614 __asm
2615 {
2616 mov dx, [Port]
2617 mov ecx, [c]
2618 mov eax, [pau32]
2619 xchg edi, eax
2620 rep insd
2621 xchg edi, eax
2622 }
2623# endif
2624}
2625#endif
2626
2627
2628/**
2629 * Atomically Exchange an unsigned 8-bit value, ordered.
2630 *
2631 * @returns Current *pu8 value
2632 * @param pu8 Pointer to the 8-bit variable to update.
2633 * @param u8 The 8-bit value to assign to *pu8.
2634 */
2635#if RT_INLINE_ASM_EXTERNAL
2636DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2637#else
2638DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2639{
2640# if RT_INLINE_ASM_GNU_STYLE
2641 __asm__ __volatile__("xchgb %0, %1\n\t"
2642 : "=m" (*pu8),
2643 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2644 : "1" (u8),
2645 "m" (*pu8));
2646# else
2647 __asm
2648 {
2649# ifdef RT_ARCH_AMD64
2650 mov rdx, [pu8]
2651 mov al, [u8]
2652 xchg [rdx], al
2653 mov [u8], al
2654# else
2655 mov edx, [pu8]
2656 mov al, [u8]
2657 xchg [edx], al
2658 mov [u8], al
2659# endif
2660 }
2661# endif
2662 return u8;
2663}
2664#endif
2665
2666
2667/**
2668 * Atomically Exchange a signed 8-bit value, ordered.
2669 *
2670 * @returns Current *pu8 value
2671 * @param pi8 Pointer to the 8-bit variable to update.
2672 * @param i8 The 8-bit value to assign to *pi8.
2673 */
2674DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2675{
2676 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2677}
2678
2679
2680/**
2681 * Atomically Exchange a bool value, ordered.
2682 *
2683 * @returns Current *pf value
2684 * @param pf Pointer to the 8-bit variable to update.
2685 * @param f The 8-bit value to assign to *pi8.
2686 */
2687DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2688{
2689#ifdef _MSC_VER
2690 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2691#else
2692 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2693#endif
2694}
2695
2696
2697/**
2698 * Atomically Exchange an unsigned 16-bit value, ordered.
2699 *
2700 * @returns Current *pu16 value
2701 * @param pu16 Pointer to the 16-bit variable to update.
2702 * @param u16 The 16-bit value to assign to *pu16.
2703 */
2704#if RT_INLINE_ASM_EXTERNAL
2705DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2706#else
2707DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2708{
2709# if RT_INLINE_ASM_GNU_STYLE
2710 __asm__ __volatile__("xchgw %0, %1\n\t"
2711 : "=m" (*pu16),
2712 "=r" (u16)
2713 : "1" (u16),
2714 "m" (*pu16));
2715# else
2716 __asm
2717 {
2718# ifdef RT_ARCH_AMD64
2719 mov rdx, [pu16]
2720 mov ax, [u16]
2721 xchg [rdx], ax
2722 mov [u16], ax
2723# else
2724 mov edx, [pu16]
2725 mov ax, [u16]
2726 xchg [edx], ax
2727 mov [u16], ax
2728# endif
2729 }
2730# endif
2731 return u16;
2732}
2733#endif
2734
2735
2736/**
2737 * Atomically Exchange a signed 16-bit value, ordered.
2738 *
2739 * @returns Current *pu16 value
2740 * @param pi16 Pointer to the 16-bit variable to update.
2741 * @param i16 The 16-bit value to assign to *pi16.
2742 */
2743DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2744{
2745 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2746}
2747
2748
2749/**
2750 * Atomically exchanges an unsigned 32-bit value, ordered.
2751 *
 * Stores u32 into *pu32 and hands back what *pu32 contained before.
 *
2752 * @returns The value *pu32 held before the exchange.
2753 * @param pu32 Pointer to the 32-bit variable to update.
2754 * @param u32 The 32-bit value to assign to *pu32.
 *
 * @remarks Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set
 *          and compiler intrinsics are not in use; otherwise implemented
 *          inline with GNU style asm, the _InterlockedExchange intrinsic or
 *          MSC style asm, in that order of preference.
2755 */
2756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2757DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2758#else
2759DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2760{
2761# if RT_INLINE_ASM_GNU_STYLE
 /* u32 is operand %1 on both the output and the input side, so the register
    enters with the new value and leaves with the previous one.  XCHG with a
    memory operand is locked implicitly (Intel/AMD manuals). */
2762 __asm__ __volatile__("xchgl %0, %1\n\t"
2763 : "=m" (*pu32),
2764 "=r" (u32)
2765 : "1" (u32),
2766 "m" (*pu32));
2767
2768# elif RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic returns the previous content; reuse u32 to carry it out. */
2769 u32 = _InterlockedExchange((long *)pu32, u32);
2770
2771# else
 /* MSC style inline assembly; the old value ends up in the u32 parameter. */
2772 __asm
2773 {
2774# ifdef RT_ARCH_AMD64
2775 mov rdx, [pu32]
2776 mov eax, u32
2777 xchg [rdx], eax
2778 mov [u32], eax
2779# else
2780 mov edx, [pu32]
2781 mov eax, u32
2782 xchg [edx], eax
2783 mov [u32], eax
2784# endif
2785 }
2786# endif
2787 return u32; /* previous *pu32 content in every variant */
2788}
2789#endif
2790
2791
2792/**
2793 * Atomically Exchange a signed 32-bit value, ordered.
2794 *
2795 * @returns Current *pu32 value
2796 * @param pi32 Pointer to the 32-bit variable to update.
2797 * @param i32 The 32-bit value to assign to *pi32.
2798 */
2799DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2800{
2801 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2802}
2803
2804
2805/**
2806 * Atomically exchanges an unsigned 64-bit value, ordered.
2807 *
 * Stores u64 into *pu64 and hands back what *pu64 contained before.  On
 * AMD64 this is a single exchange (intrinsic or XCHG); on 32-bit x86 it is
 * a LOCK CMPXCHG8B retry loop.
 *
2808 * @returns The value *pu64 held before the exchange.
2809 * @param pu64 Pointer to the 64-bit variable to update.
2810 * @param u64 The 64-bit value to assign to *pu64.
2811 */
2812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2813DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2814#else
2815DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2816{
2817# if defined(RT_ARCH_AMD64)
2818# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic returns the previous content; reuse u64 to carry it out. */
2819 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2820
2821# elif RT_INLINE_ASM_GNU_STYLE
 /* u64 is operand %1 on both sides: new value in, previous value out. */
2822 __asm__ __volatile__("xchgq %0, %1\n\t"
2823 : "=m" (*pu64),
2824 "=r" (u64)
2825 : "1" (u64),
2826 "m" (*pu64));
2827# else
 /* MSC style inline assembly; the old value ends up in the u64 parameter. */
2828 __asm
2829 {
2830 mov rdx, [pu64]
2831 mov rax, [u64]
2832 xchg [rdx], rax
2833 mov [u64], rax
2834 }
2835# endif
2836# else /* !RT_ARCH_AMD64 */
2837# if RT_INLINE_ASM_GNU_STYLE
2838# if defined(PIC) || defined(__PIC__)
 /* PIC variant: EBX cannot be named as an operand here (presumably because
    it is reserved as the PIC base register), so the low half of the new
    value is parked in u32EBX, swapped into EBX for the duration of the
    loop, and the original EBX is restored from that slot afterwards.
    EDX:EAX starts out as the current *pu64 ("0" input tied to the "=A"
    output); whenever CMPXCHG8B fails it refreshes EDX:EAX from memory and
    the loop retries until ECX:EBX has been stored.
    NOTE(review): u32EBX is listed as an input only although the xchgl
    writes to it - confirm this is acceptable for the compilers in use. */
2839 uint32_t u32EBX = (uint32_t)u64;
2840 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2841 "xchgl %%ebx, %3\n\t"
2842 "1:\n\t"
2843 "lock; cmpxchg8b (%5)\n\t"
2844 "jnz 1b\n\t"
2845 "movl %3, %%ebx\n\t"
2846 /*"xchgl %%esi, %5\n\t"*/
2847 : "=A" (u64),
2848 "=m" (*pu64)
2849 : "0" (*pu64),
2850 "m" ( u32EBX ),
2851 "c" ( (uint32_t)(u64 >> 32) ),
2852 "S" (pu64));
2853# else /* !PIC */
 /* Same retry loop, but EBX can be loaded directly with the low half of
    the new value; ECX carries the high half. */
2854 __asm__ __volatile__("1:\n\t"
2855 "lock; cmpxchg8b %1\n\t"
2856 "jnz 1b\n\t"
2857 : "=A" (u64),
2858 "=m" (*pu64)
2859 : "0" (*pu64),
2860 "b" ( (uint32_t)u64 ),
2861 "c" ( (uint32_t)(u64 >> 32) ));
2862# endif
2863# else
 /* MSC style inline assembly: ECX:EBX = new value, EDX:EAX = first guess at
    the current value; loop until CMPXCHG8B succeeds, then store the
    previous value (EDX:EAX) into the u64 parameter. */
2864 __asm
2865 {
2866 mov ebx, dword ptr [u64]
2867 mov ecx, dword ptr [u64 + 4]
2868 mov edi, pu64
2869 mov eax, dword ptr [edi]
2870 mov edx, dword ptr [edi + 4]
2871 retry:
2872 lock cmpxchg8b [edi]
2873 jnz retry
2874 mov dword ptr [u64], eax
2875 mov dword ptr [u64 + 4], edx
2876 }
2877# endif
2878# endif /* !RT_ARCH_AMD64 */
2879 return u64; /* previous *pu64 content in every variant */
2880}
2881#endif
2882
2883
2884/**
2885 * Atomically Exchange an signed 64-bit value, ordered.
2886 *
2887 * @returns Current *pi64 value
2888 * @param pi64 Pointer to the 64-bit variable to update.
2889 * @param i64 The 64-bit value to assign to *pi64.
2890 */
2891DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2892{
2893 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2894}
2895
2896
2897/**
2898 * Atomically Exchange a pointer value, ordered.
2899 *
2900 * @returns Current *ppv value
2901 * @param ppv Pointer to the pointer variable to update.
2902 * @param pv The pointer value to assign to *ppv.
2903 */
2904DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2905{
2906#if ARCH_BITS == 32
2907 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2908#elif ARCH_BITS == 64
2909 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2910#else
2911# error "ARCH_BITS is bogus"
2912#endif
2913}
2914
2915
2916/**
2917 * Atomically Exchange a raw-mode context pointer value, ordered.
2918 *
2919 * @returns Current *ppv value
2920 * @param ppvRC Pointer to the pointer variable to update.
2921 * @param pvRC The pointer value to assign to *ppv.
2922 */
2923DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2924{
2925 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2926}
2927
2928
2929/**
2930 * Atomically Exchange a ring-0 pointer value, ordered.
2931 *
2932 * @returns Current *ppv value
2933 * @param ppvR0 Pointer to the pointer variable to update.
2934 * @param pvR0 The pointer value to assign to *ppv.
2935 */
2936DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2937{
2938#if R0_ARCH_BITS == 32
2939 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2940#elif R0_ARCH_BITS == 64
2941 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2942#else
2943# error "R0_ARCH_BITS is bogus"
2944#endif
2945}
2946
2947
2948/**
2949 * Atomically Exchange a ring-3 pointer value, ordered.
2950 *
2951 * @returns Current *ppv value
2952 * @param ppvR3 Pointer to the pointer variable to update.
2953 * @param pvR3 The pointer value to assign to *ppv.
2954 */
2955DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2956{
2957#if R3_ARCH_BITS == 32
2958 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2959#elif R3_ARCH_BITS == 64
2960 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2961#else
2962# error "R3_ARCH_BITS is bogus"
2963#endif
2964}
2965
2966
2967/** @def ASMAtomicXchgHandle
2968 * Atomically Exchange a typical IPRT handle value, ordered.
2969 *
2970 * @param ph Pointer to the value to update.
2971 * @param hNew The new value to assigned to *pu.
2972 * @param phRes Where to store the current *ph value.
2973 *
2974 * @remarks This doesn't currently work for all handles (like RTFILE).
2975 */
2976#if HC_ARCH_BITS == 32
2977# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2978 do { \
2979 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2980 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2981 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2982 } while (0)
2983#elif HC_ARCH_BITS == 64
2984# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2985 do { \
2986 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2987 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2988 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2989 } while (0)
2990#else
2991# error HC_ARCH_BITS
2992#endif
2993
2994
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * Any other size raises an assertion and nothing is exchanged.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument really matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3013
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function
 * and stores the previous value through puRes using the same width.  Any
 * other size raises an assertion; nothing is exchanged and *puRes is left
 * untouched in that case.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument really matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3032
3033
3034/**
3035 * Atomically compares and exchanges an unsigned 32-bit value, ordered.
3036 *
 * If *pu32 equals u32Old, u32New is stored into *pu32; otherwise *pu32 is
 * left unchanged.
 *
3037 * @returns true if xchg was done.
3038 * @returns false if xchg wasn't done.
3039 *
3040 * @param pu32 Pointer to the value to update.
3041 * @param u32New The new value to assign to *pu32.
3042 * @param u32Old The value *pu32 is compared with.
3043 */
3044#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3045DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3046#else
3047DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3048{
3049# if RT_INLINE_ASM_GNU_STYLE
 /* EAX carries u32Old in ("2" input tied to the "=a" output); CMPXCHG sets
    ZF when the comparison matched and SETZ turns that into 0/1 in u8Ret. */
3050 uint8_t u8Ret;
3051 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3052 "setz %1\n\t"
3053 : "=m" (*pu32),
3054 "=qm" (u8Ret),
3055 "=a" (u32Old)
3056 : "r" (u32New),
3057 "2" (u32Old),
3058 "m" (*pu32));
3059 return (bool)u8Ret;
3060
3061# elif RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic returns the value found in *pu32; the exchange took place
    exactly when that equals the expected old value. */
3062 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3063
3064# else
 /* MSC style inline assembly: EAX = expected, ECX = new; ZF after CMPXCHG is
    widened into u32Ret. */
3065 uint32_t u32Ret;
3066 __asm
3067 {
3068# ifdef RT_ARCH_AMD64
3069 mov rdx, [pu32]
3070# else
3071 mov edx, [pu32]
3072# endif
3073 mov eax, [u32Old]
3074 mov ecx, [u32New]
3075# ifdef RT_ARCH_AMD64
3076 lock cmpxchg [rdx], ecx
3077# else
3078 lock cmpxchg [edx], ecx
3079# endif
3080 setz al
3081 movzx eax, al
3082 mov [u32Ret], eax
3083 }
3084 return !!u32Ret;
3085# endif
3086}
3087#endif
3088
3089
3090/**
3091 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3092 *
3093 * @returns true if xchg was done.
3094 * @returns false if xchg wasn't done.
3095 *
3096 * @param pi32 Pointer to the value to update.
3097 * @param i32New The new value to assigned to *pi32.
3098 * @param i32Old The old value to *pi32 compare with.
3099 */
3100DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3101{
3102 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3103}
3104
3105
3106/**
3107 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3108 *
3109 * @returns true if xchg was done.
3110 * @returns false if xchg wasn't done.
3111 *
3112 * @param pu64 Pointer to the 64-bit variable to update.
3113 * @param u64New The 64-bit value to assign to *pu64.
3114 * @param u64Old The value to compare with.
3115 */
3116#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3117 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3118DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3119#else
3120DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3121{
3122# if RT_INLINE_ASM_USES_INTRIN
3123 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3124
3125# elif defined(RT_ARCH_AMD64)
3126# if RT_INLINE_ASM_GNU_STYLE
3127 uint8_t u8Ret;
3128 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3129 "setz %1\n\t"
3130 : "=m" (*pu64),
3131 "=qm" (u8Ret),
3132 "=a" (u64Old)
3133 : "r" (u64New),
3134 "2" (u64Old),
3135 "m" (*pu64));
3136 return (bool)u8Ret;
3137# else
3138 bool fRet;
3139 __asm
3140 {
3141 mov rdx, [pu32]
3142 mov rax, [u64Old]
3143 mov rcx, [u64New]
3144 lock cmpxchg [rdx], rcx
3145 setz al
3146 mov [fRet], al
3147 }
3148 return fRet;
3149# endif
3150# else /* !RT_ARCH_AMD64 */
3151 uint32_t u32Ret;
3152# if RT_INLINE_ASM_GNU_STYLE
3153# if defined(PIC) || defined(__PIC__)
3154 uint32_t u32EBX = (uint32_t)u64New;
3155 uint32_t u32Spill;
3156 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3157 "lock; cmpxchg8b (%6)\n\t"
3158 "setz %%al\n\t"
3159 "movl %4, %%ebx\n\t"
3160 "movzbl %%al, %%eax\n\t"
3161 : "=a" (u32Ret),
3162 "=d" (u32Spill),
3163# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3164 "+m" (*pu64)
3165# else
3166 "=m" (*pu64)
3167# endif
3168 : "A" (u64Old),
3169 "m" ( u32EBX ),
3170 "c" ( (uint32_t)(u64New >> 32) ),
3171 "S" (pu64));
3172# else /* !PIC */
3173 uint32_t u32Spill;
3174 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3175 "setz %%al\n\t"
3176 "movzbl %%al, %%eax\n\t"
3177 : "=a" (u32Ret),
3178 "=d" (u32Spill),
3179 "+m" (*pu64)
3180 : "A" (u64Old),
3181 "b" ( (uint32_t)u64New ),
3182 "c" ( (uint32_t)(u64New >> 32) ));
3183# endif
3184 return (bool)u32Ret;
3185# else
3186 __asm
3187 {
3188 mov ebx, dword ptr [u64New]
3189 mov ecx, dword ptr [u64New + 4]
3190 mov edi, [pu64]
3191 mov eax, dword ptr [u64Old]
3192 mov edx, dword ptr [u64Old + 4]
3193 lock cmpxchg8b [edi]
3194 setz al
3195 movzx eax, al
3196 mov dword ptr [u32Ret], eax
3197 }
3198 return !!u32Ret;
3199# endif
3200# endif /* !RT_ARCH_AMD64 */
3201}
3202#endif
3203
3204
3205/**
3206 * Atomically Compare and exchange a signed 64-bit value, ordered.
3207 *
3208 * @returns true if xchg was done.
3209 * @returns false if xchg wasn't done.
3210 *
3211 * @param pi64 Pointer to the 64-bit variable to update.
3212 * @param i64 The 64-bit value to assign to *pu64.
3213 * @param i64Old The value to compare with.
3214 */
3215DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3216{
3217 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3218}
3219
3220
3221/**
3222 * Atomically Compare and Exchange a pointer value, ordered.
3223 *
3224 * @returns true if xchg was done.
3225 * @returns false if xchg wasn't done.
3226 *
3227 * @param ppv Pointer to the value to update.
3228 * @param pvNew The new value to assigned to *ppv.
3229 * @param pvOld The old value to *ppv compare with.
3230 */
3231DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3232{
3233#if ARCH_BITS == 32
3234 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3235#elif ARCH_BITS == 64
3236 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3237#else
3238# error "ARCH_BITS is bogus"
3239#endif
3240}
3241
3242
3243/** @def ASMAtomicCmpXchgHandle
3244 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3245 *
3246 * @param ph Pointer to the value to update.
3247 * @param hNew The new value to assigned to *pu.
3248 * @param hOld The old value to *pu compare with.
3249 * @param fRc Where to store the result.
3250 *
3251 * @remarks This doesn't currently work for all handles (like RTFILE).
3252 */
3253#if HC_ARCH_BITS == 32
3254# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3255 do { \
3256 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3257 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3258 } while (0)
3259#elif HC_ARCH_BITS == 64
3260# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3261 do { \
3262 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3263 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3264 } while (0)
3265#else
3266# error HC_ARCH_BITS
3267#endif
3268
3269
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange
 * function.  Any other size raises an assertion and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The value *pu is compared with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument really matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
3291
3292
3293/**
3294 * Atomically compares and exchanges an unsigned 32-bit value, additionally
3295 * passing back the old value, ordered.
3296 *
 * If *pu32 equals u32Old, u32New is stored into *pu32.  In either case the
 * value found in *pu32 is written to *pu32Old.
 *
3297 * @returns true if xchg was done.
3298 * @returns false if xchg wasn't done.
3299 *
3300 * @param pu32 Pointer to the value to update.
3301 * @param u32New The new value to assign to *pu32.
3302 * @param u32Old The value *pu32 is compared with.
3303 * @param pu32Old Where to store the value found in *pu32.
3304 */
3305#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3306DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3307#else
3308DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3309{
3310# if RT_INLINE_ASM_GNU_STYLE
 /* EAX goes in holding u32Old and comes out holding the value CMPXCHG found
    in memory, which the "=a" output stores straight into *pu32Old. */
3311 uint8_t u8Ret;
3312 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3313 "setz %1\n\t"
3314 : "=m" (*pu32),
3315 "=qm" (u8Ret),
3316 "=a" (*pu32Old)
3317 : "r" (u32New),
3318 "a" (u32Old),
3319 "m" (*pu32));
3320 return (bool)u8Ret;
3321
3322# elif RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's return value is both stored to *pu32Old and compared
    against the expected value to produce the result. */
3323 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3324
3325# else
 /* MSC style inline assembly.  The stores to *pu32Old sit between CMPXCHG and
    SETZ; MOV leaves the flags alone, so ZF is still the CMPXCHG result. */
3326 uint32_t u32Ret;
3327 __asm
3328 {
3329# ifdef RT_ARCH_AMD64
3330 mov rdx, [pu32]
3331# else
3332 mov edx, [pu32]
3333# endif
3334 mov eax, [u32Old]
3335 mov ecx, [u32New]
3336# ifdef RT_ARCH_AMD64
3337 lock cmpxchg [rdx], ecx
3338 mov rdx, [pu32Old]
3339 mov [rdx], eax
3340# else
3341 lock cmpxchg [edx], ecx
3342 mov edx, [pu32Old]
3343 mov [edx], eax
3344# endif
3345 setz al
3346 movzx eax, al
3347 mov [u32Ret], eax
3348 }
3349 return !!u32Ret;
3350# endif
3351}
3352#endif
3353
3354
3355/**
3356 * Atomically Compare and Exchange a signed 32-bit value, additionally
3357 * passes back old value, ordered.
3358 *
3359 * @returns true if xchg was done.
3360 * @returns false if xchg wasn't done.
3361 *
3362 * @param pi32 Pointer to the value to update.
3363 * @param i32New The new value to assigned to *pi32.
3364 * @param i32Old The old value to *pi32 compare with.
3365 * @param pi32Old Pointer store the old value at.
3366 */
3367DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3368{
3369 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3370}
3371
3372
3373/**
3374 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3375 * passing back old value, ordered.
3376 *
3377 * @returns true if xchg was done.
3378 * @returns false if xchg wasn't done.
3379 *
3380 * @param pu64 Pointer to the 64-bit variable to update.
3381 * @param u64New The 64-bit value to assign to *pu64.
3382 * @param u64Old The value to compare with.
3383 * @param pu64Old Pointer store the old value at.
3384 */
3385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3386DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3387#else
3388DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3389{
3390# if RT_INLINE_ASM_USES_INTRIN
3391 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3392
3393# elif defined(RT_ARCH_AMD64)
3394# if RT_INLINE_ASM_GNU_STYLE
3395 uint8_t u8Ret;
3396 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3397 "setz %1\n\t"
3398 : "=m" (*pu64),
3399 "=qm" (u8Ret),
3400 "=a" (*pu64Old)
3401 : "r" (u64New),
3402 "a" (u64Old),
3403 "m" (*pu64));
3404 return (bool)u8Ret;
3405# else
3406 bool fRet;
3407 __asm
3408 {
3409 mov rdx, [pu32]
3410 mov rax, [u64Old]
3411 mov rcx, [u64New]
3412 lock cmpxchg [rdx], rcx
3413 mov rdx, [pu64Old]
3414 mov [rdx], rax
3415 setz al
3416 mov [fRet], al
3417 }
3418 return fRet;
3419# endif
3420# else /* !RT_ARCH_AMD64 */
3421# if RT_INLINE_ASM_GNU_STYLE
3422 uint64_t u64Ret;
3423# if defined(PIC) || defined(__PIC__)
3424 /* NB: this code uses a memory clobber description, because the clean
3425 * solution with an output value for *pu64 makes gcc run out of registers.
3426 * This will cause suboptimal code, and anyone with a better solution is
3427 * welcome to improve this. */
3428 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3429 "lock; cmpxchg8b %3\n\t"
3430 "xchgl %%ebx, %1\n\t"
3431 : "=A" (u64Ret)
3432 : "DS" ((uint32_t)u64New),
3433 "c" ((uint32_t)(u64New >> 32)),
3434 "m" (*pu64),
3435 "0" (u64Old)
3436 : "memory" );
3437# else /* !PIC */
3438 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3439 : "=A" (u64Ret),
3440 "=m" (*pu64)
3441 : "b" ((uint32_t)u64New),
3442 "c" ((uint32_t)(u64New >> 32)),
3443 "m" (*pu64),
3444 "0" (u64Old));
3445# endif
3446 *pu64Old = u64Ret;
3447 return u64Ret == u64Old;
3448# else
3449 uint32_t u32Ret;
3450 __asm
3451 {
3452 mov ebx, dword ptr [u64New]
3453 mov ecx, dword ptr [u64New + 4]
3454 mov edi, [pu64]
3455 mov eax, dword ptr [u64Old]
3456 mov edx, dword ptr [u64Old + 4]
3457 lock cmpxchg8b [edi]
3458 mov ebx, [pu64Old]
3459 mov [ebx], eax
3460 setz al
3461 movzx eax, al
3462 add ebx, 4
3463 mov [ebx], edx
3464 mov dword ptr [u32Ret], eax
3465 }
3466 return !!u32Ret;
3467# endif
3468# endif /* !RT_ARCH_AMD64 */
3469}
3470#endif
3471
3472
3473/**
3474 * Atomically Compare and exchange a signed 64-bit value, additionally
3475 * passing back old value, ordered.
3476 *
3477 * @returns true if xchg was done.
3478 * @returns false if xchg wasn't done.
3479 *
3480 * @param pi64 Pointer to the 64-bit variable to update.
3481 * @param i64 The 64-bit value to assign to *pu64.
3482 * @param i64Old The value to compare with.
3483 * @param pi64Old Pointer store the old value at.
3484 */
3485DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3486{
3487 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3488}
3489
3490/** @def ASMAtomicCmpXchgExHandle
3491 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3492 *
3493 * @param ph Pointer to the value to update.
3494 * @param hNew The new value to assigned to *pu.
3495 * @param hOld The old value to *pu compare with.
3496 * @param fRc Where to store the result.
3497 * @param phOldVal Pointer to where to store the old value.
3498 *
3499 * @remarks This doesn't currently work for all handles (like RTFILE).
3500 */
3501#if HC_ARCH_BITS == 32
3502# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3503 do { \
3504 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3505 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3506 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3507 } while (0)
3508#elif HC_ARCH_BITS == 64
3509# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3510 do { \
3511 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3512 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3513 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3514 } while (0)
3515#else
3516# error HC_ARCH_BITS
3517#endif
3518
3519
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit function.  Any other
 * size raises an assertion, sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast so the argument really matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
3543
3544
3545/**
3546 * Atomically Compare and Exchange a pointer value, additionally
3547 * passing back old value, ordered.
3548 *
3549 * @returns true if xchg was done.
3550 * @returns false if xchg wasn't done.
3551 *
3552 * @param ppv Pointer to the value to update.
3553 * @param pvNew The new value to assigned to *ppv.
3554 * @param pvOld The old value to *ppv compare with.
3555 * @param ppvOld Pointer store the old value at.
3556 */
3557DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3558{
3559#if ARCH_BITS == 32
3560 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3561#elif ARCH_BITS == 64
3562 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3563#else
3564# error "ARCH_BITS is bogus"
3565#endif
3566}
3567
3568
3569/**
3570 * Atomically exchanges and adds to a 32-bit value, ordered.
3571 *
 * Adds u32 to *pu32 and hands back what *pu32 contained before the addition.
 *
3572 * @returns The old value.
3573 * @param pu32 Pointer to the value.
3574 * @param u32 Number to add.
3575 */
3576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3577DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3578#else
3579DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3580{
3581# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic returns the value before the addition. */
3582 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3583 return u32;
3584
3585# elif RT_INLINE_ASM_GNU_STYLE
 /* XADD leaves the previous memory content in the register operand; u32 is
    that register on both the input ("0") and the output ("=r") side. */
3586 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3587 : "=r" (u32),
3588 "=m" (*pu32)
3589 : "0" (u32),
3590 "m" (*pu32)
3591 : "memory");
3592 return u32;
3593# else
 /* MSC style inline assembly; the old value ends up in the u32 parameter. */
3594 __asm
3595 {
3596 mov eax, [u32]
3597# ifdef RT_ARCH_AMD64
3598 mov rdx, [pu32]
3599 lock xadd [rdx], eax
3600# else
3601 mov edx, [pu32]
3602 lock xadd [edx], eax
3603# endif
3604 mov [u32], eax
3605 }
3606 return u32;
3607# endif
3608}
3609#endif
3610
3611
3612/**
3613 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3614 *
3615 * @returns The old value.
3616 * @param pi32 Pointer to the value.
3617 * @param i32 Number to add.
3618 */
3619DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3620{
3621 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3622}
3623
3624
3625/**
3626 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
3627 *
3628 * @returns The old value.
3629 * @param pu32 Pointer to the value.
3630 * @param u32 Number to subtract.
3631 */
3632DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3633{
3634 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3635}
3636
3637
3638/**
3639 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
3640 *
3641 * @returns The old value.
3642 * @param pi32 Pointer to the value.
3643 * @param i32 Number to subtract.
3644 */
3645DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3646{
3647 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3648}
3649
3650
3651/**
3652 * Atomically increments a 32-bit value, ordered.
3653 *
3654 * @returns The new value.
3655 * @param pu32 Pointer to the value to increment.
3656 */
3657#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3658DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3659#else
3660DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3661{
3662 uint32_t u32;
3663# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's result is returned as is. */
3664 u32 = _InterlockedIncrement((long *)pu32);
3665 return u32;
3666
3667# elif RT_INLINE_ASM_GNU_STYLE
 /* XADD with 1 yields the value before the increment in u32, hence the +1
    on return to get the new value. */
3668 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3669 : "=r" (u32),
3670 "=m" (*pu32)
3671 : "0" (1),
3672 "m" (*pu32)
3673 : "memory");
3674 return u32+1;
3675# else
 /* MSC style inline assembly: same XADD scheme, old value lands in u32. */
3676 __asm
3677 {
3678 mov eax, 1
3679# ifdef RT_ARCH_AMD64
3680 mov rdx, [pu32]
3681 lock xadd [rdx], eax
3682# else
3683 mov edx, [pu32]
3684 lock xadd [edx], eax
3685# endif
3686 mov u32, eax
3687 }
3688 return u32+1;
3689# endif
3690}
3691#endif
3692
3693
3694/**
3695 * Atomically increment a signed 32-bit value, ordered.
3696 *
3697 * @returns The new value.
3698 * @param pi32 Pointer to the value to increment.
3699 */
3700DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3701{
3702 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3703}
3704
3705
3706/**
3707 * Atomically decrements an unsigned 32-bit value, ordered.
3708 *
3709 * @returns The new value.
3710 * @param pu32 Pointer to the value to decrement.
3711 */
3712#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3713DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3714#else
3715DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3716{
3717 uint32_t u32;
3718# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's result is returned as is. */
3719 u32 = _InterlockedDecrement((long *)pu32);
3720 return u32;
3721
3722# elif RT_INLINE_ASM_GNU_STYLE
 /* XADD with -1 yields the value before the decrement in u32, hence the -1
    on return to get the new value. */
3723 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3724 : "=r" (u32),
3725 "=m" (*pu32)
3726 : "0" (-1),
3727 "m" (*pu32)
3728 : "memory");
3729 return u32-1;
3730# else
 /* MSC style inline assembly: same XADD scheme, old value lands in u32. */
3731 __asm
3732 {
3733 mov eax, -1
3734# ifdef RT_ARCH_AMD64
3735 mov rdx, [pu32]
3736 lock xadd [rdx], eax
3737# else
3738 mov edx, [pu32]
3739 lock xadd [edx], eax
3740# endif
3741 mov u32, eax
3742 }
3743 return u32-1;
3744# endif
3745}
3746#endif
3747
3748
3749/**
3750 * Atomically decrement a signed 32-bit value, ordered.
3751 *
3752 * @returns The new value.
3753 * @param pi32 Pointer to the value to decrement.
3754 */
3755DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3756{
3757 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3758}
3759
3760
3761/**
3762 * Atomically ORs an unsigned 32-bit value into a variable, ordered.
3763 *
 * Performs *pu32 |= u32 as one locked operation; nothing is returned.
 *
3764 * @param pu32 Pointer to the variable to OR u32 into.
3765 * @param u32 The value to OR *pu32 with.
3766 */
3767#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3768DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3769#else
3770DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3771{
3772# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's return value is not needed here. */
3773 _InterlockedOr((long volatile *)pu32, (long)u32);
3774
3775# elif RT_INLINE_ASM_GNU_STYLE
 /* "ir" lets the compiler pass u32 as an immediate or in a register. */
3776 __asm__ __volatile__("lock; orl %1, %0\n\t"
3777 : "=m" (*pu32)
3778 : "ir" (u32),
3779 "m" (*pu32));
3780# else
 /* MSC style inline assembly. */
3781 __asm
3782 {
3783 mov eax, [u32]
3784# ifdef RT_ARCH_AMD64
3785 mov rdx, [pu32]
3786 lock or [rdx], eax
3787# else
3788 mov edx, [pu32]
3789 lock or [edx], eax
3790# endif
3791 }
3792# endif
3793}
3794#endif
3795
3796
3797/**
3798 * Atomically Or a signed 32-bit value, ordered.
3799 *
3800 * @param pi32 Pointer to the pointer variable to OR u32 with.
3801 * @param i32 The value to OR *pu32 with.
3802 */
3803DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3804{
3805 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3806}
3807
3808
3809/**
3810 * Atomically And an unsigned 32-bit value, ordered.
3811 *
3812 * @param pu32 Pointer to the pointer variable to AND u32 with.
3813 * @param u32 The value to AND *pu32 with.
3814 */
3815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3816DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3817#else
3818DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3819{
3820# if RT_INLINE_ASM_USES_INTRIN
3821 _InterlockedAnd((long volatile *)pu32, u32);
3822
3823# elif RT_INLINE_ASM_GNU_STYLE
3824 __asm__ __volatile__("lock; andl %1, %0\n\t"
3825 : "=m" (*pu32)
3826 : "ir" (u32),
3827 "m" (*pu32));
3828# else
3829 __asm
3830 {
3831 mov eax, [u32]
3832# ifdef RT_ARCH_AMD64
3833 mov rdx, [pu32]
3834 lock and [rdx], eax
3835# else
3836 mov edx, [pu32]
3837 lock and [edx], eax
3838# endif
3839 }
3840# endif
3841}
3842#endif
3843
3844
3845/**
3846 * Atomically And a signed 32-bit value, ordered.
3847 *
3848 * @param pi32 Pointer to the pointer variable to AND i32 with.
3849 * @param i32 The value to AND *pi32 with.
3850 */
3851DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3852{
3853 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3854}
3855
3856
3857/**
3858 * Serialize Instruction.
3859 */
3860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3861DECLASM(void) ASMSerializeInstruction(void);
3862#else
3863DECLINLINE(void) ASMSerializeInstruction(void)
3864{
3865# if RT_INLINE_ASM_GNU_STYLE
3866 RTCCUINTREG xAX = 0;
3867# ifdef RT_ARCH_AMD64
3868 __asm__ ("cpuid"
3869 : "=a" (xAX)
3870 : "0" (xAX)
3871 : "rbx", "rcx", "rdx");
3872# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3873 __asm__ ("push %%ebx\n\t"
3874 "cpuid\n\t"
3875 "pop %%ebx\n\t"
3876 : "=a" (xAX)
3877 : "0" (xAX)
3878 : "ecx", "edx");
3879# else
3880 __asm__ ("cpuid"
3881 : "=a" (xAX)
3882 : "0" (xAX)
3883 : "ebx", "ecx", "edx");
3884# endif
3885
3886# elif RT_INLINE_ASM_USES_INTRIN
3887 int aInfo[4];
3888 __cpuid(aInfo, 0);
3889
3890# else
3891 __asm
3892 {
3893 push ebx
3894 xor eax, eax
3895 cpuid
3896 pop ebx
3897 }
3898# endif
3899}
3900#endif
3901
3902
3903/**
3904 * Memory load/store fence, waits for any pending writes and reads to complete.
3905 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3906 */
3907DECLINLINE(void) ASMMemoryFenceSSE2(void)
3908{
3909#if RT_INLINE_ASM_GNU_STYLE
3910 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3911#elif RT_INLINE_ASM_USES_INTRIN
3912 _mm_mfence();
3913#else
3914 __asm
3915 {
3916 _emit 0x0f
3917 _emit 0xae
3918 _emit 0xf0
3919 }
3920#endif
3921}
3922
3923
3924/**
3925 * Memory store fence, waits for any writes to complete.
3926 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
3927 */
3928DECLINLINE(void) ASMWriteFenceSSE(void)
3929{
3930#if RT_INLINE_ASM_GNU_STYLE
3931 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
3932#elif RT_INLINE_ASM_USES_INTRIN
3933 _mm_sfence();
3934#else
3935 __asm
3936 {
3937 _emit 0x0f
3938 _emit 0xae
3939 _emit 0xf8
3940 }
3941#endif
3942}
3943
3944
3945/**
3946 * Memory load fence, waits for any pending reads to complete.
3947 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3948 */
3949DECLINLINE(void) ASMReadFenceSSE2(void)
3950{
3951#if RT_INLINE_ASM_GNU_STYLE
3952 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
3953#elif RT_INLINE_ASM_USES_INTRIN
3954 _mm_lfence();
3955#else
3956 __asm
3957 {
3958 _emit 0x0f
3959 _emit 0xae
3960 _emit 0xe8
3961 }
3962#endif
3963}
3964
3965
3966/**
3967 * Memory fence, waits for any pending writes and reads to complete.
3968 */
3969DECLINLINE(void) ASMMemoryFence(void)
3970{
3971 /** @todo use mfence? check if all cpus we care for support it. */
3972 uint32_t volatile u32;
3973 ASMAtomicXchgU32(&u32, 0);
3974}
3975
3976
3977/**
3978 * Write fence, waits for any pending writes to complete.
3979 */
3980DECLINLINE(void) ASMWriteFence(void)
3981{
3982 /** @todo use sfence? check if all cpus we care for support it. */
3983 ASMMemoryFence();
3984}
3985
3986
3987/**
3988 * Read fence, waits for any pending reads to complete.
3989 */
3990DECLINLINE(void) ASMReadFence(void)
3991{
3992 /** @todo use lfence? check if all cpus we care for support it. */
3993 ASMMemoryFence();
3994}
3995
3996
3997/**
3998 * Atomically reads an unsigned 8-bit value, ordered.
3999 *
4000 * @returns Current *pu8 value
4001 * @param pu8 Pointer to the 8-bit variable to read.
4002 */
4003DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
4004{
4005 ASMMemoryFence();
4006 return *pu8; /* byte reads are atomic on x86 */
4007}
4008
4009
4010/**
4011 * Atomically reads an unsigned 8-bit value, unordered.
4012 *
4013 * @returns Current *pu8 value
4014 * @param pu8 Pointer to the 8-bit variable to read.
4015 */
4016DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
4017{
4018 return *pu8; /* byte reads are atomic on x86 */
4019}
4020
4021
4022/**
4023 * Atomically reads a signed 8-bit value, ordered.
4024 *
4025 * @returns Current *pi8 value
4026 * @param pi8 Pointer to the 8-bit variable to read.
4027 */
4028DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
4029{
4030 ASMMemoryFence();
4031 return *pi8; /* byte reads are atomic on x86 */
4032}
4033
4034
4035/**
4036 * Atomically reads a signed 8-bit value, unordered.
4037 *
4038 * @returns Current *pi8 value
4039 * @param pi8 Pointer to the 8-bit variable to read.
4040 */
4041DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4042{
4043 return *pi8; /* byte reads are atomic on x86 */
4044}
4045
4046
4047/**
4048 * Atomically reads an unsigned 16-bit value, ordered.
4049 *
4050 * @returns Current *pu16 value
4051 * @param pu16 Pointer to the 16-bit variable to read.
4052 */
4053DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4054{
4055 ASMMemoryFence();
4056 Assert(!((uintptr_t)pu16 & 1));
4057 return *pu16;
4058}
4059
4060
4061/**
4062 * Atomically reads an unsigned 16-bit value, unordered.
4063 *
4064 * @returns Current *pu16 value
4065 * @param pu16 Pointer to the 16-bit variable to read.
4066 */
4067DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4068{
4069 Assert(!((uintptr_t)pu16 & 1));
4070 return *pu16;
4071}
4072
4073
4074/**
4075 * Atomically reads a signed 16-bit value, ordered.
4076 *
4077 * @returns Current *pi16 value
4078 * @param pi16 Pointer to the 16-bit variable to read.
4079 */
4080DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4081{
4082 ASMMemoryFence();
4083 Assert(!((uintptr_t)pi16 & 1));
4084 return *pi16;
4085}
4086
4087
4088/**
4089 * Atomically reads a signed 16-bit value, unordered.
4090 *
4091 * @returns Current *pi16 value
4092 * @param pi16 Pointer to the 16-bit variable to read.
4093 */
4094DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4095{
4096 Assert(!((uintptr_t)pi16 & 1));
4097 return *pi16;
4098}
4099
4100
4101/**
4102 * Atomically reads an unsigned 32-bit value, ordered.
4103 *
4104 * @returns Current *pu32 value
4105 * @param pu32 Pointer to the 32-bit variable to read.
4106 */
4107DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4108{
4109 ASMMemoryFence();
4110 Assert(!((uintptr_t)pu32 & 3));
4111 return *pu32;
4112}
4113
4114
4115/**
4116 * Atomically reads an unsigned 32-bit value, unordered.
4117 *
4118 * @returns Current *pu32 value
4119 * @param pu32 Pointer to the 32-bit variable to read.
4120 */
4121DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4122{
4123 Assert(!((uintptr_t)pu32 & 3));
4124 return *pu32;
4125}
4126
4127
4128/**
4129 * Atomically reads a signed 32-bit value, ordered.
4130 *
4131 * @returns Current *pi32 value
4132 * @param pi32 Pointer to the 32-bit variable to read.
4133 */
4134DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4135{
4136 ASMMemoryFence();
4137 Assert(!((uintptr_t)pi32 & 3));
4138 return *pi32;
4139}
4140
4141
4142/**
4143 * Atomically reads a signed 32-bit value, unordered.
4144 *
4145 * @returns Current *pi32 value
4146 * @param pi32 Pointer to the 32-bit variable to read.
4147 */
4148DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4149{
4150 Assert(!((uintptr_t)pi32 & 3));
4151 return *pi32;
4152}
4153
4154
4155/**
4156 * Atomically reads an unsigned 64-bit value, ordered.
4157 *
4158 * @returns Current *pu64 value
4159 * @param pu64 Pointer to the 64-bit variable to read.
4160 * The memory pointed to must be writable.
4161 * @remark This will fault if the memory is read-only!
4162 */
4163#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4164 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4165DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4166#else
4167DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4168{
4169 uint64_t u64;
4170# ifdef RT_ARCH_AMD64
4171 Assert(!((uintptr_t)pu64 & 7));
4172/*# if RT_INLINE_ASM_GNU_STYLE
4173 __asm__ __volatile__( "mfence\n\t"
4174 "movq %1, %0\n\t"
4175 : "=r" (u64)
4176 : "m" (*pu64));
4177# else
4178 __asm
4179 {
4180 mfence
4181 mov rdx, [pu64]
4182 mov rax, [rdx]
4183 mov [u64], rax
4184 }
4185# endif*/
4186 ASMMemoryFence();
4187 u64 = *pu64;
4188# else /* !RT_ARCH_AMD64 */
4189# if RT_INLINE_ASM_GNU_STYLE
4190# if defined(PIC) || defined(__PIC__)
4191 uint32_t u32EBX = 0;
4192 Assert(!((uintptr_t)pu64 & 7));
4193 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4194 "lock; cmpxchg8b (%5)\n\t"
4195 "movl %3, %%ebx\n\t"
4196 : "=A" (u64),
4197# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4198 "+m" (*pu64)
4199# else
4200 "=m" (*pu64)
4201# endif
4202 : "0" (0),
4203 "m" (u32EBX),
4204 "c" (0),
4205 "S" (pu64));
4206# else /* !PIC */
4207 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4208 : "=A" (u64),
4209 "+m" (*pu64)
4210 : "0" (0),
4211 "b" (0),
4212 "c" (0));
4213# endif
4214# else
4215 Assert(!((uintptr_t)pu64 & 7));
4216 __asm
4217 {
4218 xor eax, eax
4219 xor edx, edx
4220 mov edi, pu64
4221 xor ecx, ecx
4222 xor ebx, ebx
4223 lock cmpxchg8b [edi]
4224 mov dword ptr [u64], eax
4225 mov dword ptr [u64 + 4], edx
4226 }
4227# endif
4228# endif /* !RT_ARCH_AMD64 */
4229 return u64;
4230}
4231#endif
4232
4233
4234/**
4235 * Atomically reads an unsigned 64-bit value, unordered.
4236 *
4237 * @returns Current *pu64 value
4238 * @param pu64 Pointer to the 64-bit variable to read.
4239 * The memory pointed to must be writable.
4240 * @remark This will fault if the memory is read-only!
4241 */
4242#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4243DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4244#else
4245DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4246{
4247 uint64_t u64;
4248# ifdef RT_ARCH_AMD64
4249 Assert(!((uintptr_t)pu64 & 7));
4250/*# if RT_INLINE_ASM_GNU_STYLE
4251 Assert(!((uintptr_t)pu64 & 7));
4252 __asm__ __volatile__("movq %1, %0\n\t"
4253 : "=r" (u64)
4254 : "m" (*pu64));
4255# else
4256 __asm
4257 {
4258 mov rdx, [pu64]
4259 mov rax, [rdx]
4260 mov [u64], rax
4261 }
4262# endif */
4263 u64 = *pu64;
4264# else /* !RT_ARCH_AMD64 */
4265# if RT_INLINE_ASM_GNU_STYLE
4266# if defined(PIC) || defined(__PIC__)
4267 uint32_t u32EBX = 0;
4268 uint32_t u32Spill;
4269 Assert(!((uintptr_t)pu64 & 7));
4270 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4271 "xor %%ecx,%%ecx\n\t"
4272 "xor %%edx,%%edx\n\t"
4273 "xchgl %%ebx, %3\n\t"
4274 "lock; cmpxchg8b (%4)\n\t"
4275 "movl %3, %%ebx\n\t"
4276 : "=A" (u64),
4277# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4278 "+m" (*pu64),
4279# else
4280 "=m" (*pu64),
4281# endif
4282 "=c" (u32Spill)
4283 : "m" (u32EBX),
4284 "S" (pu64));
4285# else /* !PIC */
4286 __asm__ __volatile__("cmpxchg8b %1\n\t"
4287 : "=A" (u64),
4288 "+m" (*pu64)
4289 : "0" (0),
4290 "b" (0),
4291 "c" (0));
4292# endif
4293# else
4294 Assert(!((uintptr_t)pu64 & 7));
4295 __asm
4296 {
4297 xor eax, eax
4298 xor edx, edx
4299 mov edi, pu64
4300 xor ecx, ecx
4301 xor ebx, ebx
4302 lock cmpxchg8b [edi]
4303 mov dword ptr [u64], eax
4304 mov dword ptr [u64 + 4], edx
4305 }
4306# endif
4307# endif /* !RT_ARCH_AMD64 */
4308 return u64;
4309}
4310#endif
4311
4312
4313/**
4314 * Atomically reads a signed 64-bit value, ordered.
4315 *
4316 * @returns Current *pi64 value
4317 * @param pi64 Pointer to the 64-bit variable to read.
4318 * The memory pointed to must be writable.
4319 * @remark This will fault if the memory is read-only!
4320 */
4321DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4322{
4323 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4324}
4325
4326
4327/**
4328 * Atomically reads a signed 64-bit value, unordered.
4329 *
4330 * @returns Current *pi64 value
4331 * @param pi64 Pointer to the 64-bit variable to read.
4332 * The memory pointed to must be writable.
4333 * @remark This will fault if the memory is read-only!
4334 */
4335DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4336{
4337 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4338}
4339
4340
4341/**
4342 * Atomically reads a pointer value, ordered.
4343 *
4344 * @returns Current *pv value
4345 * @param ppv Pointer to the pointer variable to read.
4346 */
4347DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4348{
4349#if ARCH_BITS == 32
4350 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4351#elif ARCH_BITS == 64
4352 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4353#else
4354# error "ARCH_BITS is bogus"
4355#endif
4356}
4357
4358
4359/**
4360 * Atomically reads a pointer value, unordered.
4361 *
4362 * @returns Current *pv value
4363 * @param ppv Pointer to the pointer variable to read.
4364 */
4365DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4366{
4367#if ARCH_BITS == 32
4368 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4369#elif ARCH_BITS == 64
4370 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4371#else
4372# error "ARCH_BITS is bogus"
4373#endif
4374}
4375
4376
4377/**
4378 * Atomically reads a boolean value, ordered.
4379 *
4380 * @returns Current *pf value
4381 * @param pf Pointer to the boolean variable to read.
4382 */
4383DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4384{
4385 ASMMemoryFence();
4386 return *pf; /* byte reads are atomic on x86 */
4387}
4388
4389
4390/**
4391 * Atomically reads a boolean value, unordered.
4392 *
4393 * @returns Current *pf value
4394 * @param pf Pointer to the boolean variable to read.
4395 */
4396DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4397{
4398 return *pf; /* byte reads are atomic on x86 */
4399}
4400
4401
4402/**
4403 * Atomically read a typical IPRT handle value, ordered.
4404 *
4405 * @param ph Pointer to the handle variable to read.
4406 * @param phRes Where to store the result.
4407 *
4408 * @remarks This doesn't currently work for all handles (like RTFILE).
4409 */
4410#if HC_ARCH_BITS == 32
4411# define ASMAtomicReadHandle(ph, phRes) \
4412 do { \
4413 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4414 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4415 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4416 } while (0)
4417#elif HC_ARCH_BITS == 64
4418# define ASMAtomicReadHandle(ph, phRes) \
4419 do { \
4420 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4421 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4422 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4423 } while (0)
4424#else
4425# error HC_ARCH_BITS
4426#endif
4427
4428
4429/**
4430 * Atomically read a typical IPRT handle value, unordered.
4431 *
4432 * @param ph Pointer to the handle variable to read.
4433 * @param phRes Where to store the result.
4434 *
4435 * @remarks This doesn't currently work for all handles (like RTFILE).
4436 */
4437#if HC_ARCH_BITS == 32
4438# define ASMAtomicUoReadHandle(ph, phRes) \
4439 do { \
4440 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4441 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4442 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4443 } while (0)
4444#elif HC_ARCH_BITS == 64
4445# define ASMAtomicUoReadHandle(ph, phRes) \
4446 do { \
4447 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4448 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4449 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4450 } while (0)
4451#else
4452# error HC_ARCH_BITS
4453#endif
4454
4455
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion and nothing is stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast it so the argument matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4473
4474
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion and nothing is stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast it so the argument matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4492
4493
4494/**
4495 * Atomically writes an unsigned 8-bit value, ordered.
4496 *
4497 * @param pu8 Pointer to the 8-bit variable.
4498 * @param u8 The 8-bit value to assign to *pu8.
4499 */
4500DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4501{
4502 ASMAtomicXchgU8(pu8, u8);
4503}
4504
4505
4506/**
4507 * Atomically writes an unsigned 8-bit value, unordered.
4508 *
4509 * @param pu8 Pointer to the 8-bit variable.
4510 * @param u8 The 8-bit value to assign to *pu8.
4511 */
4512DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4513{
4514 *pu8 = u8; /* byte writes are atomic on x86 */
4515}
4516
4517
4518/**
4519 * Atomically writes a signed 8-bit value, ordered.
4520 *
4521 * @param pi8 Pointer to the 8-bit variable to read.
4522 * @param i8 The 8-bit value to assign to *pi8.
4523 */
4524DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4525{
4526 ASMAtomicXchgS8(pi8, i8);
4527}
4528
4529
4530/**
4531 * Atomically writes a signed 8-bit value, unordered.
4532 *
4533 * @param pi8 Pointer to the 8-bit variable to read.
4534 * @param i8 The 8-bit value to assign to *pi8.
4535 */
4536DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4537{
4538 *pi8 = i8; /* byte writes are atomic on x86 */
4539}
4540
4541
4542/**
4543 * Atomically writes an unsigned 16-bit value, ordered.
4544 *
4545 * @param pu16 Pointer to the 16-bit variable.
4546 * @param u16 The 16-bit value to assign to *pu16.
4547 */
4548DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4549{
4550 ASMAtomicXchgU16(pu16, u16);
4551}
4552
4553
4554/**
4555 * Atomically writes an unsigned 16-bit value, unordered.
4556 *
4557 * @param pu16 Pointer to the 16-bit variable.
4558 * @param u16 The 16-bit value to assign to *pu16.
4559 */
4560DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4561{
4562 Assert(!((uintptr_t)pu16 & 1));
4563 *pu16 = u16;
4564}
4565
4566
4567/**
4568 * Atomically writes a signed 16-bit value, ordered.
4569 *
4570 * @param pi16 Pointer to the 16-bit variable to read.
4571 * @param i16 The 16-bit value to assign to *pi16.
4572 */
4573DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4574{
4575 ASMAtomicXchgS16(pi16, i16);
4576}
4577
4578
4579/**
4580 * Atomically writes a signed 16-bit value, unordered.
4581 *
4582 * @param pi16 Pointer to the 16-bit variable to read.
4583 * @param i16 The 16-bit value to assign to *pi16.
4584 */
4585DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4586{
4587 Assert(!((uintptr_t)pi16 & 1));
4588 *pi16 = i16;
4589}
4590
4591
4592/**
4593 * Atomically writes an unsigned 32-bit value, ordered.
4594 *
4595 * @param pu32 Pointer to the 32-bit variable.
4596 * @param u32 The 32-bit value to assign to *pu32.
4597 */
4598DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4599{
4600 ASMAtomicXchgU32(pu32, u32);
4601}
4602
4603
4604/**
4605 * Atomically writes an unsigned 32-bit value, unordered.
4606 *
4607 * @param pu32 Pointer to the 32-bit variable.
4608 * @param u32 The 32-bit value to assign to *pu32.
4609 */
4610DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4611{
4612 Assert(!((uintptr_t)pu32 & 3));
4613 *pu32 = u32;
4614}
4615
4616
4617/**
4618 * Atomically writes a signed 32-bit value, ordered.
4619 *
4620 * @param pi32 Pointer to the 32-bit variable to read.
4621 * @param i32 The 32-bit value to assign to *pi32.
4622 */
4623DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4624{
4625 ASMAtomicXchgS32(pi32, i32);
4626}
4627
4628
4629/**
4630 * Atomically writes a signed 32-bit value, unordered.
4631 *
4632 * @param pi32 Pointer to the 32-bit variable to read.
4633 * @param i32 The 32-bit value to assign to *pi32.
4634 */
4635DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4636{
4637 Assert(!((uintptr_t)pi32 & 3));
4638 *pi32 = i32;
4639}
4640
4641
4642/**
4643 * Atomically writes an unsigned 64-bit value, ordered.
4644 *
4645 * @param pu64 Pointer to the 64-bit variable.
4646 * @param u64 The 64-bit value to assign to *pu64.
4647 */
4648DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4649{
4650 ASMAtomicXchgU64(pu64, u64);
4651}
4652
4653
4654/**
4655 * Atomically writes an unsigned 64-bit value, unordered.
4656 *
4657 * @param pu64 Pointer to the 64-bit variable.
4658 * @param u64 The 64-bit value to assign to *pu64.
4659 */
4660DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4661{
4662 Assert(!((uintptr_t)pu64 & 7));
4663#if ARCH_BITS == 64
4664 *pu64 = u64;
4665#else
4666 ASMAtomicXchgU64(pu64, u64);
4667#endif
4668}
4669
4670
4671/**
4672 * Atomically writes a signed 64-bit value, ordered.
4673 *
4674 * @param pi64 Pointer to the 64-bit variable.
4675 * @param i64 The 64-bit value to assign to *pi64.
4676 */
4677DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4678{
4679 ASMAtomicXchgS64(pi64, i64);
4680}
4681
4682
4683/**
4684 * Atomically writes a signed 64-bit value, unordered.
4685 *
4686 * @param pi64 Pointer to the 64-bit variable.
4687 * @param i64 The 64-bit value to assign to *pi64.
4688 */
4689DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4690{
4691 Assert(!((uintptr_t)pi64 & 7));
4692#if ARCH_BITS == 64
4693 *pi64 = i64;
4694#else
4695 ASMAtomicXchgS64(pi64, i64);
4696#endif
4697}
4698
4699
4700/**
4701 * Atomically writes a boolean value, unordered.
4702 *
4703 * @param pf Pointer to the boolean variable.
4704 * @param f The boolean value to assign to *pf.
4705 */
4706DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4707{
4708 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4709}
4710
4711
4712/**
4713 * Atomically writes a boolean value, unordered.
4714 *
4715 * @param pf Pointer to the boolean variable.
4716 * @param f The boolean value to assign to *pf.
4717 */
4718DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4719{
4720 *pf = f; /* byte writes are atomic on x86 */
4721}
4722
4723
4724/**
4725 * Atomically writes a pointer value, ordered.
4726 *
4727 * @returns Current *pv value
4728 * @param ppv Pointer to the pointer variable.
4729 * @param pv The pointer value to assigne to *ppv.
4730 */
4731DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4732{
4733#if ARCH_BITS == 32
4734 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4735#elif ARCH_BITS == 64
4736 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4737#else
4738# error "ARCH_BITS is bogus"
4739#endif
4740}
4741
4742
4743/**
4744 * Atomically writes a pointer value, unordered.
4745 *
4746 * @returns Current *pv value
4747 * @param ppv Pointer to the pointer variable.
4748 * @param pv The pointer value to assigne to *ppv.
4749 */
4750DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4751{
4752#if ARCH_BITS == 32
4753 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4754#elif ARCH_BITS == 64
4755 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4756#else
4757# error "ARCH_BITS is bogus"
4758#endif
4759}
4760
4761
4762/**
4763 * Atomically write a typical IPRT handle value, ordered.
4764 *
4765 * @param ph Pointer to the variable to update.
4766 * @param hNew The value to assign to *ph.
4767 *
4768 * @remarks This doesn't currently work for all handles (like RTFILE).
4769 */
4770#if HC_ARCH_BITS == 32
4771# define ASMAtomicWriteHandle(ph, hNew) \
4772 do { \
4773 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4774 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4775 } while (0)
4776#elif HC_ARCH_BITS == 64
4777# define ASMAtomicWriteHandle(ph, hNew) \
4778 do { \
4779 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4780 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4781 } while (0)
4782#else
4783# error HC_ARCH_BITS
4784#endif
4785
4786
4787/**
4788 * Atomically write a typical IPRT handle value, unordered.
4789 *
4790 * @param ph Pointer to the variable to update.
4791 * @param hNew The value to assign to *ph.
4792 *
4793 * @remarks This doesn't currently work for all handles (like RTFILE).
4794 */
4795#if HC_ARCH_BITS == 32
4796# define ASMAtomicUoWriteHandle(ph, hNew) \
4797 do { \
4798 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4799 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4800 } while (0)
4801#elif HC_ARCH_BITS == 64
4802# define ASMAtomicUoWriteHandle(ph, hNew) \
4803 do { \
4804 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4805 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4806 } while (0)
4807#else
4808# error HC_ARCH_BITS
4809#endif
4810
4811
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4829
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered write;
 * any other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4847
4848
4849
4850
4851/**
4852 * Invalidate page.
4853 *
4854 * @param pv Address of the page to invalidate.
4855 */
4856#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4857DECLASM(void) ASMInvalidatePage(void *pv);
4858#else
4859DECLINLINE(void) ASMInvalidatePage(void *pv)
4860{
4861# if RT_INLINE_ASM_USES_INTRIN
4862 __invlpg(pv);
4863
4864# elif RT_INLINE_ASM_GNU_STYLE
4865 __asm__ __volatile__("invlpg %0\n\t"
4866 : : "m" (*(uint8_t *)pv));
4867# else
4868 __asm
4869 {
4870# ifdef RT_ARCH_AMD64
4871 mov rax, [pv]
4872 invlpg [rax]
4873# else
4874 mov eax, [pv]
4875 invlpg [eax]
4876# endif
4877 }
4878# endif
4879}
4880#endif
4881
4882
4883/**
4884 * Write back the internal caches and invalidate them.
4885 */
4886#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4887DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4888#else
4889DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4890{
4891# if RT_INLINE_ASM_USES_INTRIN
4892 __wbinvd();
4893
4894# elif RT_INLINE_ASM_GNU_STYLE
4895 __asm__ __volatile__("wbinvd");
4896# else
4897 __asm
4898 {
4899 wbinvd
4900 }
4901# endif
4902}
4903#endif
4904
4905
4906/**
4907 * Invalidate internal and (perhaps) external caches without first
4908 * flushing dirty cache lines. Use with extreme care.
4909 */
4910#if RT_INLINE_ASM_EXTERNAL
4911DECLASM(void) ASMInvalidateInternalCaches(void);
4912#else
4913DECLINLINE(void) ASMInvalidateInternalCaches(void)
4914{
4915# if RT_INLINE_ASM_GNU_STYLE
4916 __asm__ __volatile__("invd");
4917# else
4918 __asm
4919 {
4920 invd
4921 }
4922# endif
4923}
4924#endif
4925
4926
4927#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4928# if PAGE_SIZE != 0x1000
4929# error "PAGE_SIZE is not 0x1000!"
4930# endif
4931#endif
4932
4933/**
4934 * Zeros a 4K memory page.
4935 *
4936 * @param pv Pointer to the memory block. This must be page aligned.
4937 */
4938#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4939DECLASM(void) ASMMemZeroPage(volatile void *pv);
4940# else
4941DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4942{
4943# if RT_INLINE_ASM_USES_INTRIN
4944# ifdef RT_ARCH_AMD64
4945 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4946# else
4947 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4948# endif
4949
4950# elif RT_INLINE_ASM_GNU_STYLE
4951 RTCCUINTREG uDummy;
4952# ifdef RT_ARCH_AMD64
4953 __asm__ __volatile__("rep stosq"
4954 : "=D" (pv),
4955 "=c" (uDummy)
4956 : "0" (pv),
4957 "c" (0x1000 >> 3),
4958 "a" (0)
4959 : "memory");
4960# else
4961 __asm__ __volatile__("rep stosl"
4962 : "=D" (pv),
4963 "=c" (uDummy)
4964 : "0" (pv),
4965 "c" (0x1000 >> 2),
4966 "a" (0)
4967 : "memory");
4968# endif
4969# else
4970 __asm
4971 {
4972# ifdef RT_ARCH_AMD64
4973 xor rax, rax
4974 mov ecx, 0200h
4975 mov rdi, [pv]
4976 rep stosq
4977# else
4978 xor eax, eax
4979 mov ecx, 0400h
4980 mov edi, [pv]
4981 rep stosd
4982# endif
4983 }
4984# endif
4985}
4986# endif
4987
4988
4989/**
4990 * Zeros a memory block with a 32-bit aligned size.
4991 *
4992 * @param pv Pointer to the memory block.
4993 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4994 */
4995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4996DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4997#else
4998DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4999{
5000# if RT_INLINE_ASM_USES_INTRIN
5001# ifdef RT_ARCH_AMD64
5002 if (!(cb & 7))
5003 __stosq((unsigned __int64 *)pv, 0, cb / 8);
5004 else
5005# endif
5006 __stosd((unsigned long *)pv, 0, cb / 4);
5007
5008# elif RT_INLINE_ASM_GNU_STYLE
5009 __asm__ __volatile__("rep stosl"
5010 : "=D" (pv),
5011 "=c" (cb)
5012 : "0" (pv),
5013 "1" (cb >> 2),
5014 "a" (0)
5015 : "memory");
5016# else
5017 __asm
5018 {
5019 xor eax, eax
5020# ifdef RT_ARCH_AMD64
5021 mov rcx, [cb]
5022 shr rcx, 2
5023 mov rdi, [pv]
5024# else
5025 mov ecx, [cb]
5026 shr ecx, 2
5027 mov edi, [pv]
5028# endif
5029 rep stosd
5030 }
5031# endif
5032}
5033#endif
5034
5035
5036/**
5037 * Fills a memory block with a 32-bit aligned size.
5038 *
5039 * @param pv Pointer to the memory block.
5040 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5041 * @param u32 The value to fill with.
5042 */
5043#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5044DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5045#else
5046DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5047{
5048# if RT_INLINE_ASM_USES_INTRIN
5049# ifdef RT_ARCH_AMD64
5050 if (!(cb & 7))
5051 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5052 else
5053# endif
5054 __stosd((unsigned long *)pv, u32, cb / 4);
5055
5056# elif RT_INLINE_ASM_GNU_STYLE
5057 __asm__ __volatile__("rep stosl"
5058 : "=D" (pv),
5059 "=c" (cb)
5060 : "0" (pv),
5061 "1" (cb >> 2),
5062 "a" (u32)
5063 : "memory");
5064# else
5065 __asm
5066 {
5067# ifdef RT_ARCH_AMD64
5068 mov rcx, [cb]
5069 shr rcx, 2
5070 mov rdi, [pv]
5071# else
5072 mov ecx, [cb]
5073 shr ecx, 2
5074 mov edi, [pv]
5075# endif
5076 mov eax, [u32]
5077 rep stosd
5078 }
5079# endif
5080}
5081#endif
5082
5083
/**
 * Checks if a 4 KB (0x1000 byte) memory page is all zeros.
 *
 * The page is scanned one pointer-sized word at a time, eight words per loop
 * iteration.  A 'repe scas' based inline assembly variant used to be kept
 * here in disabled form, annotated as being slower than this C loop; it has
 * been dropped.
 *
 * @returns true if every word in the page is zero, false otherwise.
 *
 * @param   pvPage      Pointer to the page.  Must be aligned on a 16 byte
 *                      boundary (asserted).
 */
DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
{
    uintptr_t const *puPtr = (uintptr_t const *)pvPage;
    unsigned         cLeft = 0x1000 / sizeof(uintptr_t) / 8; /* number of 8-word groups */
    Assert(!((uintptr_t)pvPage & 15));
    do
    {
        /* Same interleaved probe order as before (0,4,2,6,1,5,3,7). */
        if (puPtr[0]) return false;
        if (puPtr[4]) return false;

        if (puPtr[2]) return false;
        if (puPtr[6]) return false;

        if (puPtr[1]) return false;
        if (puPtr[5]) return false;

        if (puPtr[3]) return false;
        if (puPtr[7]) return false;

        puPtr += 8;
    } while (--cLeft);
    return true;
}
5142
5143
5144/**
5145 * Checks if a memory block is filled with the specified byte.
5146 *
5147 * This is a sort of inverted memchr.
5148 *
5149 * @returns Pointer to the byte which doesn't equal u8.
5150 * @returns NULL if all equal to u8.
5151 *
5152 * @param pv Pointer to the memory block.
5153 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5154 * @param u8 The value it's supposed to be filled with.
5155 *
5156 * @todo Fix name, it is a predicate function but it's not returning boolean!
5157 */
5158#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5159DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5160#else
5161DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5162{
5163/** @todo rewrite this in inline assembly? */
5164 uint8_t const *pb = (uint8_t const *)pv;
5165 for (; cb; cb--, pb++)
5166 if (RT_UNLIKELY(*pb != u8))
5167 return (void *)pb;
5168 return NULL;
5169}
5170#endif
5171
5172
/**
 * Checks if a memory block is filled with the specified 32-bit value.
 *
 * This is a sort of inverted memchr.
 *
 * @returns Pointer to the first value which doesn't equal u32.
 * @returns NULL if all equal to u32 (also when cb is less than 4).
 *
 * @param   pv      Pointer to the memory block.
 * @param   cb      Number of bytes in the block.  This MUST be aligned on 32-bit!
 *                  Should it not be, only the complete 32-bit units are
 *                  examined and the trailing 1-3 bytes are ignored.
 * @param   u32     The value it's supposed to be filled with.
 *
 * @todo Fix name, it is a predicate function but it's not returning boolean!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
#else
DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
{
/** @todo rewrite this in inline assembly? */
    uint32_t const *pu32  = (uint32_t const *)pv;
    /* Count whole dwords instead of subtracting 4 from cb: the subtraction
       wraps around for a cb that isn't a multiple of 4 and the scan would
       then run far beyond the end of the block. */
    size_t          cLeft = cb / sizeof(uint32_t);
    for (; cLeft; cLeft--, pu32++)
        if (RT_UNLIKELY(*pu32 != u32))
            return (uint32_t *)pu32;
    return NULL;
}
#endif
5200
5201
5202/**
5203 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5204 *
5205 * @returns u32F1 * u32F2.
5206 */
5207#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5208DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5209#else
5210DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5211{
5212# ifdef RT_ARCH_AMD64
5213 return (uint64_t)u32F1 * u32F2;
5214# else /* !RT_ARCH_AMD64 */
5215 uint64_t u64;
5216# if RT_INLINE_ASM_GNU_STYLE
5217 __asm__ __volatile__("mull %%edx"
5218 : "=A" (u64)
5219 : "a" (u32F2), "d" (u32F1));
5220# else
5221 __asm
5222 {
5223 mov edx, [u32F1]
5224 mov eax, [u32F2]
5225 mul edx
5226 mov dword ptr [u64], eax
5227 mov dword ptr [u64 + 4], edx
5228 }
5229# endif
5230 return u64;
5231# endif /* !RT_ARCH_AMD64 */
5232}
5233#endif
5234
5235
5236/**
5237 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5238 *
5239 * @returns u32F1 * u32F2.
5240 */
5241#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5242DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5243#else
5244DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5245{
5246# ifdef RT_ARCH_AMD64
5247 return (int64_t)i32F1 * i32F2;
5248# else /* !RT_ARCH_AMD64 */
5249 int64_t i64;
5250# if RT_INLINE_ASM_GNU_STYLE
5251 __asm__ __volatile__("imull %%edx"
5252 : "=A" (i64)
5253 : "a" (i32F2), "d" (i32F1));
5254# else
5255 __asm
5256 {
5257 mov edx, [i32F1]
5258 mov eax, [i32F2]
5259 imul edx
5260 mov dword ptr [i64], eax
5261 mov dword ptr [i64 + 4], edx
5262 }
5263# endif
5264 return i64;
5265# endif /* !RT_ARCH_AMD64 */
5266}
5267#endif
5268
5269
5270/**
5271 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5272 *
5273 * @returns u64 / u32.
5274 */
5275#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5276DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5277#else
5278DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5279{
5280# ifdef RT_ARCH_AMD64
5281 return (uint32_t)(u64 / u32);
5282# else /* !RT_ARCH_AMD64 */
5283# if RT_INLINE_ASM_GNU_STYLE
5284 RTCCUINTREG uDummy;
5285 __asm__ __volatile__("divl %3"
5286 : "=a" (u32), "=d"(uDummy)
5287 : "A" (u64), "r" (u32));
5288# else
5289 __asm
5290 {
5291 mov eax, dword ptr [u64]
5292 mov edx, dword ptr [u64 + 4]
5293 mov ecx, [u32]
5294 div ecx
5295 mov [u32], eax
5296 }
5297# endif
5298 return u32;
5299# endif /* !RT_ARCH_AMD64 */
5300}
5301#endif
5302
5303
5304/**
5305 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5306 *
5307 * @returns u64 / u32.
5308 */
5309#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5310DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5311#else
5312DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5313{
5314# ifdef RT_ARCH_AMD64
5315 return (int32_t)(i64 / i32);
5316# else /* !RT_ARCH_AMD64 */
5317# if RT_INLINE_ASM_GNU_STYLE
5318 RTCCUINTREG iDummy;
5319 __asm__ __volatile__("idivl %3"
5320 : "=a" (i32), "=d"(iDummy)
5321 : "A" (i64), "r" (i32));
5322# else
5323 __asm
5324 {
5325 mov eax, dword ptr [i64]
5326 mov edx, dword ptr [i64 + 4]
5327 mov ecx, [i32]
5328 idiv ecx
5329 mov [i32], eax
5330 }
5331# endif
5332 return i32;
5333# endif /* !RT_ARCH_AMD64 */
5334}
5335#endif
5336
5337
5338/**
5339 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5340 * returning the rest.
5341 *
5342 * @returns u64 % u32.
5343 *
5344 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5345 */
5346#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5347DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5348#else
5349DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5350{
5351# ifdef RT_ARCH_AMD64
5352 return (uint32_t)(u64 % u32);
5353# else /* !RT_ARCH_AMD64 */
5354# if RT_INLINE_ASM_GNU_STYLE
5355 RTCCUINTREG uDummy;
5356 __asm__ __volatile__("divl %3"
5357 : "=a" (uDummy), "=d"(u32)
5358 : "A" (u64), "r" (u32));
5359# else
5360 __asm
5361 {
5362 mov eax, dword ptr [u64]
5363 mov edx, dword ptr [u64 + 4]
5364 mov ecx, [u32]
5365 div ecx
5366 mov [u32], edx
5367 }
5368# endif
5369 return u32;
5370# endif /* !RT_ARCH_AMD64 */
5371}
5372#endif
5373
5374
5375/**
5376 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5377 * returning the rest.
5378 *
5379 * @returns u64 % u32.
5380 *
5381 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5382 */
5383#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5384DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5385#else
5386DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5387{
5388# ifdef RT_ARCH_AMD64
5389 return (int32_t)(i64 % i32);
5390# else /* !RT_ARCH_AMD64 */
5391# if RT_INLINE_ASM_GNU_STYLE
5392 RTCCUINTREG iDummy;
5393 __asm__ __volatile__("idivl %3"
5394 : "=a" (iDummy), "=d"(i32)
5395 : "A" (i64), "r" (i32));
5396# else
5397 __asm
5398 {
5399 mov eax, dword ptr [i64]
5400 mov edx, dword ptr [i64 + 4]
5401 mov ecx, [i32]
5402 idiv ecx
5403 mov [i32], edx
5404 }
5405# endif
5406 return i32;
5407# endif /* !RT_ARCH_AMD64 */
5408}
5409#endif
5410
5411
5412/**
5413 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
5414 * using a 96 bit intermediate result.
5415 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5416 * __udivdi3 and __umoddi3 even if this inline function is not used.
5417 *
5418 * @returns (u64A * u32B) / u32C.
5419 * @param u64A The 64-bit value.
5420 * @param u32B The 32-bit value to multiple by A.
5421 * @param u32C The 32-bit value to divide A*B by.
5422 */
5423#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5424DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5425#else
5426DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5427{
5428# if RT_INLINE_ASM_GNU_STYLE
5429# ifdef RT_ARCH_AMD64
5430 uint64_t u64Result, u64Spill;
5431 __asm__ __volatile__("mulq %2\n\t"
5432 "divq %3\n\t"
5433 : "=a" (u64Result),
5434 "=d" (u64Spill)
5435 : "r" ((uint64_t)u32B),
5436 "r" ((uint64_t)u32C),
5437 "0" (u64A),
5438 "1" (0));
5439 return u64Result;
5440# else
5441 uint32_t u32Dummy;
5442 uint64_t u64Result;
5443 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5444 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5445 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5446 eax = u64A.hi */
5447 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5448 edx = u32C */
5449 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5450 edx = u32B */
5451 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5452 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5453 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5454 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5455 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5456 edx = u64Hi % u32C */
5457 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5458 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5459 "divl %%ecx \n\t" /* u64Result.lo */
5460 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5461 : "=A"(u64Result), "=c"(u32Dummy),
5462 "=S"(u32Dummy), "=D"(u32Dummy)
5463 : "a"((uint32_t)u64A),
5464 "S"((uint32_t)(u64A >> 32)),
5465 "c"(u32B),
5466 "D"(u32C));
5467 return u64Result;
5468# endif
5469# else
5470 RTUINT64U u;
5471 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5472 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5473 u64Hi += (u64Lo >> 32);
5474 u.s.Hi = (uint32_t)(u64Hi / u32C);
5475 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5476 return u.u;
5477# endif
5478}
5479#endif
5480
5481
5482/**
5483 * Probes a byte pointer for read access.
5484 *
5485 * While the function will not fault if the byte is not read accessible,
5486 * the idea is to do this in a safe place like before acquiring locks
5487 * and such like.
5488 *
5489 * Also, this functions guarantees that an eager compiler is not going
5490 * to optimize the probing away.
5491 *
5492 * @param pvByte Pointer to the byte.
5493 */
5494#if RT_INLINE_ASM_EXTERNAL
5495DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5496#else
5497DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5498{
5499 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5500 uint8_t u8;
5501# if RT_INLINE_ASM_GNU_STYLE
5502 __asm__ __volatile__("movb (%1), %0\n\t"
5503 : "=r" (u8)
5504 : "r" (pvByte));
5505# else
5506 __asm
5507 {
5508# ifdef RT_ARCH_AMD64
5509 mov rax, [pvByte]
5510 mov al, [rax]
5511# else
5512 mov eax, [pvByte]
5513 mov al, [eax]
5514# endif
5515 mov [u8], al
5516 }
5517# endif
5518 return u8;
5519}
5520#endif
5521
5522/**
5523 * Probes a buffer for read access page by page.
5524 *
5525 * While the function will fault if the buffer is not fully read
5526 * accessible, the idea is to do this in a safe place like before
5527 * acquiring locks and such like.
5528 *
5529 * Also, this functions guarantees that an eager compiler is not going
5530 * to optimize the probing away.
5531 *
5532 * @param pvBuf Pointer to the buffer.
5533 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5534 */
5535DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5536{
5537 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5538 /* the first byte */
5539 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5540 ASMProbeReadByte(pu8);
5541
5542 /* the pages in between pages. */
5543 while (cbBuf > /*PAGE_SIZE*/0x1000)
5544 {
5545 ASMProbeReadByte(pu8);
5546 cbBuf -= /*PAGE_SIZE*/0x1000;
5547 pu8 += /*PAGE_SIZE*/0x1000;
5548 }
5549
5550 /* the last byte */
5551 ASMProbeReadByte(pu8 + cbBuf - 1);
5552}
5553
5554
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to an int3 instruction for GNU style inline assembly and to the
 * __debugbreak() intrinsic otherwise.
 *
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__("int3\n\tnop"); } while (0)
#endif
5571
5572
5573
5574/** @defgroup grp_inline_bits Bit Operations
5575 * @{
5576 */
5577
5578
5579/**
5580 * Sets a bit in a bitmap.
5581 *
5582 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5583 * @param iBit The bit to set.
5584 *
5585 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5586 * However, doing so will yield better performance as well as avoiding
5587 * traps accessing the last bits in the bitmap.
5588 */
5589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5590DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5591#else
5592DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5593{
5594# if RT_INLINE_ASM_USES_INTRIN
5595 _bittestandset((long *)pvBitmap, iBit);
5596
5597# elif RT_INLINE_ASM_GNU_STYLE
5598 __asm__ __volatile__("btsl %1, %0"
5599 : "=m" (*(volatile long *)pvBitmap)
5600 : "Ir" (iBit),
5601 "m" (*(volatile long *)pvBitmap)
5602 : "memory");
5603# else
5604 __asm
5605 {
5606# ifdef RT_ARCH_AMD64
5607 mov rax, [pvBitmap]
5608 mov edx, [iBit]
5609 bts [rax], edx
5610# else
5611 mov eax, [pvBitmap]
5612 mov edx, [iBit]
5613 bts [eax], edx
5614# endif
5615 }
5616# endif
5617}
5618#endif
5619
5620
5621/**
5622 * Atomically sets a bit in a bitmap, ordered.
5623 *
5624 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5625 * the memory access isn't atomic!
5626 * @param iBit The bit to set.
5627 */
5628#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5629DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5630#else
5631DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5632{
5633 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5634# if RT_INLINE_ASM_USES_INTRIN
5635 _interlockedbittestandset((long *)pvBitmap, iBit);
5636# elif RT_INLINE_ASM_GNU_STYLE
5637 __asm__ __volatile__("lock; btsl %1, %0"
5638 : "=m" (*(volatile long *)pvBitmap)
5639 : "Ir" (iBit),
5640 "m" (*(volatile long *)pvBitmap)
5641 : "memory");
5642# else
5643 __asm
5644 {
5645# ifdef RT_ARCH_AMD64
5646 mov rax, [pvBitmap]
5647 mov edx, [iBit]
5648 lock bts [rax], edx
5649# else
5650 mov eax, [pvBitmap]
5651 mov edx, [iBit]
5652 lock bts [eax], edx
5653# endif
5654 }
5655# endif
5656}
5657#endif
5658
5659
5660/**
5661 * Clears a bit in a bitmap.
5662 *
5663 * @param pvBitmap Pointer to the bitmap.
5664 * @param iBit The bit to clear.
5665 *
5666 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5667 * However, doing so will yield better performance as well as avoiding
5668 * traps accessing the last bits in the bitmap.
5669 */
5670#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5671DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5672#else
5673DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5674{
5675# if RT_INLINE_ASM_USES_INTRIN
5676 _bittestandreset((long *)pvBitmap, iBit);
5677
5678# elif RT_INLINE_ASM_GNU_STYLE
5679 __asm__ __volatile__("btrl %1, %0"
5680 : "=m" (*(volatile long *)pvBitmap)
5681 : "Ir" (iBit),
5682 "m" (*(volatile long *)pvBitmap)
5683 : "memory");
5684# else
5685 __asm
5686 {
5687# ifdef RT_ARCH_AMD64
5688 mov rax, [pvBitmap]
5689 mov edx, [iBit]
5690 btr [rax], edx
5691# else
5692 mov eax, [pvBitmap]
5693 mov edx, [iBit]
5694 btr [eax], edx
5695# endif
5696 }
5697# endif
5698}
5699#endif
5700
5701
5702/**
5703 * Atomically clears a bit in a bitmap, ordered.
5704 *
5705 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5706 * the memory access isn't atomic!
5707 * @param iBit The bit to toggle set.
5708 * @remarks No memory barrier, take care on smp.
5709 */
5710#if RT_INLINE_ASM_EXTERNAL
5711DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5712#else
5713DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5714{
5715 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5716# if RT_INLINE_ASM_GNU_STYLE
5717 __asm__ __volatile__("lock; btrl %1, %0"
5718 : "=m" (*(volatile long *)pvBitmap)
5719 : "Ir" (iBit),
5720 "m" (*(volatile long *)pvBitmap)
5721 : "memory");
5722# else
5723 __asm
5724 {
5725# ifdef RT_ARCH_AMD64
5726 mov rax, [pvBitmap]
5727 mov edx, [iBit]
5728 lock btr [rax], edx
5729# else
5730 mov eax, [pvBitmap]
5731 mov edx, [iBit]
5732 lock btr [eax], edx
5733# endif
5734 }
5735# endif
5736}
5737#endif
5738
5739
5740/**
5741 * Toggles a bit in a bitmap.
5742 *
5743 * @param pvBitmap Pointer to the bitmap.
5744 * @param iBit The bit to toggle.
5745 *
5746 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5747 * However, doing so will yield better performance as well as avoiding
5748 * traps accessing the last bits in the bitmap.
5749 */
5750#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5751DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5752#else
5753DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5754{
5755# if RT_INLINE_ASM_USES_INTRIN
5756 _bittestandcomplement((long *)pvBitmap, iBit);
5757# elif RT_INLINE_ASM_GNU_STYLE
5758 __asm__ __volatile__("btcl %1, %0"
5759 : "=m" (*(volatile long *)pvBitmap)
5760 : "Ir" (iBit),
5761 "m" (*(volatile long *)pvBitmap)
5762 : "memory");
5763# else
5764 __asm
5765 {
5766# ifdef RT_ARCH_AMD64
5767 mov rax, [pvBitmap]
5768 mov edx, [iBit]
5769 btc [rax], edx
5770# else
5771 mov eax, [pvBitmap]
5772 mov edx, [iBit]
5773 btc [eax], edx
5774# endif
5775 }
5776# endif
5777}
5778#endif
5779
5780
5781/**
5782 * Atomically toggles a bit in a bitmap, ordered.
5783 *
5784 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5785 * the memory access isn't atomic!
5786 * @param iBit The bit to test and set.
5787 */
5788#if RT_INLINE_ASM_EXTERNAL
5789DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5790#else
5791DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5792{
5793 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5794# if RT_INLINE_ASM_GNU_STYLE
5795 __asm__ __volatile__("lock; btcl %1, %0"
5796 : "=m" (*(volatile long *)pvBitmap)
5797 : "Ir" (iBit),
5798 "m" (*(volatile long *)pvBitmap)
5799 : "memory");
5800# else
5801 __asm
5802 {
5803# ifdef RT_ARCH_AMD64
5804 mov rax, [pvBitmap]
5805 mov edx, [iBit]
5806 lock btc [rax], edx
5807# else
5808 mov eax, [pvBitmap]
5809 mov edx, [iBit]
5810 lock btc [eax], edx
5811# endif
5812 }
5813# endif
5814}
5815#endif
5816
5817
5818/**
5819 * Tests and sets a bit in a bitmap.
5820 *
5821 * @returns true if the bit was set.
5822 * @returns false if the bit was clear.
5823 *
5824 * @param pvBitmap Pointer to the bitmap.
5825 * @param iBit The bit to test and set.
5826 *
5827 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5828 * However, doing so will yield better performance as well as avoiding
5829 * traps accessing the last bits in the bitmap.
5830 */
5831#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5832DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5833#else
5834DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5835{
5836 union { bool f; uint32_t u32; uint8_t u8; } rc;
5837# if RT_INLINE_ASM_USES_INTRIN
5838 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5839
5840# elif RT_INLINE_ASM_GNU_STYLE
5841 __asm__ __volatile__("btsl %2, %1\n\t"
5842 "setc %b0\n\t"
5843 "andl $1, %0\n\t"
5844 : "=q" (rc.u32),
5845 "=m" (*(volatile long *)pvBitmap)
5846 : "Ir" (iBit),
5847 "m" (*(volatile long *)pvBitmap)
5848 : "memory");
5849# else
5850 __asm
5851 {
5852 mov edx, [iBit]
5853# ifdef RT_ARCH_AMD64
5854 mov rax, [pvBitmap]
5855 bts [rax], edx
5856# else
5857 mov eax, [pvBitmap]
5858 bts [eax], edx
5859# endif
5860 setc al
5861 and eax, 1
5862 mov [rc.u32], eax
5863 }
5864# endif
5865 return rc.f;
5866}
5867#endif
5868
5869
5870/**
5871 * Atomically tests and sets a bit in a bitmap, ordered.
5872 *
5873 * @returns true if the bit was set.
5874 * @returns false if the bit was clear.
5875 *
5876 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5877 * the memory access isn't atomic!
5878 * @param iBit The bit to set.
5879 */
5880#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5881DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5882#else
5883DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5884{
5885 union { bool f; uint32_t u32; uint8_t u8; } rc;
5886 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5887# if RT_INLINE_ASM_USES_INTRIN
5888 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5889# elif RT_INLINE_ASM_GNU_STYLE
5890 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5891 "setc %b0\n\t"
5892 "andl $1, %0\n\t"
5893 : "=q" (rc.u32),
5894 "=m" (*(volatile long *)pvBitmap)
5895 : "Ir" (iBit),
5896 "m" (*(volatile long *)pvBitmap)
5897 : "memory");
5898# else
5899 __asm
5900 {
5901 mov edx, [iBit]
5902# ifdef RT_ARCH_AMD64
5903 mov rax, [pvBitmap]
5904 lock bts [rax], edx
5905# else
5906 mov eax, [pvBitmap]
5907 lock bts [eax], edx
5908# endif
5909 setc al
5910 and eax, 1
5911 mov [rc.u32], eax
5912 }
5913# endif
5914 return rc.f;
5915}
5916#endif
5917
5918
5919/**
5920 * Tests and clears a bit in a bitmap.
5921 *
5922 * @returns true if the bit was set.
5923 * @returns false if the bit was clear.
5924 *
5925 * @param pvBitmap Pointer to the bitmap.
5926 * @param iBit The bit to test and clear.
5927 *
5928 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5929 * However, doing so will yield better performance as well as avoiding
5930 * traps accessing the last bits in the bitmap.
5931 */
5932#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5933DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5934#else
5935DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5936{
5937 union { bool f; uint32_t u32; uint8_t u8; } rc;
5938# if RT_INLINE_ASM_USES_INTRIN
5939 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5940
5941# elif RT_INLINE_ASM_GNU_STYLE
5942 __asm__ __volatile__("btrl %2, %1\n\t"
5943 "setc %b0\n\t"
5944 "andl $1, %0\n\t"
5945 : "=q" (rc.u32),
5946 "=m" (*(volatile long *)pvBitmap)
5947 : "Ir" (iBit),
5948 "m" (*(volatile long *)pvBitmap)
5949 : "memory");
5950# else
5951 __asm
5952 {
5953 mov edx, [iBit]
5954# ifdef RT_ARCH_AMD64
5955 mov rax, [pvBitmap]
5956 btr [rax], edx
5957# else
5958 mov eax, [pvBitmap]
5959 btr [eax], edx
5960# endif
5961 setc al
5962 and eax, 1
5963 mov [rc.u32], eax
5964 }
5965# endif
5966 return rc.f;
5967}
5968#endif
5969
5970
5971/**
5972 * Atomically tests and clears a bit in a bitmap, ordered.
5973 *
5974 * @returns true if the bit was set.
5975 * @returns false if the bit was clear.
5976 *
5977 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5978 * the memory access isn't atomic!
5979 * @param iBit The bit to test and clear.
5980 *
5981 * @remarks No memory barrier, take care on smp.
5982 */
5983#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5984DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5985#else
5986DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5987{
5988 union { bool f; uint32_t u32; uint8_t u8; } rc;
5989 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5990# if RT_INLINE_ASM_USES_INTRIN
5991 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5992
5993# elif RT_INLINE_ASM_GNU_STYLE
5994 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5995 "setc %b0\n\t"
5996 "andl $1, %0\n\t"
5997 : "=q" (rc.u32),
5998 "=m" (*(volatile long *)pvBitmap)
5999 : "Ir" (iBit),
6000 "m" (*(volatile long *)pvBitmap)
6001 : "memory");
6002# else
6003 __asm
6004 {
6005 mov edx, [iBit]
6006# ifdef RT_ARCH_AMD64
6007 mov rax, [pvBitmap]
6008 lock btr [rax], edx
6009# else
6010 mov eax, [pvBitmap]
6011 lock btr [eax], edx
6012# endif
6013 setc al
6014 and eax, 1
6015 mov [rc.u32], eax
6016 }
6017# endif
6018 return rc.f;
6019}
6020#endif
6021
6022
6023/**
6024 * Tests and toggles a bit in a bitmap.
6025 *
6026 * @returns true if the bit was set.
6027 * @returns false if the bit was clear.
6028 *
6029 * @param pvBitmap Pointer to the bitmap.
6030 * @param iBit The bit to test and toggle.
6031 *
6032 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6033 * However, doing so will yield better performance as well as avoiding
6034 * traps accessing the last bits in the bitmap.
6035 */
6036#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6037DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6038#else
6039DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6040{
6041 union { bool f; uint32_t u32; uint8_t u8; } rc;
6042# if RT_INLINE_ASM_USES_INTRIN
6043 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6044
6045# elif RT_INLINE_ASM_GNU_STYLE
6046 __asm__ __volatile__("btcl %2, %1\n\t"
6047 "setc %b0\n\t"
6048 "andl $1, %0\n\t"
6049 : "=q" (rc.u32),
6050 "=m" (*(volatile long *)pvBitmap)
6051 : "Ir" (iBit),
6052 "m" (*(volatile long *)pvBitmap)
6053 : "memory");
6054# else
6055 __asm
6056 {
6057 mov edx, [iBit]
6058# ifdef RT_ARCH_AMD64
6059 mov rax, [pvBitmap]
6060 btc [rax], edx
6061# else
6062 mov eax, [pvBitmap]
6063 btc [eax], edx
6064# endif
6065 setc al
6066 and eax, 1
6067 mov [rc.u32], eax
6068 }
6069# endif
6070 return rc.f;
6071}
6072#endif
6073
6074
6075/**
6076 * Atomically tests and toggles a bit in a bitmap, ordered.
6077 *
6078 * @returns true if the bit was set.
6079 * @returns false if the bit was clear.
6080 *
6081 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6082 * the memory access isn't atomic!
6083 * @param iBit The bit to test and toggle.
6084 */
6085#if RT_INLINE_ASM_EXTERNAL
6086DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6087#else
6088DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6089{
6090 union { bool f; uint32_t u32; uint8_t u8; } rc;
6091 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6092# if RT_INLINE_ASM_GNU_STYLE
6093 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6094 "setc %b0\n\t"
6095 "andl $1, %0\n\t"
6096 : "=q" (rc.u32),
6097 "=m" (*(volatile long *)pvBitmap)
6098 : "Ir" (iBit),
6099 "m" (*(volatile long *)pvBitmap)
6100 : "memory");
6101# else
6102 __asm
6103 {
6104 mov edx, [iBit]
6105# ifdef RT_ARCH_AMD64
6106 mov rax, [pvBitmap]
6107 lock btc [rax], edx
6108# else
6109 mov eax, [pvBitmap]
6110 lock btc [eax], edx
6111# endif
6112 setc al
6113 and eax, 1
6114 mov [rc.u32], eax
6115 }
6116# endif
6117 return rc.f;
6118}
6119#endif
6120
6121
6122/**
6123 * Tests if a bit in a bitmap is set.
6124 *
6125 * @returns true if the bit is set.
6126 * @returns false if the bit is clear.
6127 *
6128 * @param pvBitmap Pointer to the bitmap.
6129 * @param iBit The bit to test.
6130 *
6131 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6132 * However, doing so will yield better performance as well as avoiding
6133 * traps accessing the last bits in the bitmap.
6134 */
6135#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6136DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6137#else
6138DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6139{
6140 union { bool f; uint32_t u32; uint8_t u8; } rc;
6141# if RT_INLINE_ASM_USES_INTRIN
6142 rc.u32 = _bittest((long *)pvBitmap, iBit);
6143# elif RT_INLINE_ASM_GNU_STYLE
6144
6145 __asm__ __volatile__("btl %2, %1\n\t"
6146 "setc %b0\n\t"
6147 "andl $1, %0\n\t"
6148 : "=q" (rc.u32)
6149 : "m" (*(const volatile long *)pvBitmap),
6150 "Ir" (iBit)
6151 : "memory");
6152# else
6153 __asm
6154 {
6155 mov edx, [iBit]
6156# ifdef RT_ARCH_AMD64
6157 mov rax, [pvBitmap]
6158 bt [rax], edx
6159# else
6160 mov eax, [pvBitmap]
6161 bt [eax], edx
6162# endif
6163 setc al
6164 and eax, 1
6165 mov [rc.u32], eax
6166 }
6167# endif
6168 return rc.f;
6169}
6170#endif
6171
6172
6173/**
6174 * Clears a bit range within a bitmap.
6175 *
6176 * @param pvBitmap Pointer to the bitmap.
6177 * @param iBitStart The First bit to clear.
6178 * @param iBitEnd The first bit not to clear.
6179 */
6180DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6181{
6182 if (iBitStart < iBitEnd)
6183 {
6184 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6185 int iStart = iBitStart & ~31;
6186 int iEnd = iBitEnd & ~31;
6187 if (iStart == iEnd)
6188 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6189 else
6190 {
6191 /* bits in first dword. */
6192 if (iBitStart & 31)
6193 {
6194 *pu32 &= (1 << (iBitStart & 31)) - 1;
6195 pu32++;
6196 iBitStart = iStart + 32;
6197 }
6198
6199 /* whole dword. */
6200 if (iBitStart != iEnd)
6201 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6202
6203 /* bits in last dword. */
6204 if (iBitEnd & 31)
6205 {
6206 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6207 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6208 }
6209 }
6210 }
6211}
6212
6213
6214/**
6215 * Sets a bit range within a bitmap.
6216 *
6217 * @param pvBitmap Pointer to the bitmap.
6218 * @param iBitStart The First bit to set.
6219 * @param iBitEnd The first bit not to set.
6220 */
6221DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6222{
6223 if (iBitStart < iBitEnd)
6224 {
6225 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6226 int iStart = iBitStart & ~31;
6227 int iEnd = iBitEnd & ~31;
6228 if (iStart == iEnd)
6229 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
6230 else
6231 {
6232 /* bits in first dword. */
6233 if (iBitStart & 31)
6234 {
6235 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6236 pu32++;
6237 iBitStart = iStart + 32;
6238 }
6239
6240 /* whole dword. */
6241 if (iBitStart != iEnd)
6242 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6243
6244 /* bits in last dword. */
6245 if (iBitEnd & 31)
6246 {
6247 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6248 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6249 }
6250 }
6251 }
6252}
6253
6254
6255/**
6256 * Finds the first clear bit in a bitmap.
6257 *
6258 * @returns Index of the first zero bit.
6259 * @returns -1 if no clear bit was found.
6260 * @param pvBitmap Pointer to the bitmap.
6261 * @param cBits The number of bits in the bitmap. Multiple of 32.
6262 */
6263#if RT_INLINE_ASM_EXTERNAL
6264DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6265#else
6266DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6267{
6268 if (cBits)
6269 {
6270 int32_t iBit;
6271# if RT_INLINE_ASM_GNU_STYLE
6272 RTCCUINTREG uEAX, uECX, uEDI;
6273 cBits = RT_ALIGN_32(cBits, 32);
6274 __asm__ __volatile__("repe; scasl\n\t"
6275 "je 1f\n\t"
6276# ifdef RT_ARCH_AMD64
6277 "lea -4(%%rdi), %%rdi\n\t"
6278 "xorl (%%rdi), %%eax\n\t"
6279 "subq %5, %%rdi\n\t"
6280# else
6281 "lea -4(%%edi), %%edi\n\t"
6282 "xorl (%%edi), %%eax\n\t"
6283 "subl %5, %%edi\n\t"
6284# endif
6285 "shll $3, %%edi\n\t"
6286 "bsfl %%eax, %%edx\n\t"
6287 "addl %%edi, %%edx\n\t"
6288 "1:\t\n"
6289 : "=d" (iBit),
6290 "=&c" (uECX),
6291 "=&D" (uEDI),
6292 "=&a" (uEAX)
6293 : "0" (0xffffffff),
6294 "mr" (pvBitmap),
6295 "1" (cBits >> 5),
6296 "2" (pvBitmap),
6297 "3" (0xffffffff));
6298# else
6299 cBits = RT_ALIGN_32(cBits, 32);
6300 __asm
6301 {
6302# ifdef RT_ARCH_AMD64
6303 mov rdi, [pvBitmap]
6304 mov rbx, rdi
6305# else
6306 mov edi, [pvBitmap]
6307 mov ebx, edi
6308# endif
6309 mov edx, 0ffffffffh
6310 mov eax, edx
6311 mov ecx, [cBits]
6312 shr ecx, 5
6313 repe scasd
6314 je done
6315
6316# ifdef RT_ARCH_AMD64
6317 lea rdi, [rdi - 4]
6318 xor eax, [rdi]
6319 sub rdi, rbx
6320# else
6321 lea edi, [edi - 4]
6322 xor eax, [edi]
6323 sub edi, ebx
6324# endif
6325 shl edi, 3
6326 bsf edx, eax
6327 add edx, edi
6328 done:
6329 mov [iBit], edx
6330 }
6331# endif
6332 return iBit;
6333 }
6334 return -1;
6335}
6336#endif
6337
6338
6339/**
6340 * Finds the next clear bit in a bitmap.
6341 *
6342 * @returns Index of the first zero bit.
6343 * @returns -1 if no clear bit was found.
6344 * @param pvBitmap Pointer to the bitmap.
6345 * @param cBits The number of bits in the bitmap. Multiple of 32.
6346 * @param iBitPrev The bit returned from the last search.
6347 * The search will start at iBitPrev + 1.
6348 */
6349#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6350DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6351#else
6352DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6353{
6354 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6355 int iBit = ++iBitPrev & 31;
6356 if (iBit)
6357 {
6358 /*
6359 * Inspect the 32-bit word containing the unaligned bit.
6360 */
6361 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6362
6363# if RT_INLINE_ASM_USES_INTRIN
6364 unsigned long ulBit = 0;
6365 if (_BitScanForward(&ulBit, u32))
6366 return ulBit + iBitPrev;
6367# else
6368# if RT_INLINE_ASM_GNU_STYLE
6369 __asm__ __volatile__("bsf %1, %0\n\t"
6370 "jnz 1f\n\t"
6371 "movl $-1, %0\n\t"
6372 "1:\n\t"
6373 : "=r" (iBit)
6374 : "r" (u32));
6375# else
6376 __asm
6377 {
6378 mov edx, [u32]
6379 bsf eax, edx
6380 jnz done
6381 mov eax, 0ffffffffh
6382 done:
6383 mov [iBit], eax
6384 }
6385# endif
6386 if (iBit >= 0)
6387 return iBit + iBitPrev;
6388# endif
6389
6390 /*
6391 * Skip ahead and see if there is anything left to search.
6392 */
6393 iBitPrev |= 31;
6394 iBitPrev++;
6395 if (cBits <= (uint32_t)iBitPrev)
6396 return -1;
6397 }
6398
6399 /*
6400 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6401 */
6402 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6403 if (iBit >= 0)
6404 iBit += iBitPrev;
6405 return iBit;
6406}
6407#endif
6408
6409
6410/**
6411 * Finds the first set bit in a bitmap.
6412 *
6413 * @returns Index of the first set bit.
6414 * @returns -1 if no clear bit was found.
6415 * @param pvBitmap Pointer to the bitmap.
6416 * @param cBits The number of bits in the bitmap. Multiple of 32.
6417 */
6418#if RT_INLINE_ASM_EXTERNAL
6419DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6420#else
6421DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6422{
6423 if (cBits)
6424 {
6425 int32_t iBit;
6426# if RT_INLINE_ASM_GNU_STYLE
6427 RTCCUINTREG uEAX, uECX, uEDI;
6428 cBits = RT_ALIGN_32(cBits, 32);
6429 __asm__ __volatile__("repe; scasl\n\t"
6430 "je 1f\n\t"
6431# ifdef RT_ARCH_AMD64
6432 "lea -4(%%rdi), %%rdi\n\t"
6433 "movl (%%rdi), %%eax\n\t"
6434 "subq %5, %%rdi\n\t"
6435# else
6436 "lea -4(%%edi), %%edi\n\t"
6437 "movl (%%edi), %%eax\n\t"
6438 "subl %5, %%edi\n\t"
6439# endif
6440 "shll $3, %%edi\n\t"
6441 "bsfl %%eax, %%edx\n\t"
6442 "addl %%edi, %%edx\n\t"
6443 "1:\t\n"
6444 : "=d" (iBit),
6445 "=&c" (uECX),
6446 "=&D" (uEDI),
6447 "=&a" (uEAX)
6448 : "0" (0xffffffff),
6449 "mr" (pvBitmap),
6450 "1" (cBits >> 5),
6451 "2" (pvBitmap),
6452 "3" (0));
6453# else
6454 cBits = RT_ALIGN_32(cBits, 32);
6455 __asm
6456 {
6457# ifdef RT_ARCH_AMD64
6458 mov rdi, [pvBitmap]
6459 mov rbx, rdi
6460# else
6461 mov edi, [pvBitmap]
6462 mov ebx, edi
6463# endif
6464 mov edx, 0ffffffffh
6465 xor eax, eax
6466 mov ecx, [cBits]
6467 shr ecx, 5
6468 repe scasd
6469 je done
6470# ifdef RT_ARCH_AMD64
6471 lea rdi, [rdi - 4]
6472 mov eax, [rdi]
6473 sub rdi, rbx
6474# else
6475 lea edi, [edi - 4]
6476 mov eax, [edi]
6477 sub edi, ebx
6478# endif
6479 shl edi, 3
6480 bsf edx, eax
6481 add edx, edi
6482 done:
6483 mov [iBit], edx
6484 }
6485# endif
6486 return iBit;
6487 }
6488 return -1;
6489}
6490#endif
6491
6492
6493/**
6494 * Finds the next set bit in a bitmap.
6495 *
6496 * @returns Index of the next set bit.
6497 * @returns -1 if no set bit was found.
6498 * @param pvBitmap Pointer to the bitmap.
6499 * @param cBits The number of bits in the bitmap. Multiple of 32.
6500 * @param iBitPrev The bit returned from the last search.
6501 * The search will start at iBitPrev + 1.
6502 */
6503#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6504DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6505#else
6506DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6507{
6508 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6509 int iBit = ++iBitPrev & 31;
6510 if (iBit)
6511 {
6512 /*
6513 * Inspect the 32-bit word containing the unaligned bit.
6514 */
6515 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6516
6517# if RT_INLINE_ASM_USES_INTRIN
6518 unsigned long ulBit = 0;
6519 if (_BitScanForward(&ulBit, u32))
6520 return ulBit + iBitPrev;
6521# else
6522# if RT_INLINE_ASM_GNU_STYLE
6523 __asm__ __volatile__("bsf %1, %0\n\t"
6524 "jnz 1f\n\t"
6525 "movl $-1, %0\n\t"
6526 "1:\n\t"
6527 : "=r" (iBit)
6528 : "r" (u32));
6529# else
6530 __asm
6531 {
6532 mov edx, [u32]
6533 bsf eax, edx
6534 jnz done
6535 mov eax, 0ffffffffh
6536 done:
6537 mov [iBit], eax
6538 }
6539# endif
6540 if (iBit >= 0)
6541 return iBit + iBitPrev;
6542# endif
6543
6544 /*
6545 * Skip ahead and see if there is anything left to search.
6546 */
6547 iBitPrev |= 31;
6548 iBitPrev++;
6549 if (cBits <= (uint32_t)iBitPrev)
6550 return -1;
6551 }
6552
6553 /*
6554 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6555 */
6556 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6557 if (iBit >= 0)
6558 iBit += iBitPrev;
6559 return iBit;
6560}
6561#endif
6562
6563
6564/**
6565 * Finds the first bit which is set in the given 32-bit integer.
6566 * Bits are numbered from 1 (least significant) to 32.
6567 *
6568 * @returns index [1..32] of the first set bit.
6569 * @returns 0 if all bits are cleared.
6570 * @param u32 Integer to search for set bits.
6571 * @remark Similar to ffs() in BSD.
6572 */
6573DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6574{
6575# if RT_INLINE_ASM_USES_INTRIN
6576 unsigned long iBit;
6577 if (_BitScanForward(&iBit, u32))
6578 iBit++;
6579 else
6580 iBit = 0;
6581# elif RT_INLINE_ASM_GNU_STYLE
6582 uint32_t iBit;
6583 __asm__ __volatile__("bsf %1, %0\n\t"
6584 "jnz 1f\n\t"
6585 "xorl %0, %0\n\t"
6586 "jmp 2f\n"
6587 "1:\n\t"
6588 "incl %0\n"
6589 "2:\n\t"
6590 : "=r" (iBit)
6591 : "rm" (u32));
6592# else
6593 uint32_t iBit;
6594 _asm
6595 {
6596 bsf eax, [u32]
6597 jnz found
6598 xor eax, eax
6599 jmp done
6600 found:
6601 inc eax
6602 done:
6603 mov [iBit], eax
6604 }
6605# endif
6606 return iBit;
6607}
6608
6609
6610/**
6611 * Finds the first bit which is set in the given 32-bit integer.
6612 * Bits are numbered from 1 (least significant) to 32.
6613 *
6614 * @returns index [1..32] of the first set bit.
6615 * @returns 0 if all bits are cleared.
6616 * @param i32 Integer to search for set bits.
6617 * @remark Similar to ffs() in BSD.
6618 */
6619DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6620{
6621 return ASMBitFirstSetU32((uint32_t)i32);
6622}
6623
6624
6625/**
6626 * Finds the last bit which is set in the given 32-bit integer.
6627 * Bits are numbered from 1 (least significant) to 32.
6628 *
6629 * @returns index [1..32] of the last set bit.
6630 * @returns 0 if all bits are cleared.
6631 * @param u32 Integer to search for set bits.
6632 * @remark Similar to fls() in BSD.
6633 */
6634DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6635{
6636# if RT_INLINE_ASM_USES_INTRIN
6637 unsigned long iBit;
6638 if (_BitScanReverse(&iBit, u32))
6639 iBit++;
6640 else
6641 iBit = 0;
6642# elif RT_INLINE_ASM_GNU_STYLE
6643 uint32_t iBit;
6644 __asm__ __volatile__("bsrl %1, %0\n\t"
6645 "jnz 1f\n\t"
6646 "xorl %0, %0\n\t"
6647 "jmp 2f\n"
6648 "1:\n\t"
6649 "incl %0\n"
6650 "2:\n\t"
6651 : "=r" (iBit)
6652 : "rm" (u32));
6653# else
6654 uint32_t iBit;
6655 _asm
6656 {
6657 bsr eax, [u32]
6658 jnz found
6659 xor eax, eax
6660 jmp done
6661 found:
6662 inc eax
6663 done:
6664 mov [iBit], eax
6665 }
6666# endif
6667 return iBit;
6668}
6669
6670
6671/**
6672 * Finds the last bit which is set in the given 32-bit integer.
6673 * Bits are numbered from 1 (least significant) to 32.
6674 *
6675 * @returns index [1..32] of the last set bit.
6676 * @returns 0 if all bits are cleared.
6677 * @param i32 Integer to search for set bits.
6678 * @remark Similar to fls() in BSD.
6679 */
6680DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6681{
6682 return ASMBitLastSetU32((uint32_t)i32);
6683}
6684
6685/**
6686 * Reverse the byte order of the given 16-bit integer.
6687 *
6688 * @returns Revert
6689 * @param u16 16-bit integer value.
6690 */
6691DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6692{
6693#if RT_INLINE_ASM_USES_INTRIN
6694 u16 = _byteswap_ushort(u16);
6695#elif RT_INLINE_ASM_GNU_STYLE
6696 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6697#else
6698 _asm
6699 {
6700 mov ax, [u16]
6701 ror ax, 8
6702 mov [u16], ax
6703 }
6704#endif
6705 return u16;
6706}
6707
6708/**
6709 * Reverse the byte order of the given 32-bit integer.
6710 *
6711 * @returns Revert
6712 * @param u32 32-bit integer value.
6713 */
6714DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6715{
6716#if RT_INLINE_ASM_USES_INTRIN
6717 u32 = _byteswap_ulong(u32);
6718#elif RT_INLINE_ASM_GNU_STYLE
6719 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6720#else
6721 _asm
6722 {
6723 mov eax, [u32]
6724 bswap eax
6725 mov [u32], eax
6726 }
6727#endif
6728 return u32;
6729}
6730
6731
6732/**
6733 * Reverse the byte order of the given 64-bit integer.
6734 *
6735 * @returns Revert
6736 * @param u64 64-bit integer value.
6737 */
6738DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6739{
6740#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6741 u64 = _byteswap_uint64(u64);
6742#else
6743 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6744 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6745#endif
6746 return u64;
6747}
6748
6749
6750/** @} */
6751
6752
6753/** @} */
6754#endif
6755
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette