VirtualBox

source: vbox/trunk/include/iprt/asm.h@25759

Last change on this file since 25759 was 25671, checked in by vboxsync, 15 years ago

iprt/asm.h, ASMAtomicUoReadU64.asm: consistent use of the lock prefix in the 32-bit version of ASMAtomicUoReadU64; play safe and use it.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 174.6 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using _MSC_VER 1400 or later.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(_mm_mfence)
105# pragma intrinsic(_mm_sfence)
106# pragma intrinsic(_mm_lfence)
107# pragma intrinsic(__stosq)
108# pragma intrinsic(__readcr8)
109# pragma intrinsic(__writecr8)
110# pragma intrinsic(_byteswap_uint64)
111# pragma intrinsic(_InterlockedExchange64)
112# endif
113# endif
114#endif
115#ifndef RT_INLINE_ASM_USES_INTRIN
116# define RT_INLINE_ASM_USES_INTRIN 0
117#endif
118
119
120/** @defgroup grp_asm ASM - Assembly Routines
121 * @ingroup grp_rt
122 *
123 * @remarks The difference between ordered and unordered atomic operations is that
124 * the former will complete outstanding reads and writes before continuing,
125 * while the latter makes no promises about the order. Ordered operations do
126 * not, it seems, make any 100% promise as to whether the operation will
127 * complete before any subsequent memory access. (Please correct this if it
128 * is wrong.)
129 *
130 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
131 * are unordered (note the Uo).
132 *
133 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
134 * or even optimize assembler instructions away. For instance, in the following code
135 * the second rdmsr instruction is optimized away because gcc treats that instruction
136 * as deterministic:
137 *
138 * @code
139 * static inline uint32_t rdmsr_low(int idx)
140 * {
141 * uint32_t low;
142 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
143 * }
144 * ...
145 * uint32_t msr1 = rdmsr_low(1);
146 * foo(msr1);
147 * msr1 = rdmsr_low(1);
148 * bar(msr1);
149 * @endcode
150 *
151 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
152 * use the result of the first call as input parameter for bar() as well. For rdmsr this
153 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
154 * machine status information in general.
155 *
156 * @{
157 */
158
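/** A minimal sketch of the ordered vs. unordered naming convention described
 *  above; the pTask structure and its members are hypothetical.
 * @code
 * ASMAtomicWriteU32(&pTask->fCompleted, 1);    // ordered: earlier writes become visible first
 * ASMAtomicUoWriteU32(&pTask->cHits, cHits);   // unordered: just an atomic 32-bit store
 * @endcode
 */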
159/** @def RT_INLINE_ASM_GCC_4_3_X_X86
160 * Used to work around some 4.3.x register allocation issues in this version of
161 * the compiler. */
162#ifdef __GNUC__
163# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__))
164#endif
165#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
166# define RT_INLINE_ASM_GCC_4_3_X_X86 0
167#endif
168
169/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
170 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
171 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds (PIC
172 * mode, x86).
173 *
174 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
175 * when in PIC mode on x86.
176 */
177#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
178# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
179 ( (defined(PIC) || defined(__PIC__)) \
180 && defined(RT_ARCH_X86) \
181 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
182 || defined(RT_OS_DARWIN)) )
183#endif
184
185/** @def RT_INLINE_ASM_EXTERNAL
186 * Defined as 1 if the compiler does not support inline assembly.
187 * The ASM* functions will then be implemented in an external .asm file.
188 *
189 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
190 * inline assembly in their AMD64 compiler.
191 */
192#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
193# define RT_INLINE_ASM_EXTERNAL 1
194#else
195# define RT_INLINE_ASM_EXTERNAL 0
196#endif
197
198/** @def RT_INLINE_ASM_GNU_STYLE
199 * Defined as 1 if the compiler understands GNU style inline assembly.
200 */
201#if defined(_MSC_VER)
202# define RT_INLINE_ASM_GNU_STYLE 0
203#else
204# define RT_INLINE_ASM_GNU_STYLE 1
205#endif
206
207
208/** @todo find a more proper place for this structure? */
209#pragma pack(1)
210/** IDTR */
211typedef struct RTIDTR
212{
213 /** Size of the IDT. */
214 uint16_t cbIdt;
215 /** Address of the IDT. */
216 uintptr_t pIdt;
217} RTIDTR, *PRTIDTR;
218#pragma pack()
219
220#pragma pack(1)
221/** GDTR */
222typedef struct RTGDTR
223{
224 /** Size of the GDT. */
225 uint16_t cbGdt;
226 /** Address of the GDT. */
227 uintptr_t pGdt;
228} RTGDTR, *PRTGDTR;
229#pragma pack()
230
231
232/** @def ASMReturnAddress
233 * Gets the return address of the current (or calling if you like) function or method.
234 */
235#ifdef _MSC_VER
236# ifdef __cplusplus
237extern "C"
238# endif
239void * _ReturnAddress(void);
240# pragma intrinsic(_ReturnAddress)
241# define ASMReturnAddress() _ReturnAddress()
242#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
243# define ASMReturnAddress() __builtin_return_address(0)
244#else
245# error "Unsupported compiler."
246#endif
247
248
249/**
250 * Gets the content of the IDTR CPU register.
251 * @param pIdtr Where to store the IDTR contents.
252 */
253#if RT_INLINE_ASM_EXTERNAL
254DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
255#else
256DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
257{
258# if RT_INLINE_ASM_GNU_STYLE
259 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
260# else
261 __asm
262 {
263# ifdef RT_ARCH_AMD64
264 mov rax, [pIdtr]
265 sidt [rax]
266# else
267 mov eax, [pIdtr]
268 sidt [eax]
269# endif
270 }
271# endif
272}
273#endif
274
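/** A minimal usage sketch (illustrative, assuming this header is included):
 *  capturing the current IDT base via ASMGetIDTR() and the RTIDTR structure.
 * @code
 * static uintptr_t GetIdtBase(void)
 * {
 *     RTIDTR Idtr;
 *     ASMGetIDTR(&Idtr);
 *     return Idtr.pIdt;    // Idtr.cbIdt holds the limit
 * }
 * @endcode
 */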
275
276/**
277 * Sets the content of the IDTR CPU register.
278 * @param pIdtr Where to load the IDTR contents from
279 */
280#if RT_INLINE_ASM_EXTERNAL
281DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
282#else
283DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
284{
285# if RT_INLINE_ASM_GNU_STYLE
286 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
287# else
288 __asm
289 {
290# ifdef RT_ARCH_AMD64
291 mov rax, [pIdtr]
292 lidt [rax]
293# else
294 mov eax, [pIdtr]
295 lidt [eax]
296# endif
297 }
298# endif
299}
300#endif
301
302
303/**
304 * Gets the content of the GDTR CPU register.
305 * @param pGdtr Where to store the GDTR contents.
306 */
307#if RT_INLINE_ASM_EXTERNAL
308DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
309#else
310DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
311{
312# if RT_INLINE_ASM_GNU_STYLE
313 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
314# else
315 __asm
316 {
317# ifdef RT_ARCH_AMD64
318 mov rax, [pGdtr]
319 sgdt [rax]
320# else
321 mov eax, [pGdtr]
322 sgdt [eax]
323# endif
324 }
325# endif
326}
327#endif
328
329/**
330 * Get the cs register.
331 * @returns cs.
332 */
333#if RT_INLINE_ASM_EXTERNAL
334DECLASM(RTSEL) ASMGetCS(void);
335#else
336DECLINLINE(RTSEL) ASMGetCS(void)
337{
338 RTSEL SelCS;
339# if RT_INLINE_ASM_GNU_STYLE
340 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
341# else
342 __asm
343 {
344 mov ax, cs
345 mov [SelCS], ax
346 }
347# endif
348 return SelCS;
349}
350#endif
351
352
353/**
354 * Get the DS register.
355 * @returns DS.
356 */
357#if RT_INLINE_ASM_EXTERNAL
358DECLASM(RTSEL) ASMGetDS(void);
359#else
360DECLINLINE(RTSEL) ASMGetDS(void)
361{
362 RTSEL SelDS;
363# if RT_INLINE_ASM_GNU_STYLE
364 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
365# else
366 __asm
367 {
368 mov ax, ds
369 mov [SelDS], ax
370 }
371# endif
372 return SelDS;
373}
374#endif
375
376
377/**
378 * Get the ES register.
379 * @returns ES.
380 */
381#if RT_INLINE_ASM_EXTERNAL
382DECLASM(RTSEL) ASMGetES(void);
383#else
384DECLINLINE(RTSEL) ASMGetES(void)
385{
386 RTSEL SelES;
387# if RT_INLINE_ASM_GNU_STYLE
388 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
389# else
390 __asm
391 {
392 mov ax, es
393 mov [SelES], ax
394 }
395# endif
396 return SelES;
397}
398#endif
399
400
401/**
402 * Get the FS register.
403 * @returns FS.
404 */
405#if RT_INLINE_ASM_EXTERNAL
406DECLASM(RTSEL) ASMGetFS(void);
407#else
408DECLINLINE(RTSEL) ASMGetFS(void)
409{
410 RTSEL SelFS;
411# if RT_INLINE_ASM_GNU_STYLE
412 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
413# else
414 __asm
415 {
416 mov ax, fs
417 mov [SelFS], ax
418 }
419# endif
420 return SelFS;
421}
422#endif
423
424
425/**
426 * Get the GS register.
427 * @returns GS.
428 */
429#if RT_INLINE_ASM_EXTERNAL
430DECLASM(RTSEL) ASMGetGS(void);
431#else
432DECLINLINE(RTSEL) ASMGetGS(void)
433{
434 RTSEL SelGS;
435# if RT_INLINE_ASM_GNU_STYLE
436 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
437# else
438 __asm
439 {
440 mov ax, gs
441 mov [SelGS], ax
442 }
443# endif
444 return SelGS;
445}
446#endif
447
448
449/**
450 * Get the SS register.
451 * @returns SS.
452 */
453#if RT_INLINE_ASM_EXTERNAL
454DECLASM(RTSEL) ASMGetSS(void);
455#else
456DECLINLINE(RTSEL) ASMGetSS(void)
457{
458 RTSEL SelSS;
459# if RT_INLINE_ASM_GNU_STYLE
460 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
461# else
462 __asm
463 {
464 mov ax, ss
465 mov [SelSS], ax
466 }
467# endif
468 return SelSS;
469}
470#endif
471
472
473/**
474 * Get the TR register.
475 * @returns TR.
476 */
477#if RT_INLINE_ASM_EXTERNAL
478DECLASM(RTSEL) ASMGetTR(void);
479#else
480DECLINLINE(RTSEL) ASMGetTR(void)
481{
482 RTSEL SelTR;
483# if RT_INLINE_ASM_GNU_STYLE
484 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
485# else
486 __asm
487 {
488 str ax
489 mov [SelTR], ax
490 }
491# endif
492 return SelTR;
493}
494#endif
495
496
497/**
498 * Get the [RE]FLAGS register.
499 * @returns [RE]FLAGS.
500 */
501#if RT_INLINE_ASM_EXTERNAL
502DECLASM(RTCCUINTREG) ASMGetFlags(void);
503#else
504DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
505{
506 RTCCUINTREG uFlags;
507# if RT_INLINE_ASM_GNU_STYLE
508# ifdef RT_ARCH_AMD64
509 __asm__ __volatile__("pushfq\n\t"
510 "popq %0\n\t"
511 : "=r" (uFlags));
512# else
513 __asm__ __volatile__("pushfl\n\t"
514 "popl %0\n\t"
515 : "=r" (uFlags));
516# endif
517# else
518 __asm
519 {
520# ifdef RT_ARCH_AMD64
521 pushfq
522 pop [uFlags]
523# else
524 pushfd
525 pop [uFlags]
526# endif
527 }
528# endif
529 return uFlags;
530}
531#endif
532
533
534/**
535 * Set the [RE]FLAGS register.
536 * @param uFlags The new [RE]FLAGS value.
537 */
538#if RT_INLINE_ASM_EXTERNAL
539DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
540#else
541DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
542{
543# if RT_INLINE_ASM_GNU_STYLE
544# ifdef RT_ARCH_AMD64
545 __asm__ __volatile__("pushq %0\n\t"
546 "popfq\n\t"
547 : : "g" (uFlags));
548# else
549 __asm__ __volatile__("pushl %0\n\t"
550 "popfl\n\t"
551 : : "g" (uFlags));
552# endif
553# else
554 __asm
555 {
556# ifdef RT_ARCH_AMD64
557 push [uFlags]
558 popfq
559# else
560 push [uFlags]
561 popfd
562# endif
563 }
564# endif
565}
566#endif
567
568
569/**
570 * Gets the content of the CPU timestamp counter register.
571 *
572 * @returns TSC.
573 */
574#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
575DECLASM(uint64_t) ASMReadTSC(void);
576#else
577DECLINLINE(uint64_t) ASMReadTSC(void)
578{
579 RTUINT64U u;
580# if RT_INLINE_ASM_GNU_STYLE
581 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
582# else
583# if RT_INLINE_ASM_USES_INTRIN
584 u.u = __rdtsc();
585# else
586 __asm
587 {
588 rdtsc
589 mov [u.s.Lo], eax
590 mov [u.s.Hi], edx
591 }
592# endif
593# endif
594 return u.u;
595}
596#endif
597
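/** A minimal usage sketch (illustrative): measuring an interval in TSC ticks.
 *  On CPUs without an invariant TSC the delta is a rough cycle count, not time.
 * @code
 * static uint64_t MeasureTscTicks(void (*pfnWork)(void))
 * {
 *     uint64_t const uStart = ASMReadTSC();
 *     pfnWork();
 *     return ASMReadTSC() - uStart;
 * }
 * @endcode
 */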
598
599/**
600 * Performs the cpuid instruction returning all registers.
601 *
602 * @param uOperator CPUID operation (eax).
603 * @param pvEAX Where to store eax.
604 * @param pvEBX Where to store ebx.
605 * @param pvECX Where to store ecx.
606 * @param pvEDX Where to store edx.
607 * @remark We're using void pointers to ease the use of special bitfield structures and such.
608 */
609#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
610DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
611#else
612DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
613{
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef RT_ARCH_AMD64
616 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
617 __asm__ ("cpuid\n\t"
618 : "=a" (uRAX),
619 "=b" (uRBX),
620 "=c" (uRCX),
621 "=d" (uRDX)
622 : "0" (uOperator));
623 *(uint32_t *)pvEAX = (uint32_t)uRAX;
624 *(uint32_t *)pvEBX = (uint32_t)uRBX;
625 *(uint32_t *)pvECX = (uint32_t)uRCX;
626 *(uint32_t *)pvEDX = (uint32_t)uRDX;
627# else
628 __asm__ ("xchgl %%ebx, %1\n\t"
629 "cpuid\n\t"
630 "xchgl %%ebx, %1\n\t"
631 : "=a" (*(uint32_t *)pvEAX),
632 "=r" (*(uint32_t *)pvEBX),
633 "=c" (*(uint32_t *)pvECX),
634 "=d" (*(uint32_t *)pvEDX)
635 : "0" (uOperator));
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 *(uint32_t *)pvEAX = aInfo[0];
642 *(uint32_t *)pvEBX = aInfo[1];
643 *(uint32_t *)pvECX = aInfo[2];
644 *(uint32_t *)pvEDX = aInfo[3];
645
646# else
647 uint32_t uEAX;
648 uint32_t uEBX;
649 uint32_t uECX;
650 uint32_t uEDX;
651 __asm
652 {
653 push ebx
654 mov eax, [uOperator]
655 cpuid
656 mov [uEAX], eax
657 mov [uEBX], ebx
658 mov [uECX], ecx
659 mov [uEDX], edx
660 pop ebx
661 }
662 *(uint32_t *)pvEAX = uEAX;
663 *(uint32_t *)pvEBX = uEBX;
664 *(uint32_t *)pvECX = uECX;
665 *(uint32_t *)pvEDX = uEDX;
666# endif
667}
668#endif
669
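/** A minimal usage sketch (illustrative): reading the 12-character vendor
 *  string that standard leaf 0 returns in EBX, EDX, ECX order; the void
 *  pointer parameters let us write straight into the caller's buffer.
 * @code
 * static void GetCpuVendor(char szVendor[13])
 * {
 *     uint32_t uEAX;
 *     ASMCpuId(0, &uEAX, &szVendor[0], &szVendor[8], &szVendor[4]);
 *     szVendor[12] = '\0';    // e.g. "GenuineIntel" or "AuthenticAMD"
 * }
 * @endcode
 */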
670
671/**
672 * Performs the cpuid instruction returning all registers.
673 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
674 *
675 * @param uOperator CPUID operation (eax).
676 * @param uIdxECX ecx index
677 * @param pvEAX Where to store eax.
678 * @param pvEBX Where to store ebx.
679 * @param pvECX Where to store ecx.
680 * @param pvEDX Where to store edx.
681 * @remark We're using void pointers to ease the use of special bitfield structures and such.
682 */
683#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
684DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
685#else
686DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
687{
688# if RT_INLINE_ASM_GNU_STYLE
689# ifdef RT_ARCH_AMD64
690 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
691 __asm__ ("cpuid\n\t"
692 : "=a" (uRAX),
693 "=b" (uRBX),
694 "=c" (uRCX),
695 "=d" (uRDX)
696 : "0" (uOperator),
697 "2" (uIdxECX));
698 *(uint32_t *)pvEAX = (uint32_t)uRAX;
699 *(uint32_t *)pvEBX = (uint32_t)uRBX;
700 *(uint32_t *)pvECX = (uint32_t)uRCX;
701 *(uint32_t *)pvEDX = (uint32_t)uRDX;
702# else
703 __asm__ ("xchgl %%ebx, %1\n\t"
704 "cpuid\n\t"
705 "xchgl %%ebx, %1\n\t"
706 : "=a" (*(uint32_t *)pvEAX),
707 "=r" (*(uint32_t *)pvEBX),
708 "=c" (*(uint32_t *)pvECX),
709 "=d" (*(uint32_t *)pvEDX)
710 : "0" (uOperator),
711 "2" (uIdxECX));
712# endif
713
714# elif RT_INLINE_ASM_USES_INTRIN
715 int aInfo[4];
716 /** @todo This path ignores uIdxECX; a sub-leaf aware intrinsic (__cpuidex where available) would be needed. */
717 __cpuid(aInfo, uOperator);
718 *(uint32_t *)pvEAX = aInfo[0];
719 *(uint32_t *)pvEBX = aInfo[1];
720 *(uint32_t *)pvECX = aInfo[2];
721 *(uint32_t *)pvEDX = aInfo[3];
722
723# else
724 uint32_t uEAX;
725 uint32_t uEBX;
726 uint32_t uECX;
727 uint32_t uEDX;
728 __asm
729 {
730 push ebx
731 mov eax, [uOperator]
732 mov ecx, [uIdxECX]
733 cpuid
734 mov [uEAX], eax
735 mov [uEBX], ebx
736 mov [uECX], ecx
737 mov [uEDX], edx
738 pop ebx
739 }
740 *(uint32_t *)pvEAX = uEAX;
741 *(uint32_t *)pvEBX = uEBX;
742 *(uint32_t *)pvECX = uECX;
743 *(uint32_t *)pvEDX = uEDX;
744# endif
745}
746#endif
747
748
749/**
750 * Performs the cpuid instruction returning ecx and edx.
751 *
752 * @param uOperator CPUID operation (eax).
753 * @param pvECX Where to store ecx.
754 * @param pvEDX Where to store edx.
755 * @remark We're using void pointers to ease the use of special bitfield structures and such.
756 */
757#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
758DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
759#else
760DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
761{
762 uint32_t uEBX;
763 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
764}
765#endif
766
767
768/**
769 * Performs the cpuid instruction returning edx.
770 *
771 * @param uOperator CPUID operation (eax).
772 * @returns EDX after cpuid operation.
773 */
774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
775DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
776#else
777DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
778{
779 RTCCUINTREG xDX;
780# if RT_INLINE_ASM_GNU_STYLE
781# ifdef RT_ARCH_AMD64
782 RTCCUINTREG uSpill;
783 __asm__ ("cpuid"
784 : "=a" (uSpill),
785 "=d" (xDX)
786 : "0" (uOperator)
787 : "rbx", "rcx");
788# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
789 __asm__ ("push %%ebx\n\t"
790 "cpuid\n\t"
791 "pop %%ebx\n\t"
792 : "=a" (uOperator),
793 "=d" (xDX)
794 : "0" (uOperator)
795 : "ecx");
796# else
797 __asm__ ("cpuid"
798 : "=a" (uOperator),
799 "=d" (xDX)
800 : "0" (uOperator)
801 : "ebx", "ecx");
802# endif
803
804# elif RT_INLINE_ASM_USES_INTRIN
805 int aInfo[4];
806 __cpuid(aInfo, uOperator);
807 xDX = aInfo[3];
808
809# else
810 __asm
811 {
812 push ebx
813 mov eax, [uOperator]
814 cpuid
815 mov [xDX], edx
816 pop ebx
817 }
818# endif
819 return (uint32_t)xDX;
820}
821#endif
822
823
824/**
825 * Performs the cpuid instruction returning ecx.
826 *
827 * @param uOperator CPUID operation (eax).
828 * @returns ECX after cpuid operation.
829 */
830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
831DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
832#else
833DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
834{
835 RTCCUINTREG xCX;
836# if RT_INLINE_ASM_GNU_STYLE
837# ifdef RT_ARCH_AMD64
838 RTCCUINTREG uSpill;
839 __asm__ ("cpuid"
840 : "=a" (uSpill),
841 "=c" (xCX)
842 : "0" (uOperator)
843 : "rbx", "rdx");
844# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
845 __asm__ ("push %%ebx\n\t"
846 "cpuid\n\t"
847 "pop %%ebx\n\t"
848 : "=a" (uOperator),
849 "=c" (xCX)
850 : "0" (uOperator)
851 : "edx");
852# else
853 __asm__ ("cpuid"
854 : "=a" (uOperator),
855 "=c" (xCX)
856 : "0" (uOperator)
857 : "ebx", "edx");
858
859# endif
860
861# elif RT_INLINE_ASM_USES_INTRIN
862 int aInfo[4];
863 __cpuid(aInfo, uOperator);
864 xCX = aInfo[2];
865
866# else
867 __asm
868 {
869 push ebx
870 mov eax, [uOperator]
871 cpuid
872 mov [xCX], ecx
873 pop ebx
874 }
875# endif
876 return (uint32_t)xCX;
877}
878#endif
879
880
881/**
882 * Checks if the current CPU supports CPUID.
883 *
884 * @returns true if CPUID is supported.
885 */
886DECLINLINE(bool) ASMHasCpuId(void)
887{
888#ifdef RT_ARCH_AMD64
889 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
890#else /* !RT_ARCH_AMD64 */
891 bool fRet = false;
892# if RT_INLINE_ASM_GNU_STYLE
893 uint32_t u1;
894 uint32_t u2;
895 __asm__ ("pushf\n\t"
896 "pop %1\n\t"
897 "mov %1, %2\n\t"
898 "xorl $0x200000, %1\n\t"
899 "push %1\n\t"
900 "popf\n\t"
901 "pushf\n\t"
902 "pop %1\n\t"
903 "cmpl %1, %2\n\t"
904 "setne %0\n\t"
905 "push %2\n\t"
906 "popf\n\t"
907 : "=m" (fRet), "=r" (u1), "=r" (u2));
908# else
909 __asm
910 {
911 pushfd
912 pop eax
913 mov ebx, eax
914 xor eax, 0200000h
915 push eax
916 popfd
917 pushfd
918 pop eax
919 cmp eax, ebx
920 setne fRet
921 push ebx
922 popfd
923 }
924# endif
925 return fRet;
926#endif /* !RT_ARCH_AMD64 */
927}
928
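/** A minimal usage sketch (illustrative): guarding CPUID-based probing on
 *  32-bit hosts, where ancient CPUs may lack the instruction.
 * @code
 * static uint32_t GetStdFeatureEdx(void)
 * {
 *     if (!ASMHasCpuId())
 *         return 0;
 *     return ASMCpuId_EDX(1);    // standard feature flags (leaf 1, EDX)
 * }
 * @endcode
 */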
929
930/**
931 * Gets the APIC ID of the current CPU.
932 *
933 * @returns the APIC ID.
934 */
935#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
936DECLASM(uint8_t) ASMGetApicId(void);
937#else
938DECLINLINE(uint8_t) ASMGetApicId(void)
939{
940 RTCCUINTREG xBX;
941# if RT_INLINE_ASM_GNU_STYLE
942# ifdef RT_ARCH_AMD64
943 RTCCUINTREG uSpill;
944 __asm__ ("cpuid"
945 : "=a" (uSpill),
946 "=b" (xBX)
947 : "0" (1)
948 : "rcx", "rdx");
949# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
950 RTCCUINTREG uSpill;
951 __asm__ ("mov %%ebx,%1\n\t"
952 "cpuid\n\t"
953 "xchgl %%ebx,%1\n\t"
954 : "=a" (uSpill),
955 "=r" (xBX)
956 : "0" (1)
957 : "ecx", "edx");
958# else
959 RTCCUINTREG uSpill;
960 __asm__ ("cpuid"
961 : "=a" (uSpill),
962 "=b" (xBX)
963 : "0" (1)
964 : "ecx", "edx");
965# endif
966
967# elif RT_INLINE_ASM_USES_INTRIN
968 int aInfo[4];
969 __cpuid(aInfo, 1);
970 xBX = aInfo[1];
971
972# else
973 __asm
974 {
975 push ebx
976 mov eax, 1
977 cpuid
978 mov [xBX], ebx
979 pop ebx
980 }
981# endif
982 return (uint8_t)(xBX >> 24);
983}
984#endif
985
986
987/**
988 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
989 *
990 * @returns true/false.
991 * @param uEBX EBX return from ASMCpuId(0)
992 * @param uECX ECX return from ASMCpuId(0)
993 * @param uEDX EDX return from ASMCpuId(0)
994 */
995DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
996{
997 return uEBX == UINT32_C(0x756e6547)
998 && uECX == UINT32_C(0x6c65746e)
999 && uEDX == UINT32_C(0x49656e69);
1000}
1001
1002
1003/**
1004 * Tests if this is a genuine Intel CPU.
1005 *
1006 * @returns true/false.
1007 * @remarks ASSUMES that cpuid is supported by the CPU.
1008 */
1009DECLINLINE(bool) ASMIsIntelCpu(void)
1010{
1011 uint32_t uEAX, uEBX, uECX, uEDX;
1012 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1013 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
1014}
1015
1016
1017/**
1018 * Tests if it is an authentic AMD CPU based on the ASMCpuId(0) output.
1019 *
1020 * @returns true/false.
1021 * @param uEBX EBX return from ASMCpuId(0)
1022 * @param uECX ECX return from ASMCpuId(0)
1023 * @param uEDX EDX return from ASMCpuId(0)
1024 */
1025DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
1026{
1027 return uEBX == UINT32_C(0x68747541)
1028 && uECX == UINT32_C(0x444d4163)
1029 && uEDX == UINT32_C(0x69746e65);
1030}
1031
1032
1033/**
1034 * Tests if this is an authentic AMD CPU.
1035 *
1036 * @returns true/false.
1037 * @remarks ASSUMES that cpuid is supported by the CPU.
1038 */
1039DECLINLINE(bool) ASMIsAmdCpu(void)
1040{
1041 uint32_t uEAX, uEBX, uECX, uEDX;
1042 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1043 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
1044}
1045
1046
1047/**
1048 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1049 *
1050 * @returns Family.
1051 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1052 */
1053DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1054{
1055 return ((uEAX >> 8) & 0xf) == 0xf
1056 ? ((uEAX >> 20) & 0x7f) + 0xf
1057 : ((uEAX >> 8) & 0xf);
1058}
1059
1060
1061/**
1062 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1063 *
1064 * @returns Model.
1065 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1066 */
1067DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1068{
1069 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1070 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1071 : ((uEAX >> 4) & 0xf);
1072}
1073
1074
1075/**
1076 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1077 *
1078 * @returns Model.
1079 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1080 */
1081DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1082{
1083 return ((uEAX >> 8) & 0xf) == 0xf
1084 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1085 : ((uEAX >> 4) & 0xf);
1086}
1087
1088
1089/**
1090 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1091 *
1092 * @returns Model.
1093 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1094 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1095 */
1096DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1097{
1098 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1099 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1100 : ((uEAX >> 4) & 0xf);
1101}
1102
1103
1104/**
1105 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1106 *
1107 * @returns Stepping.
1108 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1109 */
1110DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1111{
1112 return uEAX & 0xf;
1113}
1114
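/** A minimal usage sketch (illustrative): decoding family, model and stepping
 *  from standard leaf 1 with the helpers above.
 * @code
 * static void GetFamilyModelStepping(uint32_t *puFamily, uint32_t *puModel, uint32_t *puStepping)
 * {
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     *puFamily   = ASMGetCpuFamily(uEAX);
 *     *puModel    = ASMGetCpuModel(uEAX, ASMIsIntelCpu());
 *     *puStepping = ASMGetCpuStepping(uEAX);
 * }
 * @endcode
 */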
1115
1116/**
1117 * Get cr0.
1118 * @returns cr0.
1119 */
1120#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1121DECLASM(RTCCUINTREG) ASMGetCR0(void);
1122#else
1123DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1124{
1125 RTCCUINTREG uCR0;
1126# if RT_INLINE_ASM_USES_INTRIN
1127 uCR0 = __readcr0();
1128
1129# elif RT_INLINE_ASM_GNU_STYLE
1130# ifdef RT_ARCH_AMD64
1131 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1132# else
1133 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1134# endif
1135# else
1136 __asm
1137 {
1138# ifdef RT_ARCH_AMD64
1139 mov rax, cr0
1140 mov [uCR0], rax
1141# else
1142 mov eax, cr0
1143 mov [uCR0], eax
1144# endif
1145 }
1146# endif
1147 return uCR0;
1148}
1149#endif
1150
1151
1152/**
1153 * Sets the CR0 register.
1154 * @param uCR0 The new CR0 value.
1155 */
1156#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1157DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1158#else
1159DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1160{
1161# if RT_INLINE_ASM_USES_INTRIN
1162 __writecr0(uCR0);
1163
1164# elif RT_INLINE_ASM_GNU_STYLE
1165# ifdef RT_ARCH_AMD64
1166 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1167# else
1168 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1169# endif
1170# else
1171 __asm
1172 {
1173# ifdef RT_ARCH_AMD64
1174 mov rax, [uCR0]
1175 mov cr0, rax
1176# else
1177 mov eax, [uCR0]
1178 mov cr0, eax
1179# endif
1180 }
1181# endif
1182}
1183#endif
1184
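/** A minimal usage sketch (illustrative): a read-modify-write of CR0 that
 *  clears the write-protect bit (CR0.WP, bit 16); real code would normally
 *  keep interrupts disabled between the read and the restore.
 * @code
 * static RTCCUINTREG DisableCr0WriteProtect(void)
 * {
 *     RTCCUINTREG const uCr0 = ASMGetCR0();
 *     ASMSetCR0(uCr0 & ~(RTCCUINTREG)0x10000);    // clear CR0.WP
 *     return uCr0;                                // caller restores with ASMSetCR0()
 * }
 * @endcode
 */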
1185
1186/**
1187 * Get cr2.
1188 * @returns cr2.
1189 */
1190#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1191DECLASM(RTCCUINTREG) ASMGetCR2(void);
1192#else
1193DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1194{
1195 RTCCUINTREG uCR2;
1196# if RT_INLINE_ASM_USES_INTRIN
1197 uCR2 = __readcr2();
1198
1199# elif RT_INLINE_ASM_GNU_STYLE
1200# ifdef RT_ARCH_AMD64
1201 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1202# else
1203 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1204# endif
1205# else
1206 __asm
1207 {
1208# ifdef RT_ARCH_AMD64
1209 mov rax, cr2
1210 mov [uCR2], rax
1211# else
1212 mov eax, cr2
1213 mov [uCR2], eax
1214# endif
1215 }
1216# endif
1217 return uCR2;
1218}
1219#endif
1220
1221
1222/**
1223 * Sets the CR2 register.
1224 * @param uCR2 The new CR2 value.
1225 */
1226#if RT_INLINE_ASM_EXTERNAL
1227DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1228#else
1229DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1230{
1231# if RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1234# else
1235 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR2]
1242 mov cr2, rax
1243# else
1244 mov eax, [uCR2]
1245 mov cr2, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Get cr3.
1255 * @returns cr3.
1256 */
1257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1258DECLASM(RTCCUINTREG) ASMGetCR3(void);
1259#else
1260DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1261{
1262 RTCCUINTREG uCR3;
1263# if RT_INLINE_ASM_USES_INTRIN
1264 uCR3 = __readcr3();
1265
1266# elif RT_INLINE_ASM_GNU_STYLE
1267# ifdef RT_ARCH_AMD64
1268 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1269# else
1270 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1271# endif
1272# else
1273 __asm
1274 {
1275# ifdef RT_ARCH_AMD64
1276 mov rax, cr3
1277 mov [uCR3], rax
1278# else
1279 mov eax, cr3
1280 mov [uCR3], eax
1281# endif
1282 }
1283# endif
1284 return uCR3;
1285}
1286#endif
1287
1288
1289/**
1290 * Sets the CR3 register.
1291 *
1292 * @param uCR3 New CR3 value.
1293 */
1294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1295DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1296#else
1297DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1298{
1299# if RT_INLINE_ASM_USES_INTRIN
1300 __writecr3(uCR3);
1301
1302# elif RT_INLINE_ASM_GNU_STYLE
1303# ifdef RT_ARCH_AMD64
1304 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1305# else
1306 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1307# endif
1308# else
1309 __asm
1310 {
1311# ifdef RT_ARCH_AMD64
1312 mov rax, [uCR3]
1313 mov cr3, rax
1314# else
1315 mov eax, [uCR3]
1316 mov cr3, eax
1317# endif
1318 }
1319# endif
1320}
1321#endif
1322
1323
1324/**
1325 * Reloads the CR3 register.
1326 */
1327#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1328DECLASM(void) ASMReloadCR3(void);
1329#else
1330DECLINLINE(void) ASMReloadCR3(void)
1331{
1332# if RT_INLINE_ASM_USES_INTRIN
1333 __writecr3(__readcr3());
1334
1335# elif RT_INLINE_ASM_GNU_STYLE
1336 RTCCUINTREG u;
1337# ifdef RT_ARCH_AMD64
1338 __asm__ __volatile__("movq %%cr3, %0\n\t"
1339 "movq %0, %%cr3\n\t"
1340 : "=r" (u));
1341# else
1342 __asm__ __volatile__("movl %%cr3, %0\n\t"
1343 "movl %0, %%cr3\n\t"
1344 : "=r" (u));
1345# endif
1346# else
1347 __asm
1348 {
1349# ifdef RT_ARCH_AMD64
1350 mov rax, cr3
1351 mov cr3, rax
1352# else
1353 mov eax, cr3
1354 mov cr3, eax
1355# endif
1356 }
1357# endif
1358}
1359#endif
1360
1361
1362/**
1363 * Get cr4.
1364 * @returns cr4.
1365 */
1366#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1367DECLASM(RTCCUINTREG) ASMGetCR4(void);
1368#else
1369DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1370{
1371 RTCCUINTREG uCR4;
1372# if RT_INLINE_ASM_USES_INTRIN
1373 uCR4 = __readcr4();
1374
1375# elif RT_INLINE_ASM_GNU_STYLE
1376# ifdef RT_ARCH_AMD64
1377 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1378# else
1379 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1380# endif
1381# else
1382 __asm
1383 {
1384# ifdef RT_ARCH_AMD64
1385 mov rax, cr4
1386 mov [uCR4], rax
1387# else
1388 push eax /* just in case */
1389 /*mov eax, cr4*/
1390 _emit 0x0f
1391 _emit 0x20
1392 _emit 0xe0
1393 mov [uCR4], eax
1394 pop eax
1395# endif
1396 }
1397# endif
1398 return uCR4;
1399}
1400#endif
1401
1402
1403/**
1404 * Sets the CR4 register.
1405 *
1406 * @param uCR4 New CR4 value.
1407 */
1408#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1409DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1410#else
1411DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1412{
1413# if RT_INLINE_ASM_USES_INTRIN
1414 __writecr4(uCR4);
1415
1416# elif RT_INLINE_ASM_GNU_STYLE
1417# ifdef RT_ARCH_AMD64
1418 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1419# else
1420 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1421# endif
1422# else
1423 __asm
1424 {
1425# ifdef RT_ARCH_AMD64
1426 mov rax, [uCR4]
1427 mov cr4, rax
1428# else
1429 mov eax, [uCR4]
1430 _emit 0x0F
1431 _emit 0x22
1432 _emit 0xE0 /* mov cr4, eax */
1433# endif
1434 }
1435# endif
1436}
1437#endif
1438
1439
1440/**
1441 * Get cr8.
1442 * @returns cr8.
1443 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1444 */
1445#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1446DECLASM(RTCCUINTREG) ASMGetCR8(void);
1447#else
1448DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1449{
1450# ifdef RT_ARCH_AMD64
1451 RTCCUINTREG uCR8;
1452# if RT_INLINE_ASM_USES_INTRIN
1453 uCR8 = __readcr8();
1454
1455# elif RT_INLINE_ASM_GNU_STYLE
1456 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1457# else
1458 __asm
1459 {
1460 mov rax, cr8
1461 mov [uCR8], rax
1462 }
1463# endif
1464 return uCR8;
1465# else /* !RT_ARCH_AMD64 */
1466 return 0;
1467# endif /* !RT_ARCH_AMD64 */
1468}
1469#endif
1470
1471
1472/**
1473 * Enables interrupts (EFLAGS.IF).
1474 */
1475#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1476DECLASM(void) ASMIntEnable(void);
1477#else
1478DECLINLINE(void) ASMIntEnable(void)
1479{
1480# if RT_INLINE_ASM_GNU_STYLE
1481 __asm("sti\n");
1482# elif RT_INLINE_ASM_USES_INTRIN
1483 _enable();
1484# else
1485 __asm sti
1486# endif
1487}
1488#endif
1489
1490
1491/**
1492 * Disables interrupts (!EFLAGS.IF).
1493 */
1494#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1495DECLASM(void) ASMIntDisable(void);
1496#else
1497DECLINLINE(void) ASMIntDisable(void)
1498{
1499# if RT_INLINE_ASM_GNU_STYLE
1500 __asm("cli\n");
1501# elif RT_INLINE_ASM_USES_INTRIN
1502 _disable();
1503# else
1504 __asm cli
1505# endif
1506}
1507#endif
1508
1509
1510/**
1511 * Disables interrupts and returns previous xFLAGS.
1512 */
1513#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1514DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1515#else
1516DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1517{
1518 RTCCUINTREG xFlags;
1519# if RT_INLINE_ASM_GNU_STYLE
1520# ifdef RT_ARCH_AMD64
1521 __asm__ __volatile__("pushfq\n\t"
1522 "cli\n\t"
1523 "popq %0\n\t"
1524 : "=r" (xFlags));
1525# else
1526 __asm__ __volatile__("pushfl\n\t"
1527 "cli\n\t"
1528 "popl %0\n\t"
1529 : "=r" (xFlags));
1530# endif
1531# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1532 xFlags = ASMGetFlags();
1533 _disable();
1534# else
1535 __asm {
1536 pushfd
1537 cli
1538 pop [xFlags]
1539 }
1540# endif
1541 return xFlags;
1542}
1543#endif
1544
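/** A minimal usage sketch (illustrative): the usual save/disable/restore
 *  pattern built from ASMIntDisableFlags() and ASMSetFlags().
 * @code
 * static void DoWorkWithInterruptsDisabled(void (*pfnWork)(void))
 * {
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     pfnWork();                  // runs with EFLAGS.IF clear
 *     ASMSetFlags(fSavedFlags);   // restores the previous interrupt state
 * }
 * @endcode
 */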
1545
1546/**
1547 * Are interrupts enabled?
1548 *
1549 * @returns true / false.
1550 */
1551DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1552{
1553 RTCCUINTREG uFlags = ASMGetFlags();
1554 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1555}
1556
1557
1558/**
1559 * Halts the CPU until interrupted.
1560 */
1561#if RT_INLINE_ASM_EXTERNAL
1562DECLASM(void) ASMHalt(void);
1563#else
1564DECLINLINE(void) ASMHalt(void)
1565{
1566# if RT_INLINE_ASM_GNU_STYLE
1567 __asm__ __volatile__("hlt\n\t");
1568# else
1569 __asm {
1570 hlt
1571 }
1572# endif
1573}
1574#endif
1575
1576
1577/**
1578 * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
1579 */
1580#if RT_INLINE_ASM_EXTERNAL
1581DECLASM(void) ASMNopPause(void);
1582#else
1583DECLINLINE(void) ASMNopPause(void)
1584{
1585# if RT_INLINE_ASM_GNU_STYLE
1586 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1587# else
1588 __asm {
1589 _emit 0f3h
1590 _emit 090h
1591 }
1592# endif
1593}
1594#endif
1595
1596
1597/**
1598 * Reads a machine specific register.
1599 *
1600 * @returns Register content.
1601 * @param uRegister Register to read.
1602 */
1603#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1604DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1605#else
1606DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1607{
1608 RTUINT64U u;
1609# if RT_INLINE_ASM_GNU_STYLE
1610 __asm__ __volatile__("rdmsr\n\t"
1611 : "=a" (u.s.Lo),
1612 "=d" (u.s.Hi)
1613 : "c" (uRegister));
1614
1615# elif RT_INLINE_ASM_USES_INTRIN
1616 u.u = __readmsr(uRegister);
1617
1618# else
1619 __asm
1620 {
1621 mov ecx, [uRegister]
1622 rdmsr
1623 mov [u.s.Lo], eax
1624 mov [u.s.Hi], edx
1625 }
1626# endif
1627
1628 return u.u;
1629}
1630#endif
1631
1632
1633/**
1634 * Writes a machine specific register.
1635 *
1636 *
1637 * @param uRegister Register to write to.
1638 * @param u64Val Value to write.
1639 */
1640#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1641DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1642#else
1643DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1644{
1645 RTUINT64U u;
1646
1647 u.u = u64Val;
1648# if RT_INLINE_ASM_GNU_STYLE
1649 __asm__ __volatile__("wrmsr\n\t"
1650 ::"a" (u.s.Lo),
1651 "d" (u.s.Hi),
1652 "c" (uRegister));
1653
1654# elif RT_INLINE_ASM_USES_INTRIN
1655 __writemsr(uRegister, u.u);
1656
1657# else
1658 __asm
1659 {
1660 mov ecx, [uRegister]
1661 mov edx, [u.s.Hi]
1662 mov eax, [u.s.Lo]
1663 wrmsr
1664 }
1665# endif
1666}
1667#endif
1668
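/** A minimal usage sketch (illustrative): a read-modify-write of an MSR using
 *  ASMRdMsr() and ASMWrMsr(); the register number and bit are left to the caller.
 * @code
 * static void SetMsrBit(uint32_t uMsr, unsigned iBit)
 * {
 *     uint64_t u64 = ASMRdMsr(uMsr);
 *     u64 |= UINT64_C(1) << iBit;
 *     ASMWrMsr(uMsr, u64);
 * }
 * @endcode
 */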
1669
1670/**
1671 * Reads low part of a machine specific register.
1672 *
1673 * @returns Register content.
1674 * @param uRegister Register to read.
1675 */
1676#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1677DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1678#else
1679DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1680{
1681 uint32_t u32;
1682# if RT_INLINE_ASM_GNU_STYLE
1683 __asm__ __volatile__("rdmsr\n\t"
1684 : "=a" (u32)
1685 : "c" (uRegister)
1686 : "edx");
1687
1688# elif RT_INLINE_ASM_USES_INTRIN
1689 u32 = (uint32_t)__readmsr(uRegister);
1690
1691#else
1692 __asm
1693 {
1694 mov ecx, [uRegister]
1695 rdmsr
1696 mov [u32], eax
1697 }
1698# endif
1699
1700 return u32;
1701}
1702#endif
1703
1704
1705/**
1706 * Reads high part of a machine specific register.
1707 *
1708 * @returns Register content.
1709 * @param uRegister Register to read.
1710 */
1711#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1712DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1713#else
1714DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1715{
1716 uint32_t u32;
1717# if RT_INLINE_ASM_GNU_STYLE
1718 __asm__ __volatile__("rdmsr\n\t"
1719 : "=d" (u32)
1720 : "c" (uRegister)
1721 : "eax");
1722
1723# elif RT_INLINE_ASM_USES_INTRIN
1724 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1725
1726# else
1727 __asm
1728 {
1729 mov ecx, [uRegister]
1730 rdmsr
1731 mov [u32], edx
1732 }
1733# endif
1734
1735 return u32;
1736}
1737#endif
1738
1739
1740/**
1741 * Gets dr0.
1742 *
1743 * @returns dr0.
1744 */
1745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1746DECLASM(RTCCUINTREG) ASMGetDR0(void);
1747#else
1748DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1749{
1750 RTCCUINTREG uDR0;
1751# if RT_INLINE_ASM_USES_INTRIN
1752 uDR0 = __readdr(0);
1753# elif RT_INLINE_ASM_GNU_STYLE
1754# ifdef RT_ARCH_AMD64
1755 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1756# else
1757 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1758# endif
1759# else
1760 __asm
1761 {
1762# ifdef RT_ARCH_AMD64
1763 mov rax, dr0
1764 mov [uDR0], rax
1765# else
1766 mov eax, dr0
1767 mov [uDR0], eax
1768# endif
1769 }
1770# endif
1771 return uDR0;
1772}
1773#endif
1774
1775
1776/**
1777 * Gets dr1.
1778 *
1779 * @returns dr1.
1780 */
1781#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1782DECLASM(RTCCUINTREG) ASMGetDR1(void);
1783#else
1784DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1785{
1786 RTCCUINTREG uDR1;
1787# if RT_INLINE_ASM_USES_INTRIN
1788 uDR1 = __readdr(1);
1789# elif RT_INLINE_ASM_GNU_STYLE
1790# ifdef RT_ARCH_AMD64
1791 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1792# else
1793 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1794# endif
1795# else
1796 __asm
1797 {
1798# ifdef RT_ARCH_AMD64
1799 mov rax, dr1
1800 mov [uDR1], rax
1801# else
1802 mov eax, dr1
1803 mov [uDR1], eax
1804# endif
1805 }
1806# endif
1807 return uDR1;
1808}
1809#endif
1810
1811
1812/**
1813 * Gets dr2.
1814 *
1815 * @returns dr2.
1816 */
1817#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1818DECLASM(RTCCUINTREG) ASMGetDR2(void);
1819#else
1820DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1821{
1822 RTCCUINTREG uDR2;
1823# if RT_INLINE_ASM_USES_INTRIN
1824 uDR2 = __readdr(2);
1825# elif RT_INLINE_ASM_GNU_STYLE
1826# ifdef RT_ARCH_AMD64
1827 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1828# else
1829 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1830# endif
1831# else
1832 __asm
1833 {
1834# ifdef RT_ARCH_AMD64
1835 mov rax, dr2
1836 mov [uDR2], rax
1837# else
1838 mov eax, dr2
1839 mov [uDR2], eax
1840# endif
1841 }
1842# endif
1843 return uDR2;
1844}
1845#endif
1846
1847
1848/**
1849 * Gets dr3.
1850 *
1851 * @returns dr3.
1852 */
1853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1854DECLASM(RTCCUINTREG) ASMGetDR3(void);
1855#else
1856DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1857{
1858 RTCCUINTREG uDR3;
1859# if RT_INLINE_ASM_USES_INTRIN
1860 uDR3 = __readdr(3);
1861# elif RT_INLINE_ASM_GNU_STYLE
1862# ifdef RT_ARCH_AMD64
1863 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1864# else
1865 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1866# endif
1867# else
1868 __asm
1869 {
1870# ifdef RT_ARCH_AMD64
1871 mov rax, dr3
1872 mov [uDR3], rax
1873# else
1874 mov eax, dr3
1875 mov [uDR3], eax
1876# endif
1877 }
1878# endif
1879 return uDR3;
1880}
1881#endif
1882
1883
1884/**
1885 * Gets dr6.
1886 *
1887 * @returns dr6.
1888 */
1889#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1890DECLASM(RTCCUINTREG) ASMGetDR6(void);
1891#else
1892DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1893{
1894 RTCCUINTREG uDR6;
1895# if RT_INLINE_ASM_USES_INTRIN
1896 uDR6 = __readdr(6);
1897# elif RT_INLINE_ASM_GNU_STYLE
1898# ifdef RT_ARCH_AMD64
1899 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1900# else
1901 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1902# endif
1903# else
1904 __asm
1905 {
1906# ifdef RT_ARCH_AMD64
1907 mov rax, dr6
1908 mov [uDR6], rax
1909# else
1910 mov eax, dr6
1911 mov [uDR6], eax
1912# endif
1913 }
1914# endif
1915 return uDR6;
1916}
1917#endif
1918
1919
1920/**
1921 * Reads and clears DR6.
1922 *
1923 * @returns DR6.
1924 */
1925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1926DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1927#else
1928DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1929{
1930 RTCCUINTREG uDR6;
1931# if RT_INLINE_ASM_USES_INTRIN
1932 uDR6 = __readdr(6);
1933 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1934# elif RT_INLINE_ASM_GNU_STYLE
1935 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1936# ifdef RT_ARCH_AMD64
1937 __asm__ __volatile__("movq %%dr6, %0\n\t"
1938 "movq %1, %%dr6\n\t"
1939 : "=r" (uDR6)
1940 : "r" (uNewValue));
1941# else
1942 __asm__ __volatile__("movl %%dr6, %0\n\t"
1943 "movl %1, %%dr6\n\t"
1944 : "=r" (uDR6)
1945 : "r" (uNewValue));
1946# endif
1947# else
1948 __asm
1949 {
1950# ifdef RT_ARCH_AMD64
1951 mov rax, dr6
1952 mov [uDR6], rax
1953 mov rcx, rax
1954 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1955 mov dr6, rcx
1956# else
1957 mov eax, dr6
1958 mov [uDR6], eax
1959 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1960 mov dr6, ecx
1961# endif
1962 }
1963# endif
1964 return uDR6;
1965}
1966#endif
1967
1968
1969/**
1970 * Gets dr7.
1971 *
1972 * @returns dr7.
1973 */
1974#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1975DECLASM(RTCCUINTREG) ASMGetDR7(void);
1976#else
1977DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1978{
1979 RTCCUINTREG uDR7;
1980# if RT_INLINE_ASM_USES_INTRIN
1981 uDR7 = __readdr(7);
1982# elif RT_INLINE_ASM_GNU_STYLE
1983# ifdef RT_ARCH_AMD64
1984 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1985# else
1986 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1987# endif
1988# else
1989 __asm
1990 {
1991# ifdef RT_ARCH_AMD64
1992 mov rax, dr7
1993 mov [uDR7], rax
1994# else
1995 mov eax, dr7
1996 mov [uDR7], eax
1997# endif
1998 }
1999# endif
2000 return uDR7;
2001}
2002#endif
2003
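/** A minimal usage sketch (illustrative): checking whether any hardware
 *  breakpoint is currently armed by looking at the L0-L3/G0-G3 enable bits
 *  in the low byte of DR7.
 * @code
 * static bool AreHwBreakpointsArmed(void)
 * {
 *     return (ASMGetDR7() & 0xff) != 0;
 * }
 * @endcode
 */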
2004
2005/**
2006 * Sets dr0.
2007 *
2008 * @param uDRVal Debug register value to write
2009 */
2010#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2011DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
2012#else
2013DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
2014{
2015# if RT_INLINE_ASM_USES_INTRIN
2016 __writedr(0, uDRVal);
2017# elif RT_INLINE_ASM_GNU_STYLE
2018# ifdef RT_ARCH_AMD64
2019 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
2020# else
2021 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
2022# endif
2023# else
2024 __asm
2025 {
2026# ifdef RT_ARCH_AMD64
2027 mov rax, [uDRVal]
2028 mov dr0, rax
2029# else
2030 mov eax, [uDRVal]
2031 mov dr0, eax
2032# endif
2033 }
2034# endif
2035}
2036#endif
2037
2038
2039/**
2040 * Sets dr1.
2041 *
2042 * @param uDRVal Debug register value to write
2043 */
2044#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2045DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2046#else
2047DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2048{
2049# if RT_INLINE_ASM_USES_INTRIN
2050 __writedr(1, uDRVal);
2051# elif RT_INLINE_ASM_GNU_STYLE
2052# ifdef RT_ARCH_AMD64
2053 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2054# else
2055 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2056# endif
2057# else
2058 __asm
2059 {
2060# ifdef RT_ARCH_AMD64
2061 mov rax, [uDRVal]
2062 mov dr1, rax
2063# else
2064 mov eax, [uDRVal]
2065 mov dr1, eax
2066# endif
2067 }
2068# endif
2069}
2070#endif
2071
2072
2073/**
2074 * Sets dr2.
2075 *
2076 * @param uDRVal Debug register value to write
2077 */
2078#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2079DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2080#else
2081DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2082{
2083# if RT_INLINE_ASM_USES_INTRIN
2084 __writedr(2, uDRVal);
2085# elif RT_INLINE_ASM_GNU_STYLE
2086# ifdef RT_ARCH_AMD64
2087 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2088# else
2089 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2090# endif
2091# else
2092 __asm
2093 {
2094# ifdef RT_ARCH_AMD64
2095 mov rax, [uDRVal]
2096 mov dr2, rax
2097# else
2098 mov eax, [uDRVal]
2099 mov dr2, eax
2100# endif
2101 }
2102# endif
2103}
2104#endif
2105
2106
2107/**
2108 * Sets dr3.
2109 *
2110 * @param uDRVal Debug register value to write
2111 */
2112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2113DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2114#else
2115DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2116{
2117# if RT_INLINE_ASM_USES_INTRIN
2118 __writedr(3, uDRVal);
2119# elif RT_INLINE_ASM_GNU_STYLE
2120# ifdef RT_ARCH_AMD64
2121 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2122# else
2123 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2124# endif
2125# else
2126 __asm
2127 {
2128# ifdef RT_ARCH_AMD64
2129 mov rax, [uDRVal]
2130 mov dr3, rax
2131# else
2132 mov eax, [uDRVal]
2133 mov dr3, eax
2134# endif
2135 }
2136# endif
2137}
2138#endif
2139
2140
2141/**
2142 * Sets dr6.
2143 *
2144 * @param uDRVal Debug register value to write
2145 */
2146#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2147DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2148#else
2149DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2150{
2151# if RT_INLINE_ASM_USES_INTRIN
2152 __writedr(6, uDRVal);
2153# elif RT_INLINE_ASM_GNU_STYLE
2154# ifdef RT_ARCH_AMD64
2155 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2156# else
2157 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2158# endif
2159# else
2160 __asm
2161 {
2162# ifdef RT_ARCH_AMD64
2163 mov rax, [uDRVal]
2164 mov dr6, rax
2165# else
2166 mov eax, [uDRVal]
2167 mov dr6, eax
2168# endif
2169 }
2170# endif
2171}
2172#endif
2173
2174
2175/**
2176 * Sets dr7.
2177 *
2178 * @param uDRVal Debug register value to write
2179 */
2180#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2181DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2182#else
2183DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2184{
2185# if RT_INLINE_ASM_USES_INTRIN
2186 __writedr(7, uDRVal);
2187# elif RT_INLINE_ASM_GNU_STYLE
2188# ifdef RT_ARCH_AMD64
2189 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2190# else
2191 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2192# endif
2193# else
2194 __asm
2195 {
2196# ifdef RT_ARCH_AMD64
2197 mov rax, [uDRVal]
2198 mov dr7, rax
2199# else
2200 mov eax, [uDRVal]
2201 mov dr7, eax
2202# endif
2203 }
2204# endif
2205}
2206#endif
2207
2208
2209/**
2210 * Compiler memory barrier.
2211 *
2212 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2213 * values or any outstanding writes when returning from this function.
2214 *
2215 * This function must be used if non-volatile data is modified by a
2216 * device or the VMM. Typical cases are port access, MMIO access,
2217 * trapping instructions, etc.
2218 */
2219#if RT_INLINE_ASM_GNU_STYLE
2220# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2221#elif RT_INLINE_ASM_USES_INTRIN
2222# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2223#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2224DECLINLINE(void) ASMCompilerBarrier(void)
2225{
2226 __asm
2227 {
2228 }
2229}
2230#endif
2231
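/** A minimal usage sketch (illustrative): forcing the compiler to re-read a
 *  flag that an interrupt handler or the VMM may change behind its back.
 * @code
 * static void WaitForFlag(uint32_t *pfFlag)
 * {
 *     while (!*pfFlag)
 *     {
 *         ASMCompilerBarrier();   // discard the cached copy of *pfFlag
 *         ASMNopPause();          // be kind to a hyperthreaded sibling
 *     }
 * }
 * @endcode
 */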
2232
2233/**
2234 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2235 *
2236 * @param Port I/O port to write to.
2237 * @param u8 8-bit integer to write.
2238 */
2239#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2240DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2241#else
2242DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2243{
2244# if RT_INLINE_ASM_GNU_STYLE
2245 __asm__ __volatile__("outb %b1, %w0\n\t"
2246 :: "Nd" (Port),
2247 "a" (u8));
2248
2249# elif RT_INLINE_ASM_USES_INTRIN
2250 __outbyte(Port, u8);
2251
2252# else
2253 __asm
2254 {
2255 mov dx, [Port]
2256 mov al, [u8]
2257 out dx, al
2258 }
2259# endif
2260}
2261#endif
2262
2263
2264/**
2265 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2266 *
2267 * @returns 8-bit integer.
2268 * @param Port I/O port to read from.
2269 */
2270#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2271DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2272#else
2273DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2274{
2275 uint8_t u8;
2276# if RT_INLINE_ASM_GNU_STYLE
2277 __asm__ __volatile__("inb %w1, %b0\n\t"
2278 : "=a" (u8)
2279 : "Nd" (Port));
2280
2281# elif RT_INLINE_ASM_USES_INTRIN
2282 u8 = __inbyte(Port);
2283
2284# else
2285 __asm
2286 {
2287 mov dx, [Port]
2288 in al, dx
2289 mov [u8], al
2290 }
2291# endif
2292 return u8;
2293}
2294#endif
2295
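/** A minimal usage sketch (illustrative): reading a CMOS/RTC register through
 *  the traditional PC index/data port pair; the port numbers (0x70/0x71) are
 *  an assumption of this example, not something defined by this header.
 * @code
 * static uint8_t ReadCmosReg(uint8_t bReg)
 * {
 *     ASMOutU8(0x70, bReg);    // select the register
 *     return ASMInU8(0x71);    // read its value
 * }
 * @endcode
 */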
2296
2297/**
2298 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2299 *
2300 * @param Port I/O port to write to.
2301 * @param u16 16-bit integer to write.
2302 */
2303#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2304DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2305#else
2306DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2307{
2308# if RT_INLINE_ASM_GNU_STYLE
2309 __asm__ __volatile__("outw %w1, %w0\n\t"
2310 :: "Nd" (Port),
2311 "a" (u16));
2312
2313# elif RT_INLINE_ASM_USES_INTRIN
2314 __outword(Port, u16);
2315
2316# else
2317 __asm
2318 {
2319 mov dx, [Port]
2320 mov ax, [u16]
2321 out dx, ax
2322 }
2323# endif
2324}
2325#endif
2326
2327
2328/**
2329 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2330 *
2331 * @returns 16-bit integer.
2332 * @param Port I/O port to read from.
2333 */
2334#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2335DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2336#else
2337DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2338{
2339 uint16_t u16;
2340# if RT_INLINE_ASM_GNU_STYLE
2341 __asm__ __volatile__("inw %w1, %w0\n\t"
2342 : "=a" (u16)
2343 : "Nd" (Port));
2344
2345# elif RT_INLINE_ASM_USES_INTRIN
2346 u16 = __inword(Port);
2347
2348# else
2349 __asm
2350 {
2351 mov dx, [Port]
2352 in ax, dx
2353 mov [u16], ax
2354 }
2355# endif
2356 return u16;
2357}
2358#endif
2359
2360
2361/**
2362 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2363 *
2364 * @param Port I/O port to write to.
2365 * @param u32 32-bit integer to write.
2366 */
2367#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2368DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2369#else
2370DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2371{
2372# if RT_INLINE_ASM_GNU_STYLE
2373 __asm__ __volatile__("outl %1, %w0\n\t"
2374 :: "Nd" (Port),
2375 "a" (u32));
2376
2377# elif RT_INLINE_ASM_USES_INTRIN
2378 __outdword(Port, u32);
2379
2380# else
2381 __asm
2382 {
2383 mov dx, [Port]
2384 mov eax, [u32]
2385 out dx, eax
2386 }
2387# endif
2388}
2389#endif
2390
2391
2392/**
2393 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2394 *
2395 * @returns 32-bit integer.
2396 * @param Port I/O port to read from.
2397 */
2398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2399DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2400#else
2401DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2402{
2403 uint32_t u32;
2404# if RT_INLINE_ASM_GNU_STYLE
2405 __asm__ __volatile__("inl %w1, %0\n\t"
2406 : "=a" (u32)
2407 : "Nd" (Port));
2408
2409# elif RT_INLINE_ASM_USES_INTRIN
2410 u32 = __indword(Port);
2411
2412# else
2413 __asm
2414 {
2415 mov dx, [Port]
2416 in eax, dx
2417 mov [u32], eax
2418 }
2419# endif
2420 return u32;
2421}
2422#endif
2423
2424
2425/**
2426 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2427 *
2428 * @param Port I/O port to write to.
2429 * @param pau8 Pointer to the string buffer.
2430 * @param c The number of items to write.
2431 */
2432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2433DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2434#else
2435DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2436{
2437# if RT_INLINE_ASM_GNU_STYLE
2438 __asm__ __volatile__("rep; outsb\n\t"
2439 : "+S" (pau8),
2440 "+c" (c)
2441 : "d" (Port));
2442
2443# elif RT_INLINE_ASM_USES_INTRIN
2444 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2445
2446# else
2447 __asm
2448 {
2449 mov dx, [Port]
2450 mov ecx, [c]
2451 mov eax, [pau8]
2452 xchg esi, eax
2453 rep outsb
2454 xchg esi, eax
2455 }
2456# endif
2457}
2458#endif
2459
2460
2461/**
2462 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2463 *
2464 * @param Port I/O port to read from.
2465 * @param pau8 Pointer to the string buffer (output).
2466 * @param c The number of items to read.
2467 */
2468#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2469DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2470#else
2471DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2472{
2473# if RT_INLINE_ASM_GNU_STYLE
2474 __asm__ __volatile__("rep; insb\n\t"
2475 : "+D" (pau8),
2476 "+c" (c)
2477 : "d" (Port));
2478
2479# elif RT_INLINE_ASM_USES_INTRIN
2480 __inbytestring(Port, pau8, (unsigned long)c);
2481
2482# else
2483 __asm
2484 {
2485 mov dx, [Port]
2486 mov ecx, [c]
2487 mov eax, [pau8]
2488 xchg edi, eax
2489 rep insb
2490 xchg edi, eax
2491 }
2492# endif
2493}
2494#endif
2495
2496
2497/**
2498 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2499 *
2500 * @param Port I/O port to write to.
2501 * @param pau16 Pointer to the string buffer.
2502 * @param c The number of items to write.
2503 */
2504#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2505DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2506#else
2507DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2508{
2509# if RT_INLINE_ASM_GNU_STYLE
2510 __asm__ __volatile__("rep; outsw\n\t"
2511 : "+S" (pau16),
2512 "+c" (c)
2513 : "d" (Port));
2514
2515# elif RT_INLINE_ASM_USES_INTRIN
2516 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2517
2518# else
2519 __asm
2520 {
2521 mov dx, [Port]
2522 mov ecx, [c]
2523 mov eax, [pau16]
2524 xchg esi, eax
2525 rep outsw
2526 xchg esi, eax
2527 }
2528# endif
2529}
2530#endif
2531
2532
2533/**
2534 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2535 *
2536 * @param Port I/O port to read from.
2537 * @param pau16 Pointer to the string buffer (output).
2538 * @param c The number of items to read.
2539 */
2540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2541DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2542#else
2543DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2544{
2545# if RT_INLINE_ASM_GNU_STYLE
2546 __asm__ __volatile__("rep; insw\n\t"
2547 : "+D" (pau16),
2548 "+c" (c)
2549 : "d" (Port));
2550
2551# elif RT_INLINE_ASM_USES_INTRIN
2552 __inwordstring(Port, pau16, (unsigned long)c);
2553
2554# else
2555 __asm
2556 {
2557 mov dx, [Port]
2558 mov ecx, [c]
2559 mov eax, [pau16]
2560 xchg edi, eax
2561 rep insw
2562 xchg edi, eax
2563 }
2564# endif
2565}
2566#endif
2567
2568
2569/**
2570 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2571 *
2572 * @param Port I/O port to write to.
2573 * @param pau32 Pointer to the string buffer.
2574 * @param c The number of items to write.
2575 */
2576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2577DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2578#else
2579DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2580{
2581# if RT_INLINE_ASM_GNU_STYLE
2582 __asm__ __volatile__("rep; outsl\n\t"
2583 : "+S" (pau32),
2584 "+c" (c)
2585 : "d" (Port));
2586
2587# elif RT_INLINE_ASM_USES_INTRIN
2588 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2589
2590# else
2591 __asm
2592 {
2593 mov dx, [Port]
2594 mov ecx, [c]
2595 mov eax, [pau32]
2596 xchg esi, eax
2597 rep outsd
2598 xchg esi, eax
2599 }
2600# endif
2601}
2602#endif
2603
2604
2605/**
2606 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2607 *
2608 * @param Port I/O port to read from.
2609 * @param pau32 Pointer to the string buffer (output).
2610 * @param c The number of items to read.
2611 */
2612#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2613DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2614#else
2615DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2616{
2617# if RT_INLINE_ASM_GNU_STYLE
2618 __asm__ __volatile__("rep; insl\n\t"
2619 : "+D" (pau32),
2620 "+c" (c)
2621 : "d" (Port));
2622
2623# elif RT_INLINE_ASM_USES_INTRIN
2624 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2625
2626# else
2627 __asm
2628 {
2629 mov dx, [Port]
2630 mov ecx, [c]
2631 mov eax, [pau32]
2632 xchg edi, eax
2633 rep insd
2634 xchg edi, eax
2635 }
2636# endif
2637}
2638#endif
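
/* Example (illustrative sketch): moving a whole buffer with one REP INSW via
 * the string I/O helpers above.  The 0x1f0 port (the classic primary ATA data
 * register) and the 256-word sector size are assumptions of this example only.
 * @code
 *     static void ExampleReadSector(uint16_t *pau16Sector)
 *     {
 *         ASMInStrU16((RTIOPORT)0x1f0, pau16Sector, 256); // one 512-byte sector, 16 bits at a time
 *     }
 * @endcode
 */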
2639
2640
2641/**
2642 * Atomically Exchange an unsigned 8-bit value, ordered.
2643 *
2644 * @returns Current *pu8 value
2645 * @param pu8 Pointer to the 8-bit variable to update.
2646 * @param u8 The 8-bit value to assign to *pu8.
2647 */
2648#if RT_INLINE_ASM_EXTERNAL
2649DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2650#else
2651DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2652{
2653# if RT_INLINE_ASM_GNU_STYLE
2654 __asm__ __volatile__("xchgb %0, %1\n\t"
2655 : "=m" (*pu8),
2656 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2657 : "1" (u8),
2658 "m" (*pu8));
2659# else
2660 __asm
2661 {
2662# ifdef RT_ARCH_AMD64
2663 mov rdx, [pu8]
2664 mov al, [u8]
2665 xchg [rdx], al
2666 mov [u8], al
2667# else
2668 mov edx, [pu8]
2669 mov al, [u8]
2670 xchg [edx], al
2671 mov [u8], al
2672# endif
2673 }
2674# endif
2675 return u8;
2676}
2677#endif
2678
2679
2680/**
2681 * Atomically Exchange a signed 8-bit value, ordered.
2682 *
2683 * @returns Current *pi8 value
2684 * @param pi8 Pointer to the 8-bit variable to update.
2685 * @param i8 The 8-bit value to assign to *pi8.
2686 */
2687DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2688{
2689 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2690}
2691
2692
2693/**
2694 * Atomically Exchange a bool value, ordered.
2695 *
2696 * @returns Current *pf value
2697 * @param pf Pointer to the boolean variable to update.
2698 * @param f The boolean value to assign to *pf.
2699 */
2700DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2701{
2702#ifdef _MSC_VER
2703 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2704#else
2705 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2706#endif
2707}
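
/* Example (illustrative, assumed usage): handing a "work pending" flag to
 * another thread and learning whether it was already set, in a single atomic
 * step.  The g_fWorkPending variable is hypothetical and not part of IPRT.
 * @code
 *     static bool volatile g_fWorkPending = false;
 *
 *     static bool ExampleSignalWork(void)
 *     {
 *         // Returns true if work was already pending before this call.
 *         return ASMAtomicXchgBool(&g_fWorkPending, true);
 *     }
 * @endcode
 */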
2708
2709
2710/**
2711 * Atomically Exchange an unsigned 16-bit value, ordered.
2712 *
2713 * @returns Current *pu16 value
2714 * @param pu16 Pointer to the 16-bit variable to update.
2715 * @param u16 The 16-bit value to assign to *pu16.
2716 */
2717#if RT_INLINE_ASM_EXTERNAL
2718DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2719#else
2720DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2721{
2722# if RT_INLINE_ASM_GNU_STYLE
2723 __asm__ __volatile__("xchgw %0, %1\n\t"
2724 : "=m" (*pu16),
2725 "=r" (u16)
2726 : "1" (u16),
2727 "m" (*pu16));
2728# else
2729 __asm
2730 {
2731# ifdef RT_ARCH_AMD64
2732 mov rdx, [pu16]
2733 mov ax, [u16]
2734 xchg [rdx], ax
2735 mov [u16], ax
2736# else
2737 mov edx, [pu16]
2738 mov ax, [u16]
2739 xchg [edx], ax
2740 mov [u16], ax
2741# endif
2742 }
2743# endif
2744 return u16;
2745}
2746#endif
2747
2748
2749/**
2750 * Atomically Exchange a signed 16-bit value, ordered.
2751 *
2752 * @returns Current *pi16 value
2753 * @param pi16 Pointer to the 16-bit variable to update.
2754 * @param i16 The 16-bit value to assign to *pi16.
2755 */
2756DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2757{
2758 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2759}
2760
2761
2762/**
2763 * Atomically Exchange an unsigned 32-bit value, ordered.
2764 *
2765 * @returns Current *pu32 value
2766 * @param pu32 Pointer to the 32-bit variable to update.
2767 * @param u32 The 32-bit value to assign to *pu32.
2768 */
2769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2770DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2771#else
2772DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2773{
2774# if RT_INLINE_ASM_GNU_STYLE
2775 __asm__ __volatile__("xchgl %0, %1\n\t"
2776 : "=m" (*pu32),
2777 "=r" (u32)
2778 : "1" (u32),
2779 "m" (*pu32));
2780
2781# elif RT_INLINE_ASM_USES_INTRIN
2782 u32 = _InterlockedExchange((long *)pu32, u32);
2783
2784# else
2785 __asm
2786 {
2787# ifdef RT_ARCH_AMD64
2788 mov rdx, [pu32]
2789 mov eax, u32
2790 xchg [rdx], eax
2791 mov [u32], eax
2792# else
2793 mov edx, [pu32]
2794 mov eax, u32
2795 xchg [edx], eax
2796 mov [u32], eax
2797# endif
2798 }
2799# endif
2800 return u32;
2801}
2802#endif
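
/* Example (illustrative sketch): grab-and-reset of a statistics counter that
 * other threads keep bumping.  The g_cHits variable is hypothetical and used
 * only to show the exchange returning the previous value.
 * @code
 *     static uint32_t volatile g_cHits = 0;
 *
 *     static uint32_t ExampleTakeHitCount(void)
 *     {
 *         // Returns the accumulated count and atomically leaves zero behind.
 *         return ASMAtomicXchgU32(&g_cHits, 0);
 *     }
 * @endcode
 */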
2803
2804
2805/**
2806 * Atomically Exchange a signed 32-bit value, ordered.
2807 *
2808 * @returns Current *pi32 value
2809 * @param pi32 Pointer to the 32-bit variable to update.
2810 * @param i32 The 32-bit value to assign to *pi32.
2811 */
2812DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2813{
2814 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2815}
2816
2817
2818/**
2819 * Atomically Exchange an unsigned 64-bit value, ordered.
2820 *
2821 * @returns Current *pu64 value
2822 * @param pu64 Pointer to the 64-bit variable to update.
2823 * @param u64 The 64-bit value to assign to *pu64.
2824 */
2825#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
2826 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2827DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2828#else
2829DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2830{
2831# if defined(RT_ARCH_AMD64)
2832# if RT_INLINE_ASM_USES_INTRIN
2833 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2834
2835# elif RT_INLINE_ASM_GNU_STYLE
2836 __asm__ __volatile__("xchgq %0, %1\n\t"
2837 : "=m" (*pu64),
2838 "=r" (u64)
2839 : "1" (u64),
2840 "m" (*pu64));
2841# else
2842 __asm
2843 {
2844 mov rdx, [pu64]
2845 mov rax, [u64]
2846 xchg [rdx], rax
2847 mov [u64], rax
2848 }
2849# endif
2850# else /* !RT_ARCH_AMD64 */
2851# if RT_INLINE_ASM_GNU_STYLE
2852# if defined(PIC) || defined(__PIC__)
2853 uint32_t u32EBX = (uint32_t)u64;
2854 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2855 "xchgl %%ebx, %3\n\t"
2856 "1:\n\t"
2857 "lock; cmpxchg8b (%5)\n\t"
2858 "jnz 1b\n\t"
2859 "movl %3, %%ebx\n\t"
2860 /*"xchgl %%esi, %5\n\t"*/
2861 : "=A" (u64),
2862 "=m" (*pu64)
2863 : "0" (*pu64),
2864 "m" ( u32EBX ),
2865 "c" ( (uint32_t)(u64 >> 32) ),
2866 "S" (pu64));
2867# else /* !PIC */
2868 __asm__ __volatile__("1:\n\t"
2869 "lock; cmpxchg8b %1\n\t"
2870 "jnz 1b\n\t"
2871 : "=A" (u64),
2872 "=m" (*pu64)
2873 : "0" (*pu64),
2874 "b" ( (uint32_t)u64 ),
2875 "c" ( (uint32_t)(u64 >> 32) ));
2876# endif
2877# else
2878 __asm
2879 {
2880 mov ebx, dword ptr [u64]
2881 mov ecx, dword ptr [u64 + 4]
2882 mov edi, pu64
2883 mov eax, dword ptr [edi]
2884 mov edx, dword ptr [edi + 4]
2885 retry:
2886 lock cmpxchg8b [edi]
2887 jnz retry
2888 mov dword ptr [u64], eax
2889 mov dword ptr [u64 + 4], edx
2890 }
2891# endif
2892# endif /* !RT_ARCH_AMD64 */
2893 return u64;
2894}
2895#endif
2896
2897
2898/**
2899 * Atomically Exchange a signed 64-bit value, ordered.
2900 *
2901 * @returns Current *pi64 value
2902 * @param pi64 Pointer to the 64-bit variable to update.
2903 * @param i64 The 64-bit value to assign to *pi64.
2904 */
2905DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2906{
2907 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2908}
2909
2910
2911/**
2912 * Atomically Exchange a pointer value, ordered.
2913 *
2914 * @returns Current *ppv value
2915 * @param ppv Pointer to the pointer variable to update.
2916 * @param pv The pointer value to assign to *ppv.
2917 */
2918DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2919{
2920#if ARCH_BITS == 32
2921 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2922#elif ARCH_BITS == 64
2923 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2924#else
2925# error "ARCH_BITS is bogus"
2926#endif
2927}
2928
2929
2930/**
2931 * Atomically Exchange a raw-mode context pointer value, ordered.
2932 *
2933 * @returns Current *ppvRC value
2934 * @param ppvRC Pointer to the pointer variable to update.
2935 * @param pvRC The pointer value to assign to *ppvRC.
2936 */
2937DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2938{
2939 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2940}
2941
2942
2943/**
2944 * Atomically Exchange a ring-0 pointer value, ordered.
2945 *
2946 * @returns Current *ppvR0 value
2947 * @param ppvR0 Pointer to the pointer variable to update.
2948 * @param pvR0 The pointer value to assign to *ppvR0.
2949 */
2950DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2951{
2952#if R0_ARCH_BITS == 32
2953 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2954#elif R0_ARCH_BITS == 64
2955 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2956#else
2957# error "R0_ARCH_BITS is bogus"
2958#endif
2959}
2960
2961
2962/**
2963 * Atomically Exchange a ring-3 pointer value, ordered.
2964 *
2965 * @returns Current *ppvR3 value
2966 * @param ppvR3 Pointer to the pointer variable to update.
2967 * @param pvR3 The pointer value to assign to *ppvR3.
2968 */
2969DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2970{
2971#if R3_ARCH_BITS == 32
2972 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2973#elif R3_ARCH_BITS == 64
2974 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2975#else
2976# error "R3_ARCH_BITS is bogus"
2977#endif
2978}
2979
2980
2981/** @def ASMAtomicXchgHandle
2982 * Atomically Exchange a typical IPRT handle value, ordered.
2983 *
2984 * @param ph Pointer to the value to update.
2985 * @param hNew The new value to assign to *ph.
2986 * @param phRes Where to store the current *ph value.
2987 *
2988 * @remarks This doesn't currently work for all handles (like RTFILE).
2989 */
2990#if HC_ARCH_BITS == 32
2991# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2992 do { \
2993 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2994 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2995 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2996 } while (0)
2997#elif HC_ARCH_BITS == 64
2998# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2999 do { \
3000 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3001 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3002 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
3003 } while (0)
3004#else
3005# error HC_ARCH_BITS
3006#endif
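
/* Example (illustrative, assumed usage): installing a new handle and getting
 * the previous one back with ASMAtomicXchgHandle.  RTSEMEVENT is used merely
 * as a plausible pointer-sized handle type; the g_hEvt variable is made up.
 * @code
 *     static RTSEMEVENT volatile g_hEvt = NIL_RTSEMEVENT;
 *
 *     static RTSEMEVENT ExampleInstallEvent(RTSEMEVENT hNew)
 *     {
 *         RTSEMEVENT hOld;
 *         ASMAtomicXchgHandle(&g_hEvt, hNew, &hOld);
 *         return hOld;    // the caller decides what to do with the old handle
 *     }
 * @endcode
 */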
3007
3008
3009/**
3010 * Atomically Exchange a value whose size might differ
3011 * between platforms or compilers, ordered.
3012 *
3013 * @param pu Pointer to the variable to update.
3014 * @param uNew The value to assign to *pu.
3015 * @todo This is busted as it's missing the result argument.
3016 */
3017#define ASMAtomicXchgSize(pu, uNew) \
3018 do { \
3019 switch (sizeof(*(pu))) { \
3020 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3021 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3022 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3023 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3024 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3025 } \
3026 } while (0)
3027
3028/**
3029 * Atomically Exchange a value whose size might differ
3030 * between platforms or compilers, ordered.
3031 *
3032 * @param pu Pointer to the variable to update.
3033 * @param uNew The value to assign to *pu.
3034 * @param puRes Where to store the current *pu value.
3035 */
3036#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3037 do { \
3038 switch (sizeof(*(pu))) { \
3039 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3040 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3041 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3042 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3043 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
3044 } \
3045 } while (0)
3046
3047
3048
3049/**
3050 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
3051 *
3052 * @returns true if xchg was done.
3053 * @returns false if xchg wasn't done.
3054 *
3055 * @param pu8 Pointer to the value to update.
3056 * @param u8New The new value to assign to *pu8.
3057 * @param u8Old The old value to compare *pu8 with.
3058 */
3059#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
3060DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
3061#else
3062DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
3063{
3064 uint8_t u8Ret;
3065 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
3066 "setz %1\n\t"
3067 : "=m" (*pu8),
3068 "=qm" (u8Ret),
3069 "=a" (u8Old)
3070 : "q" (u8New),
3071 "2" (u8Old),
3072 "m" (*pu8));
3073 return (bool)u8Ret;
3074}
3075#endif
3076
3077
3078/**
3079 * Atomically Compare and Exchange a signed 8-bit value, ordered.
3080 *
3081 * @returns true if xchg was done.
3082 * @returns false if xchg wasn't done.
3083 *
3084 * @param pi8 Pointer to the value to update.
3085 * @param i8New The new value to assign to *pi8.
3086 * @param i8Old The old value to compare *pi8 with.
3087 */
3088DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
3089{
3090 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
3091}
3092
3093
3094/**
3095 * Atomically Compare and Exchange a bool value, ordered.
3096 *
3097 * @returns true if xchg was done.
3098 * @returns false if xchg wasn't done.
3099 *
3100 * @param pf Pointer to the value to update.
3101 * @param fNew The new value to assign to *pf.
3102 * @param fOld The old value to compare *pf with.
3103 */
3104DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
3105{
3106 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
3107}
3108
3109
3110/**
3111 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3112 *
3113 * @returns true if xchg was done.
3114 * @returns false if xchg wasn't done.
3115 *
3116 * @param pu32 Pointer to the value to update.
3117 * @param u32New The new value to assign to *pu32.
3118 * @param u32Old The old value to compare *pu32 with.
3119 */
3120#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3121DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3122#else
3123DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3124{
3125# if RT_INLINE_ASM_GNU_STYLE
3126 uint8_t u8Ret;
3127 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3128 "setz %1\n\t"
3129 : "=m" (*pu32),
3130 "=qm" (u8Ret),
3131 "=a" (u32Old)
3132 : "r" (u32New),
3133 "2" (u32Old),
3134 "m" (*pu32));
3135 return (bool)u8Ret;
3136
3137# elif RT_INLINE_ASM_USES_INTRIN
3138 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3139
3140# else
3141 uint32_t u32Ret;
3142 __asm
3143 {
3144# ifdef RT_ARCH_AMD64
3145 mov rdx, [pu32]
3146# else
3147 mov edx, [pu32]
3148# endif
3149 mov eax, [u32Old]
3150 mov ecx, [u32New]
3151# ifdef RT_ARCH_AMD64
3152 lock cmpxchg [rdx], ecx
3153# else
3154 lock cmpxchg [edx], ecx
3155# endif
3156 setz al
3157 movzx eax, al
3158 mov [u32Ret], eax
3159 }
3160 return !!u32Ret;
3161# endif
3162}
3163#endif
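
/* Example (illustrative sketch): a minimal try-acquire built on
 * ASMAtomicCmpXchgU32.  The lock word and the 0 = free / 1 = taken convention
 * are assumptions of the example, not an IPRT locking primitive.
 * @code
 *     static uint32_t volatile g_fSpinLock = 0;   // 0 = free, 1 = taken
 *
 *     static bool ExampleTryAcquire(void)
 *     {
 *         // Succeeds only if the lock word still holds the expected value 0.
 *         return ASMAtomicCmpXchgU32(&g_fSpinLock, 1, 0);
 *     }
 * @endcode
 */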
3164
3165
3166/**
3167 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3168 *
3169 * @returns true if xchg was done.
3170 * @returns false if xchg wasn't done.
3171 *
3172 * @param pi32 Pointer to the value to update.
3173 * @param i32New The new value to assign to *pi32.
3174 * @param i32Old The old value to compare *pi32 with.
3175 */
3176DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3177{
3178 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3179}
3180
3181
3182/**
3183 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3184 *
3185 * @returns true if xchg was done.
3186 * @returns false if xchg wasn't done.
3187 *
3188 * @param pu64 Pointer to the 64-bit variable to update.
3189 * @param u64New The 64-bit value to assign to *pu64.
3190 * @param u64Old The value to compare with.
3191 */
3192#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3193 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3194DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3195#else
3196DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
3197{
3198# if RT_INLINE_ASM_USES_INTRIN
3199 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3200
3201# elif defined(RT_ARCH_AMD64)
3202# if RT_INLINE_ASM_GNU_STYLE
3203 uint8_t u8Ret;
3204 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3205 "setz %1\n\t"
3206 : "=m" (*pu64),
3207 "=qm" (u8Ret),
3208 "=a" (u64Old)
3209 : "r" (u64New),
3210 "2" (u64Old),
3211 "m" (*pu64));
3212 return (bool)u8Ret;
3213# else
3214 bool fRet;
3215 __asm
3216 {
3217 mov rdx, [pu64]
3218 mov rax, [u64Old]
3219 mov rcx, [u64New]
3220 lock cmpxchg [rdx], rcx
3221 setz al
3222 mov [fRet], al
3223 }
3224 return fRet;
3225# endif
3226# else /* !RT_ARCH_AMD64 */
3227 uint32_t u32Ret;
3228# if RT_INLINE_ASM_GNU_STYLE
3229# if defined(PIC) || defined(__PIC__)
3230 uint32_t u32EBX = (uint32_t)u64New;
3231 uint32_t u32Spill;
3232 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3233 "lock; cmpxchg8b (%6)\n\t"
3234 "setz %%al\n\t"
3235 "movl %4, %%ebx\n\t"
3236 "movzbl %%al, %%eax\n\t"
3237 : "=a" (u32Ret),
3238 "=d" (u32Spill),
3239# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3240 "+m" (*pu64)
3241# else
3242 "=m" (*pu64)
3243# endif
3244 : "A" (u64Old),
3245 "m" ( u32EBX ),
3246 "c" ( (uint32_t)(u64New >> 32) ),
3247 "S" (pu64));
3248# else /* !PIC */
3249 uint32_t u32Spill;
3250 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3251 "setz %%al\n\t"
3252 "movzbl %%al, %%eax\n\t"
3253 : "=a" (u32Ret),
3254 "=d" (u32Spill),
3255 "+m" (*pu64)
3256 : "A" (u64Old),
3257 "b" ( (uint32_t)u64New ),
3258 "c" ( (uint32_t)(u64New >> 32) ));
3259# endif
3260 return (bool)u32Ret;
3261# else
3262 __asm
3263 {
3264 mov ebx, dword ptr [u64New]
3265 mov ecx, dword ptr [u64New + 4]
3266 mov edi, [pu64]
3267 mov eax, dword ptr [u64Old]
3268 mov edx, dword ptr [u64Old + 4]
3269 lock cmpxchg8b [edi]
3270 setz al
3271 movzx eax, al
3272 mov dword ptr [u32Ret], eax
3273 }
3274 return !!u32Ret;
3275# endif
3276# endif /* !RT_ARCH_AMD64 */
3277}
3278#endif
3279
3280
3281/**
3282 * Atomically Compare and exchange a signed 64-bit value, ordered.
3283 *
3284 * @returns true if xchg was done.
3285 * @returns false if xchg wasn't done.
3286 *
3287 * @param pi64 Pointer to the 64-bit variable to update.
3288 * @param i64 The 64-bit value to assign to *pi64.
3289 * @param i64Old The value to compare with.
3290 */
3291DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3292{
3293 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3294}
3295
3296
3297/**
3298 * Atomically Compare and Exchange a pointer value, ordered.
3299 *
3300 * @returns true if xchg was done.
3301 * @returns false if xchg wasn't done.
3302 *
3303 * @param ppv Pointer to the value to update.
3304 * @param pvNew The new value to assign to *ppv.
3305 * @param pvOld The old value to compare *ppv with.
3306 */
3307DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3308{
3309#if ARCH_BITS == 32
3310 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3311#elif ARCH_BITS == 64
3312 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3313#else
3314# error "ARCH_BITS is bogus"
3315#endif
3316}
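
/* Example (illustrative sketch): lock-free push onto a singly linked list
 * using the classic compare-and-swap retry loop.  The EXAMPLENODE structure
 * and the g_pHead variable are hypothetical and exist only for this example.
 * @code
 *     typedef struct EXAMPLENODE
 *     {
 *         struct EXAMPLENODE *pNext;
 *         uint32_t            uPayload;
 *     } EXAMPLENODE;
 *
 *     static EXAMPLENODE * volatile g_pHead = NULL;
 *
 *     static void ExamplePush(EXAMPLENODE *pNode)
 *     {
 *         EXAMPLENODE *pOld;
 *         do
 *         {
 *             pOld = g_pHead;         // plain read; the CmpXchg validates it
 *             pNode->pNext = pOld;
 *         } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pHead, pNode, pOld));
 *     }
 * @endcode
 */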
3317
3318
3319/** @def ASMAtomicCmpXchgHandle
3320 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3321 *
3322 * @param ph Pointer to the value to update.
3323 * @param hNew The new value to assign to *ph.
3324 * @param hOld The old value to compare *ph with.
3325 * @param fRc Where to store the result.
3326 *
3327 * @remarks This doesn't currently work for all handles (like RTFILE).
3328 */
3329#if HC_ARCH_BITS == 32
3330# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3331 do { \
3332 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3333 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3334 } while (0)
3335#elif HC_ARCH_BITS == 64
3336# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3337 do { \
3338 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3339 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3340 } while (0)
3341#else
3342# error HC_ARCH_BITS
3343#endif
3344
3345
3346/** @def ASMAtomicCmpXchgSize
3347 * Atomically Compare and Exchange a value whose size might differ
3348 * between platforms or compilers, ordered.
3349 *
3350 * @param pu Pointer to the value to update.
3351 * @param uNew The new value to assign to *pu.
3352 * @param uOld The old value to compare *pu with.
3353 * @param fRc Where to store the result.
3354 */
3355#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3356 do { \
3357 switch (sizeof(*(pu))) { \
3358 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3359 break; \
3360 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3361 break; \
3362 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3363 (fRc) = false; \
3364 break; \
3365 } \
3366 } while (0)
3367
3368
3369/**
3370 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3371 * passes back old value, ordered.
3372 *
3373 * @returns true if xchg was done.
3374 * @returns false if xchg wasn't done.
3375 *
3376 * @param pu32 Pointer to the value to update.
3377 * @param u32New The new value to assign to *pu32.
3378 * @param u32Old The old value to compare *pu32 with.
3379 * @param pu32Old Pointer to where to store the old value.
3380 */
3381#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3382DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3383#else
3384DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3385{
3386# if RT_INLINE_ASM_GNU_STYLE
3387 uint8_t u8Ret;
3388 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3389 "setz %1\n\t"
3390 : "=m" (*pu32),
3391 "=qm" (u8Ret),
3392 "=a" (*pu32Old)
3393 : "r" (u32New),
3394 "a" (u32Old),
3395 "m" (*pu32));
3396 return (bool)u8Ret;
3397
3398# elif RT_INLINE_ASM_USES_INTRIN
3399 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3400
3401# else
3402 uint32_t u32Ret;
3403 __asm
3404 {
3405# ifdef RT_ARCH_AMD64
3406 mov rdx, [pu32]
3407# else
3408 mov edx, [pu32]
3409# endif
3410 mov eax, [u32Old]
3411 mov ecx, [u32New]
3412# ifdef RT_ARCH_AMD64
3413 lock cmpxchg [rdx], ecx
3414 mov rdx, [pu32Old]
3415 mov [rdx], eax
3416# else
3417 lock cmpxchg [edx], ecx
3418 mov edx, [pu32Old]
3419 mov [edx], eax
3420# endif
3421 setz al
3422 movzx eax, al
3423 mov [u32Ret], eax
3424 }
3425 return !!u32Ret;
3426# endif
3427}
3428#endif
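
/* Example (illustrative sketch): a bounded increment using the "Ex" variant,
 * which hands back the fresh value on a failed compare and so spares a
 * separate re-read on each retry.  The g_cUsers variable and the cap of 100
 * are assumptions of this example.
 * @code
 *     static uint32_t volatile g_cUsers = 0;
 *
 *     static bool ExampleAddUser(void)
 *     {
 *         uint32_t uOld = g_cUsers;
 *         for (;;)
 *         {
 *             if (uOld >= 100)        // assumed cap
 *                 return false;
 *             if (ASMAtomicCmpXchgExU32(&g_cUsers, uOld + 1, uOld, &uOld))
 *                 return true;
 *             // On failure uOld was refreshed with the current value; retry.
 *         }
 *     }
 * @endcode
 */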
3429
3430
3431/**
3432 * Atomically Compare and Exchange a signed 32-bit value, additionally
3433 * passes back old value, ordered.
3434 *
3435 * @returns true if xchg was done.
3436 * @returns false if xchg wasn't done.
3437 *
3438 * @param pi32 Pointer to the value to update.
3439 * @param i32New The new value to assign to *pi32.
3440 * @param i32Old The old value to compare *pi32 with.
3441 * @param pi32Old Pointer to where to store the old value.
3442 */
3443DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3444{
3445 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3446}
3447
3448
3449/**
3450 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3451 * passing back old value, ordered.
3452 *
3453 * @returns true if xchg was done.
3454 * @returns false if xchg wasn't done.
3455 *
3456 * @param pu64 Pointer to the 64-bit variable to update.
3457 * @param u64New The 64-bit value to assign to *pu64.
3458 * @param u64Old The value to compare with.
3459 * @param pu64Old Pointer to where to store the old value.
3460 */
3461#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3462 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3463DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3464#else
3465DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3466{
3467# if RT_INLINE_ASM_USES_INTRIN
3468 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3469
3470# elif defined(RT_ARCH_AMD64)
3471# if RT_INLINE_ASM_GNU_STYLE
3472 uint8_t u8Ret;
3473 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3474 "setz %1\n\t"
3475 : "=m" (*pu64),
3476 "=qm" (u8Ret),
3477 "=a" (*pu64Old)
3478 : "r" (u64New),
3479 "a" (u64Old),
3480 "m" (*pu64));
3481 return (bool)u8Ret;
3482# else
3483 bool fRet;
3484 __asm
3485 {
3486 mov rdx, [pu64]
3487 mov rax, [u64Old]
3488 mov rcx, [u64New]
3489 lock cmpxchg [rdx], rcx
3490 mov rdx, [pu64Old]
3491 mov [rdx], rax
3492 setz al
3493 mov [fRet], al
3494 }
3495 return fRet;
3496# endif
3497# else /* !RT_ARCH_AMD64 */
3498# if RT_INLINE_ASM_GNU_STYLE
3499 uint64_t u64Ret;
3500# if defined(PIC) || defined(__PIC__)
3501 /* NB: this code uses a memory clobber description, because the clean
3502 * solution with an output value for *pu64 makes gcc run out of registers.
3503 * This will cause suboptimal code, and anyone with a better solution is
3504 * welcome to improve this. */
3505 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3506 "lock; cmpxchg8b %3\n\t"
3507 "xchgl %%ebx, %1\n\t"
3508 : "=A" (u64Ret)
3509 : "DS" ((uint32_t)u64New),
3510 "c" ((uint32_t)(u64New >> 32)),
3511 "m" (*pu64),
3512 "0" (u64Old)
3513 : "memory" );
3514# else /* !PIC */
3515 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3516 : "=A" (u64Ret),
3517 "=m" (*pu64)
3518 : "b" ((uint32_t)u64New),
3519 "c" ((uint32_t)(u64New >> 32)),
3520 "m" (*pu64),
3521 "0" (u64Old));
3522# endif
3523 *pu64Old = u64Ret;
3524 return u64Ret == u64Old;
3525# else
3526 uint32_t u32Ret;
3527 __asm
3528 {
3529 mov ebx, dword ptr [u64New]
3530 mov ecx, dword ptr [u64New + 4]
3531 mov edi, [pu64]
3532 mov eax, dword ptr [u64Old]
3533 mov edx, dword ptr [u64Old + 4]
3534 lock cmpxchg8b [edi]
3535 mov ebx, [pu64Old]
3536 mov [ebx], eax
3537 setz al
3538 movzx eax, al
3539 add ebx, 4
3540 mov [ebx], edx
3541 mov dword ptr [u32Ret], eax
3542 }
3543 return !!u32Ret;
3544# endif
3545# endif /* !RT_ARCH_AMD64 */
3546}
3547#endif
3548
3549
3550/**
3551 * Atomically Compare and exchange a signed 64-bit value, additionally
3552 * passing back old value, ordered.
3553 *
3554 * @returns true if xchg was done.
3555 * @returns false if xchg wasn't done.
3556 *
3557 * @param pi64 Pointer to the 64-bit variable to update.
3558 * @param i64 The 64-bit value to assign to *pi64.
3559 * @param i64Old The value to compare with.
3560 * @param pi64Old Pointer to where to store the old value.
3561 */
3562DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3563{
3564 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3565}
3566
3567/** @def ASMAtomicCmpXchgExHandle
3568 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3569 *
3570 * @param ph Pointer to the value to update.
3571 * @param hNew The new value to assign to *ph.
3572 * @param hOld The old value to compare *ph with.
3573 * @param fRc Where to store the result.
3574 * @param phOldVal Pointer to where to store the old value.
3575 *
3576 * @remarks This doesn't currently work for all handles (like RTFILE).
3577 */
3578#if HC_ARCH_BITS == 32
3579# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3580 do { \
3581 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3582 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3583 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3584 } while (0)
3585#elif HC_ARCH_BITS == 64
3586# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3587 do { \
3588 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3589 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3590 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3591 } while (0)
3592#else
3593# error HC_ARCH_BITS
3594#endif
3595
3596
3597/** @def ASMAtomicCmpXchgExSize
3598 * Atomically Compare and Exchange a value whose size might differ
3599 * between platforms or compilers. Additionally passes back old value.
3600 *
3601 * @param pu Pointer to the value to update.
3602 * @param uNew The new value to assign to *pu.
3603 * @param uOld The old value to compare *pu with.
3604 * @param fRc Where to store the result.
3605 * @param puOldVal Pointer to where to store the old value.
3606 */
3607#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3608 do { \
3609 switch (sizeof(*(pu))) { \
3610 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3611 break; \
3612 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3613 break; \
3614 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3615 (fRc) = false; \
3616 *(puOldVal) = 0; \
3617 break; \
3618 } \
3619 } while (0)
3620
3621
3622/**
3623 * Atomically Compare and Exchange a pointer value, additionally
3624 * passing back old value, ordered.
3625 *
3626 * @returns true if xchg was done.
3627 * @returns false if xchg wasn't done.
3628 *
3629 * @param ppv Pointer to the value to update.
3630 * @param pvNew The new value to assign to *ppv.
3631 * @param pvOld The old value to compare *ppv with.
3632 * @param ppvOld Pointer to where to store the old value.
3633 */
3634DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3635{
3636#if ARCH_BITS == 32
3637 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3638#elif ARCH_BITS == 64
3639 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3640#else
3641# error "ARCH_BITS is bogus"
3642#endif
3643}
3644
3645
3646/**
3647 * Atomically exchanges and adds to a 32-bit value, ordered.
3648 *
3649 * @returns The old value.
3650 * @param pu32 Pointer to the value.
3651 * @param u32 Number to add.
3652 */
3653#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3654DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3655#else
3656DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3657{
3658# if RT_INLINE_ASM_USES_INTRIN
3659 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3660 return u32;
3661
3662# elif RT_INLINE_ASM_GNU_STYLE
3663 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3664 : "=r" (u32),
3665 "=m" (*pu32)
3666 : "0" (u32),
3667 "m" (*pu32)
3668 : "memory");
3669 return u32;
3670# else
3671 __asm
3672 {
3673 mov eax, [u32]
3674# ifdef RT_ARCH_AMD64
3675 mov rdx, [pu32]
3676 lock xadd [rdx], eax
3677# else
3678 mov edx, [pu32]
3679 lock xadd [edx], eax
3680# endif
3681 mov [u32], eax
3682 }
3683 return u32;
3684# endif
3685}
3686#endif
3687
3688
3689/**
3690 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3691 *
3692 * @returns The old value.
3693 * @param pi32 Pointer to the value.
3694 * @param i32 Number to add.
3695 */
3696DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3697{
3698 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3699}
3700
3701
3702/**
3703 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3704 *
3705 * @returns The old value.
3706 * @param pi32 Pointer to the value.
3707 * @param i32 Number to subtract.
3708 */
3709DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t i32)
3710{
3711 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)i32);
3712}
3713
3714
3715/**
3716 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3717 *
3718 * @returns The old value.
3719 * @param pi32 Pointer to the value.
3720 * @param i32 Number to subtract.
3721 */
3722DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3723{
3724 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3725}
3726
3727
3728/**
3729 * Atomically increment a 32-bit value, ordered.
3730 *
3731 * @returns The new value.
3732 * @param pu32 Pointer to the value to increment.
3733 */
3734#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3735DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3736#else
3737DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3738{
3739 uint32_t u32;
3740# if RT_INLINE_ASM_USES_INTRIN
3741 u32 = _InterlockedIncrement((long *)pu32);
3742 return u32;
3743
3744# elif RT_INLINE_ASM_GNU_STYLE
3745 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3746 : "=r" (u32),
3747 "=m" (*pu32)
3748 : "0" (1),
3749 "m" (*pu32)
3750 : "memory");
3751 return u32+1;
3752# else
3753 __asm
3754 {
3755 mov eax, 1
3756# ifdef RT_ARCH_AMD64
3757 mov rdx, [pu32]
3758 lock xadd [rdx], eax
3759# else
3760 mov edx, [pu32]
3761 lock xadd [edx], eax
3762# endif
3763 mov u32, eax
3764 }
3765 return u32+1;
3766# endif
3767}
3768#endif
3769
3770
3771/**
3772 * Atomically increment a signed 32-bit value, ordered.
3773 *
3774 * @returns The new value.
3775 * @param pi32 Pointer to the value to increment.
3776 */
3777DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3778{
3779 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3780}
3781
3782
3783/**
3784 * Atomically decrement an unsigned 32-bit value, ordered.
3785 *
3786 * @returns The new value.
3787 * @param pu32 Pointer to the value to decrement.
3788 */
3789#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3790DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3791#else
3792DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3793{
3794 uint32_t u32;
3795# if RT_INLINE_ASM_USES_INTRIN
3796 u32 = _InterlockedDecrement((long *)pu32);
3797 return u32;
3798
3799# elif RT_INLINE_ASM_GNU_STYLE
3800 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3801 : "=r" (u32),
3802 "=m" (*pu32)
3803 : "0" (-1),
3804 "m" (*pu32)
3805 : "memory");
3806 return u32-1;
3807# else
3808 __asm
3809 {
3810 mov eax, -1
3811# ifdef RT_ARCH_AMD64
3812 mov rdx, [pu32]
3813 lock xadd [rdx], eax
3814# else
3815 mov edx, [pu32]
3816 lock xadd [edx], eax
3817# endif
3818 mov u32, eax
3819 }
3820 return u32-1;
3821# endif
3822}
3823#endif
3824
3825
3826/**
3827 * Atomically decrement a signed 32-bit value, ordered.
3828 *
3829 * @returns The new value.
3830 * @param pi32 Pointer to the value to decrement.
3831 */
3832DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3833{
3834 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3835}
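
/* Example (illustrative sketch): a plain reference counter built from the
 * atomic increment/decrement helpers above.  The EXAMPLEOBJ structure is
 * hypothetical; the caller is expected to destroy the object when the last
 * reference is gone.
 * @code
 *     typedef struct EXAMPLEOBJ
 *     {
 *         uint32_t volatile cRefs;
 *         // ... payload ...
 *     } EXAMPLEOBJ;
 *
 *     static void ExampleRetain(EXAMPLEOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     static bool ExampleRelease(EXAMPLEOBJ *pObj)
 *     {
 *         // ASMAtomicDecU32 returns the new value, so zero means last reference.
 *         return ASMAtomicDecU32(&pObj->cRefs) == 0;
 *     }
 * @endcode
 */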
3836
3837
3838/**
3839 * Atomically Or an unsigned 32-bit value, ordered.
3840 *
3841 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3842 * @param u32 The value to OR *pu32 with.
3843 */
3844#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3845DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3846#else
3847DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3848{
3849# if RT_INLINE_ASM_USES_INTRIN
3850 _InterlockedOr((long volatile *)pu32, (long)u32);
3851
3852# elif RT_INLINE_ASM_GNU_STYLE
3853 __asm__ __volatile__("lock; orl %1, %0\n\t"
3854 : "=m" (*pu32)
3855 : "ir" (u32),
3856 "m" (*pu32));
3857# else
3858 __asm
3859 {
3860 mov eax, [u32]
3861# ifdef RT_ARCH_AMD64
3862 mov rdx, [pu32]
3863 lock or [rdx], eax
3864# else
3865 mov edx, [pu32]
3866 lock or [edx], eax
3867# endif
3868 }
3869# endif
3870}
3871#endif
3872
3873
3874/**
3875 * Atomically Or a signed 32-bit value, ordered.
3876 *
3877 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3878 * @param i32 The value to OR *pi32 with.
3879 */
3880DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3881{
3882 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3883}
3884
3885
3886/**
3887 * Atomically And an unsigned 32-bit value, ordered.
3888 *
3889 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3890 * @param u32 The value to AND *pu32 with.
3891 */
3892#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3893DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3894#else
3895DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3896{
3897# if RT_INLINE_ASM_USES_INTRIN
3898 _InterlockedAnd((long volatile *)pu32, u32);
3899
3900# elif RT_INLINE_ASM_GNU_STYLE
3901 __asm__ __volatile__("lock; andl %1, %0\n\t"
3902 : "=m" (*pu32)
3903 : "ir" (u32),
3904 "m" (*pu32));
3905# else
3906 __asm
3907 {
3908 mov eax, [u32]
3909# ifdef RT_ARCH_AMD64
3910 mov rdx, [pu32]
3911 lock and [rdx], eax
3912# else
3913 mov edx, [pu32]
3914 lock and [edx], eax
3915# endif
3916 }
3917# endif
3918}
3919#endif
3920
3921
3922/**
3923 * Atomically And a signed 32-bit value, ordered.
3924 *
3925 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3926 * @param i32 The value to AND *pi32 with.
3927 */
3928DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3929{
3930 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3931}
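
/* Example (illustrative sketch): setting and clearing bits in a shared flag
 * word with the atomic OR/AND helpers above.  The flag value and the
 * g_fFlags variable are assumptions of the example.
 * @code
 *     #define EXAMPLE_F_BUSY  UINT32_C(0x00000002)
 *
 *     static uint32_t volatile g_fFlags = 0;
 *
 *     static void ExampleSetBusy(void)
 *     {
 *         ASMAtomicOrU32(&g_fFlags, EXAMPLE_F_BUSY);
 *     }
 *
 *     static void ExampleClearBusy(void)
 *     {
 *         ASMAtomicAndU32(&g_fFlags, ~EXAMPLE_F_BUSY);
 *     }
 * @endcode
 */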
3932
3933
3934/**
3935 * Serialize Instruction.
3936 */
3937#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3938DECLASM(void) ASMSerializeInstruction(void);
3939#else
3940DECLINLINE(void) ASMSerializeInstruction(void)
3941{
3942# if RT_INLINE_ASM_GNU_STYLE
3943 RTCCUINTREG xAX = 0;
3944# ifdef RT_ARCH_AMD64
3945 __asm__ ("cpuid"
3946 : "=a" (xAX)
3947 : "0" (xAX)
3948 : "rbx", "rcx", "rdx");
3949# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3950 __asm__ ("push %%ebx\n\t"
3951 "cpuid\n\t"
3952 "pop %%ebx\n\t"
3953 : "=a" (xAX)
3954 : "0" (xAX)
3955 : "ecx", "edx");
3956# else
3957 __asm__ ("cpuid"
3958 : "=a" (xAX)
3959 : "0" (xAX)
3960 : "ebx", "ecx", "edx");
3961# endif
3962
3963# elif RT_INLINE_ASM_USES_INTRIN
3964 int aInfo[4];
3965 __cpuid(aInfo, 0);
3966
3967# else
3968 __asm
3969 {
3970 push ebx
3971 xor eax, eax
3972 cpuid
3973 pop ebx
3974 }
3975# endif
3976}
3977#endif
3978
3979
3980/**
3981 * Memory load/store fence, waits for any pending writes and reads to complete.
3982 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3983 */
3984DECLINLINE(void) ASMMemoryFenceSSE2(void)
3985{
3986#if RT_INLINE_ASM_GNU_STYLE
3987 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3988#elif RT_INLINE_ASM_USES_INTRIN
3989 _mm_mfence();
3990#else
3991 __asm
3992 {
3993 _emit 0x0f
3994 _emit 0xae
3995 _emit 0xf0
3996 }
3997#endif
3998}
3999
4000
4001/**
4002 * Memory store fence, waits for any writes to complete.
4003 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
4004 */
4005DECLINLINE(void) ASMWriteFenceSSE(void)
4006{
4007#if RT_INLINE_ASM_GNU_STYLE
4008 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
4009#elif RT_INLINE_ASM_USES_INTRIN
4010 _mm_sfence();
4011#else
4012 __asm
4013 {
4014 _emit 0x0f
4015 _emit 0xae
4016 _emit 0xf8
4017 }
4018#endif
4019}
4020
4021
4022/**
4023 * Memory load fence, waits for any pending reads to complete.
4024 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
4025 */
4026DECLINLINE(void) ASMReadFenceSSE2(void)
4027{
4028#if RT_INLINE_ASM_GNU_STYLE
4029 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
4030#elif RT_INLINE_ASM_USES_INTRIN
4031 _mm_lfence();
4032#else
4033 __asm
4034 {
4035 _emit 0x0f
4036 _emit 0xae
4037 _emit 0xe8
4038 }
4039#endif
4040}
4041
4042
4043/**
4044 * Memory fence, waits for any pending writes and reads to complete.
4045 */
4046DECLINLINE(void) ASMMemoryFence(void)
4047{
4048 /** @todo use mfence? check if all cpus we care for support it. */
4049 uint32_t volatile u32;
4050 ASMAtomicXchgU32(&u32, 0);
4051}
4052
4053
4054/**
4055 * Write fence, waits for any pending writes to complete.
4056 */
4057DECLINLINE(void) ASMWriteFence(void)
4058{
4059 /** @todo use sfence? check if all cpus we care for support it. */
4060 ASMMemoryFence();
4061}
4062
4063
4064/**
4065 * Read fence, waits for any pending reads to complete.
4066 */
4067DECLINLINE(void) ASMReadFence(void)
4068{
4069 /** @todo use lfence? check if all cpus we care for support it. */
4070 ASMMemoryFence();
4071}
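
/* Example (illustrative sketch): publishing data to another thread with the
 * generic fences above.  The producer makes the payload visible before the
 * ready flag; the consumer orders the flag check before reading the payload.
 * The g_uPayload and g_fReady variables are hypothetical.
 * @code
 *     static uint32_t volatile g_uPayload = 0;
 *     static bool     volatile g_fReady   = false;
 *
 *     static void ExampleProduce(uint32_t uValue)
 *     {
 *         g_uPayload = uValue;
 *         ASMWriteFence();        // payload must be visible before the flag
 *         g_fReady = true;
 *     }
 *
 *     static bool ExampleTryConsume(uint32_t *puValue)
 *     {
 *         if (!g_fReady)
 *             return false;
 *         ASMReadFence();         // don't read the payload ahead of the flag
 *         *puValue = g_uPayload;
 *         return true;
 *     }
 * @endcode
 */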
4072
4073
4074/**
4075 * Atomically reads an unsigned 8-bit value, ordered.
4076 *
4077 * @returns Current *pu8 value
4078 * @param pu8 Pointer to the 8-bit variable to read.
4079 */
4080DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
4081{
4082 ASMMemoryFence();
4083 return *pu8; /* byte reads are atomic on x86 */
4084}
4085
4086
4087/**
4088 * Atomically reads an unsigned 8-bit value, unordered.
4089 *
4090 * @returns Current *pu8 value
4091 * @param pu8 Pointer to the 8-bit variable to read.
4092 */
4093DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
4094{
4095 return *pu8; /* byte reads are atomic on x86 */
4096}
4097
4098
4099/**
4100 * Atomically reads a signed 8-bit value, ordered.
4101 *
4102 * @returns Current *pi8 value
4103 * @param pi8 Pointer to the 8-bit variable to read.
4104 */
4105DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
4106{
4107 ASMMemoryFence();
4108 return *pi8; /* byte reads are atomic on x86 */
4109}
4110
4111
4112/**
4113 * Atomically reads a signed 8-bit value, unordered.
4114 *
4115 * @returns Current *pi8 value
4116 * @param pi8 Pointer to the 8-bit variable to read.
4117 */
4118DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4119{
4120 return *pi8; /* byte reads are atomic on x86 */
4121}
4122
4123
4124/**
4125 * Atomically reads an unsigned 16-bit value, ordered.
4126 *
4127 * @returns Current *pu16 value
4128 * @param pu16 Pointer to the 16-bit variable to read.
4129 */
4130DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4131{
4132 ASMMemoryFence();
4133 Assert(!((uintptr_t)pu16 & 1));
4134 return *pu16;
4135}
4136
4137
4138/**
4139 * Atomically reads an unsigned 16-bit value, unordered.
4140 *
4141 * @returns Current *pu16 value
4142 * @param pu16 Pointer to the 16-bit variable to read.
4143 */
4144DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4145{
4146 Assert(!((uintptr_t)pu16 & 1));
4147 return *pu16;
4148}
4149
4150
4151/**
4152 * Atomically reads a signed 16-bit value, ordered.
4153 *
4154 * @returns Current *pi16 value
4155 * @param pi16 Pointer to the 16-bit variable to read.
4156 */
4157DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4158{
4159 ASMMemoryFence();
4160 Assert(!((uintptr_t)pi16 & 1));
4161 return *pi16;
4162}
4163
4164
4165/**
4166 * Atomically reads a signed 16-bit value, unordered.
4167 *
4168 * @returns Current *pi16 value
4169 * @param pi16 Pointer to the 16-bit variable to read.
4170 */
4171DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4172{
4173 Assert(!((uintptr_t)pi16 & 1));
4174 return *pi16;
4175}
4176
4177
4178/**
4179 * Atomically reads an unsigned 32-bit value, ordered.
4180 *
4181 * @returns Current *pu32 value
4182 * @param pu32 Pointer to the 32-bit variable to read.
4183 */
4184DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4185{
4186 ASMMemoryFence();
4187 Assert(!((uintptr_t)pu32 & 3));
4188 return *pu32;
4189}
4190
4191
4192/**
4193 * Atomically reads an unsigned 32-bit value, unordered.
4194 *
4195 * @returns Current *pu32 value
4196 * @param pu32 Pointer to the 32-bit variable to read.
4197 */
4198DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4199{
4200 Assert(!((uintptr_t)pu32 & 3));
4201 return *pu32;
4202}
4203
4204
4205/**
4206 * Atomically reads a signed 32-bit value, ordered.
4207 *
4208 * @returns Current *pi32 value
4209 * @param pi32 Pointer to the 32-bit variable to read.
4210 */
4211DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4212{
4213 ASMMemoryFence();
4214 Assert(!((uintptr_t)pi32 & 3));
4215 return *pi32;
4216}
4217
4218
4219/**
4220 * Atomically reads a signed 32-bit value, unordered.
4221 *
4222 * @returns Current *pi32 value
4223 * @param pi32 Pointer to the 32-bit variable to read.
4224 */
4225DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4226{
4227 Assert(!((uintptr_t)pi32 & 3));
4228 return *pi32;
4229}
4230
4231
4232/**
4233 * Atomically reads an unsigned 64-bit value, ordered.
4234 *
4235 * @returns Current *pu64 value
4236 * @param pu64 Pointer to the 64-bit variable to read.
4237 * The memory pointed to must be writable.
4238 * @remark This will fault if the memory is read-only!
4239 */
4240#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4241 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
4242DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4243#else
4244DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4245{
4246 uint64_t u64;
4247# ifdef RT_ARCH_AMD64
4248 Assert(!((uintptr_t)pu64 & 7));
4249/*# if RT_INLINE_ASM_GNU_STYLE
4250 __asm__ __volatile__( "mfence\n\t"
4251 "movq %1, %0\n\t"
4252 : "=r" (u64)
4253 : "m" (*pu64));
4254# else
4255 __asm
4256 {
4257 mfence
4258 mov rdx, [pu64]
4259 mov rax, [rdx]
4260 mov [u64], rax
4261 }
4262# endif*/
4263 ASMMemoryFence();
4264 u64 = *pu64;
4265# else /* !RT_ARCH_AMD64 */
4266# if RT_INLINE_ASM_GNU_STYLE
4267# if defined(PIC) || defined(__PIC__)
4268 uint32_t u32EBX = 0;
4269 Assert(!((uintptr_t)pu64 & 7));
4270 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4271 "lock; cmpxchg8b (%5)\n\t"
4272 "movl %3, %%ebx\n\t"
4273 : "=A" (u64),
4274# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4275 "+m" (*pu64)
4276# else
4277 "=m" (*pu64)
4278# endif
4279 : "0" (0),
4280 "m" (u32EBX),
4281 "c" (0),
4282 "S" (pu64));
4283# else /* !PIC */
4284 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4285 : "=A" (u64),
4286 "+m" (*pu64)
4287 : "0" (0),
4288 "b" (0),
4289 "c" (0));
4290# endif
4291# else
4292 Assert(!((uintptr_t)pu64 & 7));
4293 __asm
4294 {
4295 xor eax, eax
4296 xor edx, edx
4297 mov edi, pu64
4298 xor ecx, ecx
4299 xor ebx, ebx
4300 lock cmpxchg8b [edi]
4301 mov dword ptr [u64], eax
4302 mov dword ptr [u64 + 4], edx
4303 }
4304# endif
4305# endif /* !RT_ARCH_AMD64 */
4306 return u64;
4307}
4308#endif
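
/* Example (illustrative sketch): on 32-bit hosts a plain 64-bit load may tear,
 * so a 64-bit counter shared between threads should be read with the helper
 * above.  The g_cTotalBytes variable is a hypothetical statistics counter.
 * @code
 *     static uint64_t volatile g_cTotalBytes = 0;
 *
 *     static uint64_t ExampleGetTotalBytes(void)
 *     {
 *         // A single atomic 64-bit read, even when ARCH_BITS == 32.
 *         return ASMAtomicReadU64(&g_cTotalBytes);
 *     }
 * @endcode
 */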
4309
4310
4311/**
4312 * Atomically reads an unsigned 64-bit value, unordered.
4313 *
4314 * @returns Current *pu64 value
4315 * @param pu64 Pointer to the 64-bit variable to read.
4316 * The memory pointed to must be writable.
4317 * @remark This will fault if the memory is read-only!
4318 */
4319#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
4320 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
4321DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4322#else
4323DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4324{
4325 uint64_t u64;
4326# ifdef RT_ARCH_AMD64
4327 Assert(!((uintptr_t)pu64 & 7));
4328/*# if RT_INLINE_ASM_GNU_STYLE
4329 Assert(!((uintptr_t)pu64 & 7));
4330 __asm__ __volatile__("movq %1, %0\n\t"
4331 : "=r" (u64)
4332 : "m" (*pu64));
4333# else
4334 __asm
4335 {
4336 mov rdx, [pu64]
4337 mov rax, [rdx]
4338 mov [u64], rax
4339 }
4340# endif */
4341 u64 = *pu64;
4342# else /* !RT_ARCH_AMD64 */
4343# if RT_INLINE_ASM_GNU_STYLE
4344# if defined(PIC) || defined(__PIC__)
4345 uint32_t u32EBX = 0;
4346 uint32_t u32Spill;
4347 Assert(!((uintptr_t)pu64 & 7));
4348 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4349 "xor %%ecx,%%ecx\n\t"
4350 "xor %%edx,%%edx\n\t"
4351 "xchgl %%ebx, %3\n\t"
4352 "lock; cmpxchg8b (%4)\n\t"
4353 "movl %3, %%ebx\n\t"
4354 : "=A" (u64),
4355# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4356 "+m" (*pu64),
4357# else
4358 "=m" (*pu64),
4359# endif
4360 "=c" (u32Spill)
4361 : "m" (u32EBX),
4362 "S" (pu64));
4363# else /* !PIC */
4364 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4365 : "=A" (u64),
4366 "+m" (*pu64)
4367 : "0" (0),
4368 "b" (0),
4369 "c" (0));
4370# endif
4371# else
4372 Assert(!((uintptr_t)pu64 & 7));
4373 __asm
4374 {
4375 xor eax, eax
4376 xor edx, edx
4377 mov edi, pu64
4378 xor ecx, ecx
4379 xor ebx, ebx
4380 lock cmpxchg8b [edi]
4381 mov dword ptr [u64], eax
4382 mov dword ptr [u64 + 4], edx
4383 }
4384# endif
4385# endif /* !RT_ARCH_AMD64 */
4386 return u64;
4387}
4388#endif
4389
4390
4391/**
4392 * Atomically reads a signed 64-bit value, ordered.
4393 *
4394 * @returns Current *pi64 value
4395 * @param pi64 Pointer to the 64-bit variable to read.
4396 * The memory pointed to must be writable.
4397 * @remark This will fault if the memory is read-only!
4398 */
4399DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4400{
4401 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4402}
4403
4404
4405/**
4406 * Atomically reads a signed 64-bit value, unordered.
4407 *
4408 * @returns Current *pi64 value
4409 * @param pi64 Pointer to the 64-bit variable to read.
4410 * The memory pointed to must be writable.
4411 * @remark This will fault if the memory is read-only!
4412 */
4413DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4414{
4415 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4416}
4417
4418
4419/**
4420 * Atomically reads a pointer value, ordered.
4421 *
4422 * @returns Current *pv value
4423 * @param ppv Pointer to the pointer variable to read.
4424 */
4425DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4426{
4427#if ARCH_BITS == 32
4428 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4429#elif ARCH_BITS == 64
4430 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4431#else
4432# error "ARCH_BITS is bogus"
4433#endif
4434}
4435
4436
4437/**
4438 * Atomically reads a pointer value, unordered.
4439 *
4440 * @returns Current *pv value
4441 * @param ppv Pointer to the pointer variable to read.
4442 */
4443DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4444{
4445#if ARCH_BITS == 32
4446 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4447#elif ARCH_BITS == 64
4448 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4449#else
4450# error "ARCH_BITS is bogus"
4451#endif
4452}
4453
4454
4455/**
4456 * Atomically reads a boolean value, ordered.
4457 *
4458 * @returns Current *pf value
4459 * @param pf Pointer to the boolean variable to read.
4460 */
4461DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4462{
4463 ASMMemoryFence();
4464 return *pf; /* byte reads are atomic on x86 */
4465}
4466
4467
4468/**
4469 * Atomically reads a boolean value, unordered.
4470 *
4471 * @returns Current *pf value
4472 * @param pf Pointer to the boolean variable to read.
4473 */
4474DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4475{
4476 return *pf; /* byte reads are atomic on x86 */
4477}
4478
4479
4480/**
4481 * Atomically read a typical IPRT handle value, ordered.
4482 *
4483 * @param ph Pointer to the handle variable to read.
4484 * @param phRes Where to store the result.
4485 *
4486 * @remarks This doesn't currently work for all handles (like RTFILE).
4487 */
4488#if HC_ARCH_BITS == 32
4489# define ASMAtomicReadHandle(ph, phRes) \
4490 do { \
4491 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4492 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4493 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4494 } while (0)
4495#elif HC_ARCH_BITS == 64
4496# define ASMAtomicReadHandle(ph, phRes) \
4497 do { \
4498 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4499 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4500 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4501 } while (0)
4502#else
4503# error HC_ARCH_BITS
4504#endif
4505
4506
4507/**
4508 * Atomically read a typical IPRT handle value, unordered.
4509 *
4510 * @param ph Pointer to the handle variable to read.
4511 * @param phRes Where to store the result.
4512 *
4513 * @remarks This doesn't currently work for all handles (like RTFILE).
4514 */
4515#if HC_ARCH_BITS == 32
4516# define ASMAtomicUoReadHandle(ph, phRes) \
4517 do { \
4518 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4519 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4520 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4521 } while (0)
4522#elif HC_ARCH_BITS == 64
4523# define ASMAtomicUoReadHandle(ph, phRes) \
4524 do { \
4525 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4526 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4527 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4528 } while (0)
4529#else
4530# error HC_ARCH_BITS
4531#endif
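/* Usage sketch (illustrative only): reading a pointer-sized handle that may be
 * replaced by another thread. Assumes iprt/semaphore.h for RTSEMEVENT,
 * NIL_RTSEMEVENT and RTSemEventSignal; s_hEvent is a made-up name.
 * @code
 *      static RTSEMEVENT volatile s_hEvent;
 *      RTSEMEVENT hEvent;
 *      ASMAtomicReadHandle(&s_hEvent, &hEvent);
 *      if (hEvent != NIL_RTSEMEVENT)
 *          RTSemEventSignal(hEvent);
 * @endcode
 */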
4532
4533
4534/**
4535 * Atomically reads a value whose size might differ
4536 * between platforms or compilers, ordered.
4537 *
4538 * @param pu Pointer to the variable to read.
4539 * @param puRes Where to store the result.
4540 */
4541#define ASMAtomicReadSize(pu, puRes) \
4542 do { \
4543 switch (sizeof(*(pu))) { \
4544 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4545 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4546 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4547 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4548 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4549 } \
4550 } while (0)
4551
4552
4553/**
4554 * Atomically reads a value whose size might differ
4555 * between platforms or compilers, unordered.
4556 *
4557 * @param pu Pointer to the variable to read.
4558 * @param puRes Where to store the result.
4559 */
4560#define ASMAtomicUoReadSize(pu, puRes) \
4561 do { \
4562 switch (sizeof(*(pu))) { \
4563 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4564 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4565 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4566 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4567            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4568 } \
4569 } while (0)
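/* Usage sketch (illustrative only): ASMAtomicReadSize picks the right
 * fixed-size read from sizeof() of the variable, which is convenient for
 * types such as size_t that are 32-bit on some hosts and 64-bit on others.
 * The variable names are made up.
 * @code
 *      static size_t volatile s_cbUsed;
 *      size_t cbUsed;
 *      ASMAtomicReadSize(&s_cbUsed, &cbUsed);
 * @endcode
 */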
4570
4571
4572/**
4573 * Atomically writes an unsigned 8-bit value, ordered.
4574 *
4575 * @param pu8 Pointer to the 8-bit variable.
4576 * @param u8 The 8-bit value to assign to *pu8.
4577 */
4578DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4579{
4580 ASMAtomicXchgU8(pu8, u8);
4581}
4582
4583
4584/**
4585 * Atomically writes an unsigned 8-bit value, unordered.
4586 *
4587 * @param pu8 Pointer to the 8-bit variable.
4588 * @param u8 The 8-bit value to assign to *pu8.
4589 */
4590DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4591{
4592 *pu8 = u8; /* byte writes are atomic on x86 */
4593}
4594
4595
4596/**
4597 * Atomically writes a signed 8-bit value, ordered.
4598 *
4599 * @param   pi8     Pointer to the 8-bit variable.
4600 * @param i8 The 8-bit value to assign to *pi8.
4601 */
4602DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4603{
4604 ASMAtomicXchgS8(pi8, i8);
4605}
4606
4607
4608/**
4609 * Atomically writes a signed 8-bit value, unordered.
4610 *
4611 * @param   pi8     Pointer to the 8-bit variable.
4612 * @param i8 The 8-bit value to assign to *pi8.
4613 */
4614DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4615{
4616 *pi8 = i8; /* byte writes are atomic on x86 */
4617}
4618
4619
4620/**
4621 * Atomically writes an unsigned 16-bit value, ordered.
4622 *
4623 * @param pu16 Pointer to the 16-bit variable.
4624 * @param u16 The 16-bit value to assign to *pu16.
4625 */
4626DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4627{
4628 ASMAtomicXchgU16(pu16, u16);
4629}
4630
4631
4632/**
4633 * Atomically writes an unsigned 16-bit value, unordered.
4634 *
4635 * @param pu16 Pointer to the 16-bit variable.
4636 * @param u16 The 16-bit value to assign to *pu16.
4637 */
4638DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4639{
4640 Assert(!((uintptr_t)pu16 & 1));
4641 *pu16 = u16;
4642}
4643
4644
4645/**
4646 * Atomically writes a signed 16-bit value, ordered.
4647 *
4648 * @param   pi16    Pointer to the 16-bit variable.
4649 * @param i16 The 16-bit value to assign to *pi16.
4650 */
4651DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4652{
4653 ASMAtomicXchgS16(pi16, i16);
4654}
4655
4656
4657/**
4658 * Atomically writes a signed 16-bit value, unordered.
4659 *
4660 * @param   pi16    Pointer to the 16-bit variable.
4661 * @param i16 The 16-bit value to assign to *pi16.
4662 */
4663DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4664{
4665 Assert(!((uintptr_t)pi16 & 1));
4666 *pi16 = i16;
4667}
4668
4669
4670/**
4671 * Atomically writes an unsigned 32-bit value, ordered.
4672 *
4673 * @param pu32 Pointer to the 32-bit variable.
4674 * @param u32 The 32-bit value to assign to *pu32.
4675 */
4676DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4677{
4678 ASMAtomicXchgU32(pu32, u32);
4679}
4680
4681
4682/**
4683 * Atomically writes an unsigned 32-bit value, unordered.
4684 *
4685 * @param pu32 Pointer to the 32-bit variable.
4686 * @param u32 The 32-bit value to assign to *pu32.
4687 */
4688DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4689{
4690 Assert(!((uintptr_t)pu32 & 3));
4691 *pu32 = u32;
4692}
4693
4694
4695/**
4696 * Atomically writes a signed 32-bit value, ordered.
4697 *
4698 * @param   pi32    Pointer to the 32-bit variable.
4699 * @param i32 The 32-bit value to assign to *pi32.
4700 */
4701DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4702{
4703 ASMAtomicXchgS32(pi32, i32);
4704}
4705
4706
4707/**
4708 * Atomically writes a signed 32-bit value, unordered.
4709 *
4710 * @param   pi32    Pointer to the 32-bit variable.
4711 * @param i32 The 32-bit value to assign to *pi32.
4712 */
4713DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4714{
4715 Assert(!((uintptr_t)pi32 & 3));
4716 *pi32 = i32;
4717}
4718
4719
4720/**
4721 * Atomically writes an unsigned 64-bit value, ordered.
4722 *
4723 * @param pu64 Pointer to the 64-bit variable.
4724 * @param u64 The 64-bit value to assign to *pu64.
4725 */
4726DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4727{
4728 ASMAtomicXchgU64(pu64, u64);
4729}
4730
4731
4732/**
4733 * Atomically writes an unsigned 64-bit value, unordered.
4734 *
4735 * @param pu64 Pointer to the 64-bit variable.
4736 * @param u64 The 64-bit value to assign to *pu64.
4737 */
4738DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4739{
4740 Assert(!((uintptr_t)pu64 & 7));
4741#if ARCH_BITS == 64
4742 *pu64 = u64;
4743#else
4744 ASMAtomicXchgU64(pu64, u64);
4745#endif
4746}
4747
4748
4749/**
4750 * Atomically writes a signed 64-bit value, ordered.
4751 *
4752 * @param pi64 Pointer to the 64-bit variable.
4753 * @param i64 The 64-bit value to assign to *pi64.
4754 */
4755DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4756{
4757 ASMAtomicXchgS64(pi64, i64);
4758}
4759
4760
4761/**
4762 * Atomically writes a signed 64-bit value, unordered.
4763 *
4764 * @param pi64 Pointer to the 64-bit variable.
4765 * @param i64 The 64-bit value to assign to *pi64.
4766 */
4767DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4768{
4769 Assert(!((uintptr_t)pi64 & 7));
4770#if ARCH_BITS == 64
4771 *pi64 = i64;
4772#else
4773 ASMAtomicXchgS64(pi64, i64);
4774#endif
4775}
4776
4777
4778/**
4779 * Atomically writes a boolean value, ordered.
4780 *
4781 * @param pf Pointer to the boolean variable.
4782 * @param f The boolean value to assign to *pf.
4783 */
4784DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4785{
4786 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4787}
4788
4789
4790/**
4791 * Atomically writes a boolean value, unordered.
4792 *
4793 * @param pf Pointer to the boolean variable.
4794 * @param f The boolean value to assign to *pf.
4795 */
4796DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4797{
4798 *pf = f; /* byte writes are atomic on x86 */
4799}
4800
4801
4802/**
4803 * Atomically writes a pointer value, ordered.
4804 *
4806 * @param   ppv     Pointer to the pointer variable.
4807 * @param   pv      The pointer value to assign to *ppv.
4808 */
4809DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4810{
4811#if ARCH_BITS == 32
4812 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4813#elif ARCH_BITS == 64
4814 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4815#else
4816# error "ARCH_BITS is bogus"
4817#endif
4818}
4819
4820
4821/**
4822 * Atomically writes a pointer value, unordered.
4823 *
4825 * @param   ppv     Pointer to the pointer variable.
4826 * @param   pv      The pointer value to assign to *ppv.
4827 */
4828DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4829{
4830#if ARCH_BITS == 32
4831 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4832#elif ARCH_BITS == 64
4833 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4834#else
4835# error "ARCH_BITS is bogus"
4836#endif
4837}
4838
4839
4840/**
4841 * Atomically write a typical IPRT handle value, ordered.
4842 *
4843 * @param ph Pointer to the variable to update.
4844 * @param hNew The value to assign to *ph.
4845 *
4846 * @remarks This doesn't currently work for all handles (like RTFILE).
4847 */
4848#if HC_ARCH_BITS == 32
4849# define ASMAtomicWriteHandle(ph, hNew) \
4850 do { \
4851 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4852 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4853 } while (0)
4854#elif HC_ARCH_BITS == 64
4855# define ASMAtomicWriteHandle(ph, hNew) \
4856 do { \
4857 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4858 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4859 } while (0)
4860#else
4861# error HC_ARCH_BITS
4862#endif
4863
4864
4865/**
4866 * Atomically write a typical IPRT handle value, unordered.
4867 *
4868 * @param ph Pointer to the variable to update.
4869 * @param hNew The value to assign to *ph.
4870 *
4871 * @remarks This doesn't currently work for all handles (like RTFILE).
4872 */
4873#if HC_ARCH_BITS == 32
4874# define ASMAtomicUoWriteHandle(ph, hNew) \
4875 do { \
4876 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4877 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4878 } while (0)
4879#elif HC_ARCH_BITS == 64
4880# define ASMAtomicUoWriteHandle(ph, hNew) \
4881 do { \
4882 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4883 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4884 } while (0)
4885#else
4886# error HC_ARCH_BITS
4887#endif
4888
4889
4890/**
4891 * Atomically writes a value whose size might differ
4892 * between platforms or compilers, ordered.
4893 *
4894 * @param pu Pointer to the variable to update.
4895 * @param uNew The value to assign to *pu.
4896 */
4897#define ASMAtomicWriteSize(pu, uNew) \
4898 do { \
4899 switch (sizeof(*(pu))) { \
4900 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4901 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4902 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4903 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4904 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4905 } \
4906 } while (0)
4907
4908/**
4909 * Atomically writes a value whose size might differ
4910 * between platforms or compilers, unordered.
4911 *
4912 * @param pu Pointer to the variable to update.
4913 * @param uNew The value to assign to *pu.
4914 */
4915#define ASMAtomicUoWriteSize(pu, uNew) \
4916 do { \
4917 switch (sizeof(*(pu))) { \
4918 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4919 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4920 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4921 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4922            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4923 } \
4924 } while (0)
4925
4926
4927
4928
4929/**
4930 * Invalidate page.
4931 *
4932 * @param pv Address of the page to invalidate.
4933 */
4934#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4935DECLASM(void) ASMInvalidatePage(void *pv);
4936#else
4937DECLINLINE(void) ASMInvalidatePage(void *pv)
4938{
4939# if RT_INLINE_ASM_USES_INTRIN
4940 __invlpg(pv);
4941
4942# elif RT_INLINE_ASM_GNU_STYLE
4943 __asm__ __volatile__("invlpg %0\n\t"
4944 : : "m" (*(uint8_t *)pv));
4945# else
4946 __asm
4947 {
4948# ifdef RT_ARCH_AMD64
4949 mov rax, [pv]
4950 invlpg [rax]
4951# else
4952 mov eax, [pv]
4953 invlpg [eax]
4954# endif
4955 }
4956# endif
4957}
4958#endif
4959
4960
4961/**
4962 * Write back the internal caches and invalidate them.
4963 */
4964#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4965DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4966#else
4967DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4968{
4969# if RT_INLINE_ASM_USES_INTRIN
4970 __wbinvd();
4971
4972# elif RT_INLINE_ASM_GNU_STYLE
4973 __asm__ __volatile__("wbinvd");
4974# else
4975 __asm
4976 {
4977 wbinvd
4978 }
4979# endif
4980}
4981#endif
4982
4983
4984/**
4985 * Invalidate internal and (perhaps) external caches without first
4986 * flushing dirty cache lines. Use with extreme care.
4987 */
4988#if RT_INLINE_ASM_EXTERNAL
4989DECLASM(void) ASMInvalidateInternalCaches(void);
4990#else
4991DECLINLINE(void) ASMInvalidateInternalCaches(void)
4992{
4993# if RT_INLINE_ASM_GNU_STYLE
4994 __asm__ __volatile__("invd");
4995# else
4996 __asm
4997 {
4998 invd
4999 }
5000# endif
5001}
5002#endif
5003
5004
5005#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5006# if PAGE_SIZE != 0x1000
5007# error "PAGE_SIZE is not 0x1000!"
5008# endif
5009#endif
5010
5011/**
5012 * Zeros a 4K memory page.
5013 *
5014 * @param pv Pointer to the memory block. This must be page aligned.
5015 */
5016#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5017DECLASM(void) ASMMemZeroPage(volatile void *pv);
5018# else
5019DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
5020{
5021# if RT_INLINE_ASM_USES_INTRIN
5022# ifdef RT_ARCH_AMD64
5023 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
5024# else
5025 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
5026# endif
5027
5028# elif RT_INLINE_ASM_GNU_STYLE
5029 RTCCUINTREG uDummy;
5030# ifdef RT_ARCH_AMD64
5031 __asm__ __volatile__("rep stosq"
5032 : "=D" (pv),
5033 "=c" (uDummy)
5034 : "0" (pv),
5035 "c" (0x1000 >> 3),
5036 "a" (0)
5037 : "memory");
5038# else
5039 __asm__ __volatile__("rep stosl"
5040 : "=D" (pv),
5041 "=c" (uDummy)
5042 : "0" (pv),
5043 "c" (0x1000 >> 2),
5044 "a" (0)
5045 : "memory");
5046# endif
5047# else
5048 __asm
5049 {
5050# ifdef RT_ARCH_AMD64
5051 xor rax, rax
5052 mov ecx, 0200h
5053 mov rdi, [pv]
5054 rep stosq
5055# else
5056 xor eax, eax
5057 mov ecx, 0400h
5058 mov edi, [pv]
5059 rep stosd
5060# endif
5061 }
5062# endif
5063}
5064# endif
5065
5066
5067/**
5068 * Zeros a memory block with a 32-bit aligned size.
5069 *
5070 * @param pv Pointer to the memory block.
5071 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5072 */
5073#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5074DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
5075#else
5076DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
5077{
5078# if RT_INLINE_ASM_USES_INTRIN
5079# ifdef RT_ARCH_AMD64
5080 if (!(cb & 7))
5081 __stosq((unsigned __int64 *)pv, 0, cb / 8);
5082 else
5083# endif
5084 __stosd((unsigned long *)pv, 0, cb / 4);
5085
5086# elif RT_INLINE_ASM_GNU_STYLE
5087 __asm__ __volatile__("rep stosl"
5088 : "=D" (pv),
5089 "=c" (cb)
5090 : "0" (pv),
5091 "1" (cb >> 2),
5092 "a" (0)
5093 : "memory");
5094# else
5095 __asm
5096 {
5097 xor eax, eax
5098# ifdef RT_ARCH_AMD64
5099 mov rcx, [cb]
5100 shr rcx, 2
5101 mov rdi, [pv]
5102# else
5103 mov ecx, [cb]
5104 shr ecx, 2
5105 mov edi, [pv]
5106# endif
5107 rep stosd
5108 }
5109# endif
5110}
5111#endif
5112
5113
5114/**
5115 * Fills a memory block of 32-bit aligned size with a 32-bit value.
5116 *
5117 * @param pv Pointer to the memory block.
5118 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5119 * @param u32 The value to fill with.
5120 */
5121#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5122DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5123#else
5124DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5125{
5126# if RT_INLINE_ASM_USES_INTRIN
5127# ifdef RT_ARCH_AMD64
5128 if (!(cb & 7))
5129 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5130 else
5131# endif
5132 __stosd((unsigned long *)pv, u32, cb / 4);
5133
5134# elif RT_INLINE_ASM_GNU_STYLE
5135 __asm__ __volatile__("rep stosl"
5136 : "=D" (pv),
5137 "=c" (cb)
5138 : "0" (pv),
5139 "1" (cb >> 2),
5140 "a" (u32)
5141 : "memory");
5142# else
5143 __asm
5144 {
5145# ifdef RT_ARCH_AMD64
5146 mov rcx, [cb]
5147 shr rcx, 2
5148 mov rdi, [pv]
5149# else
5150 mov ecx, [cb]
5151 shr ecx, 2
5152 mov edi, [pv]
5153# endif
5154 mov eax, [u32]
5155 rep stosd
5156 }
5157# endif
5158}
5159#endif
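/* Usage sketch (illustrative only): both ASMMemZero32 and ASMMemFill32 require
 * a byte count that is a multiple of 4; sizeof() of an array of uint32_t
 * trivially satisfies that. The structure below is made up.
 * @code
 *      struct { uint32_t au32[16]; } Tbl;
 *      ASMMemZero32(&Tbl, sizeof(Tbl));                        // all bits clear
 *      ASMMemFill32(&Tbl, sizeof(Tbl), UINT32_C(0xffffffff));  // all bits set
 * @endcode
 */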
5160
5161
5162/**
5163 * Checks if a memory page is all zeros.
5164 *
5165 * @returns true / false.
5166 *
5167 * @param   pvPage      Pointer to the page.  Must be aligned on a 16 byte
5168 *                      boundary.
5169 */
5170DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
5171{
5172# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5173 union { RTCCUINTREG r; bool f; } uAX;
5174 RTCCUINTREG xCX, xDI;
5175 Assert(!((uintptr_t)pvPage & 15));
5176 __asm__ __volatile__("repe; "
5177# ifdef RT_ARCH_AMD64
5178 "scasq\n\t"
5179# else
5180 "scasl\n\t"
5181# endif
5182 "setnc %%al\n\t"
5183 : "=&c" (xCX),
5184 "=&D" (xDI),
5185 "=&a" (uAX.r)
5186 : "mr" (pvPage),
5187# ifdef RT_ARCH_AMD64
5188 "0" (0x1000/8),
5189# else
5190 "0" (0x1000/4),
5191# endif
5192 "1" (pvPage),
5193 "2" (0));
5194 return uAX.f;
5195# else
5196 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
5197 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
5198 Assert(!((uintptr_t)pvPage & 15));
5199 for (;;)
5200 {
5201 if (puPtr[0]) return false;
5202 if (puPtr[4]) return false;
5203
5204 if (puPtr[2]) return false;
5205 if (puPtr[6]) return false;
5206
5207 if (puPtr[1]) return false;
5208 if (puPtr[5]) return false;
5209
5210 if (puPtr[3]) return false;
5211 if (puPtr[7]) return false;
5212
5213 if (!--cLeft)
5214 return true;
5215 puPtr += 8;
5216 }
5217 return true;
5218# endif
5219}
5220
5221
5222/**
5223 * Checks if a memory block is filled with the specified byte.
5224 *
5225 * This is a sort of inverted memchr.
5226 *
5227 * @returns Pointer to the byte which doesn't equal u8.
5228 * @returns NULL if all equal to u8.
5229 *
5230 * @param pv Pointer to the memory block.
5231 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5232 * @param u8 The value it's supposed to be filled with.
5233 *
5234 * @todo Fix name, it is a predicate function but it's not returning boolean!
5235 */
5236#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5237DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5238#else
5239DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5240{
5241/** @todo rewrite this in inline assembly? */
5242 uint8_t const *pb = (uint8_t const *)pv;
5243 for (; cb; cb--, pb++)
5244 if (RT_UNLIKELY(*pb != u8))
5245 return (void *)pb;
5246 return NULL;
5247}
5248#endif
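/* Usage sketch (illustrative only): verifying that a guard area is still
 * filled with 0xcc. pvGuard and cbGuard are made-up names; RTAssertMsg2 is
 * assumed from iprt/assert.h.
 * @code
 *      void *pvBad = ASMMemIsAll8(pvGuard, cbGuard, 0xcc);
 *      if (pvBad)
 *          RTAssertMsg2("guard byte overwritten at %p\n", pvBad);
 * @endcode
 */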
5249
5250
5251/**
5252 * Checks if a memory block is filled with the specified 32-bit value.
5253 *
5254 * This is a sort of inverted memchr.
5255 *
5256 * @returns Pointer to the first value which doesn't equal u32.
5257 * @returns NULL if all equal to u32.
5258 *
5259 * @param pv Pointer to the memory block.
5260 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5261 * @param u32 The value it's supposed to be filled with.
5262 *
5263 * @todo Fix name, it is a predicate function but it's not returning boolean!
5264 */
5265#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5266DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5267#else
5268DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5269{
5270/** @todo rewrite this in inline assembly? */
5271 uint32_t const *pu32 = (uint32_t const *)pv;
5272 for (; cb; cb -= 4, pu32++)
5273 if (RT_UNLIKELY(*pu32 != u32))
5274 return (uint32_t *)pu32;
5275 return NULL;
5276}
5277#endif
5278
5279
5280/**
5281 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5282 *
5283 * @returns u32F1 * u32F2.
5284 */
5285#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5286DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5287#else
5288DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5289{
5290# ifdef RT_ARCH_AMD64
5291 return (uint64_t)u32F1 * u32F2;
5292# else /* !RT_ARCH_AMD64 */
5293 uint64_t u64;
5294# if RT_INLINE_ASM_GNU_STYLE
5295 __asm__ __volatile__("mull %%edx"
5296 : "=A" (u64)
5297 : "a" (u32F2), "d" (u32F1));
5298# else
5299 __asm
5300 {
5301 mov edx, [u32F1]
5302 mov eax, [u32F2]
5303 mul edx
5304 mov dword ptr [u64], eax
5305 mov dword ptr [u64 + 4], edx
5306 }
5307# endif
5308 return u64;
5309# endif /* !RT_ARCH_AMD64 */
5310}
5311#endif
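/* Worked example: the product of two maximal 32-bit values does not fit in
 * 32 bits, but ASMMult2xU32RetU64 returns the full 64-bit result.
 * @code
 *      uint64_t u64 = ASMMult2xU32RetU64(UINT32_C(0xffffffff), UINT32_C(0xffffffff));
 *      Assert(u64 == UINT64_C(0xfffffffe00000001));
 * @endcode
 */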
5312
5313
5314/**
5315 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5316 *
5317 * @returns i32F1 * i32F2.
5318 */
5319#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5320DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5321#else
5322DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5323{
5324# ifdef RT_ARCH_AMD64
5325 return (int64_t)i32F1 * i32F2;
5326# else /* !RT_ARCH_AMD64 */
5327 int64_t i64;
5328# if RT_INLINE_ASM_GNU_STYLE
5329 __asm__ __volatile__("imull %%edx"
5330 : "=A" (i64)
5331 : "a" (i32F2), "d" (i32F1));
5332# else
5333 __asm
5334 {
5335 mov edx, [i32F1]
5336 mov eax, [i32F2]
5337 imul edx
5338 mov dword ptr [i64], eax
5339 mov dword ptr [i64 + 4], edx
5340 }
5341# endif
5342 return i64;
5343# endif /* !RT_ARCH_AMD64 */
5344}
5345#endif
5346
5347
5348/**
5349 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5350 *
5351 * @returns u64 / u32.
5352 */
5353#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5354DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5355#else
5356DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5357{
5358# ifdef RT_ARCH_AMD64
5359 return (uint32_t)(u64 / u32);
5360# else /* !RT_ARCH_AMD64 */
5361# if RT_INLINE_ASM_GNU_STYLE
5362 RTCCUINTREG uDummy;
5363 __asm__ __volatile__("divl %3"
5364 : "=a" (u32), "=d"(uDummy)
5365 : "A" (u64), "r" (u32));
5366# else
5367 __asm
5368 {
5369 mov eax, dword ptr [u64]
5370 mov edx, dword ptr [u64 + 4]
5371 mov ecx, [u32]
5372 div ecx
5373 mov [u32], eax
5374 }
5375# endif
5376 return u32;
5377# endif /* !RT_ARCH_AMD64 */
5378}
5379#endif
5380
5381
5382/**
5383 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5384 *
5385 * @returns i64 / i32.
5386 */
5387#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5388DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5389#else
5390DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5391{
5392# ifdef RT_ARCH_AMD64
5393 return (int32_t)(i64 / i32);
5394# else /* !RT_ARCH_AMD64 */
5395# if RT_INLINE_ASM_GNU_STYLE
5396 RTCCUINTREG iDummy;
5397 __asm__ __volatile__("idivl %3"
5398 : "=a" (i32), "=d"(iDummy)
5399 : "A" (i64), "r" (i32));
5400# else
5401 __asm
5402 {
5403 mov eax, dword ptr [i64]
5404 mov edx, dword ptr [i64 + 4]
5405 mov ecx, [i32]
5406 idiv ecx
5407 mov [i32], eax
5408 }
5409# endif
5410 return i32;
5411# endif /* !RT_ARCH_AMD64 */
5412}
5413#endif
5414
5415
5416/**
5417 * Divides a 64-bit unsigned value by a 32-bit unsigned value, returning the
5418 * 32-bit unsigned remainder.
5419 *
5420 * @returns u64 % u32.
5421 *
5422 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits, otherwise the division will trap and we'll crash.
5423 */
5424#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5425DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5426#else
5427DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5428{
5429# ifdef RT_ARCH_AMD64
5430 return (uint32_t)(u64 % u32);
5431# else /* !RT_ARCH_AMD64 */
5432# if RT_INLINE_ASM_GNU_STYLE
5433 RTCCUINTREG uDummy;
5434 __asm__ __volatile__("divl %3"
5435 : "=a" (uDummy), "=d"(u32)
5436 : "A" (u64), "r" (u32));
5437# else
5438 __asm
5439 {
5440 mov eax, dword ptr [u64]
5441 mov edx, dword ptr [u64 + 4]
5442 mov ecx, [u32]
5443 div ecx
5444 mov [u32], edx
5445 }
5446# endif
5447 return u32;
5448# endif /* !RT_ARCH_AMD64 */
5449}
5450#endif
5451
5452
5453/**
5454 * Divides a 64-bit signed value by a 32-bit signed value, returning the
5455 * 32-bit signed remainder.
5456 *
5457 * @returns i64 % i32.
5458 *
5459 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits, otherwise the division will trap and we'll crash.
5460 */
5461#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5462DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5463#else
5464DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5465{
5466# ifdef RT_ARCH_AMD64
5467 return (int32_t)(i64 % i32);
5468# else /* !RT_ARCH_AMD64 */
5469# if RT_INLINE_ASM_GNU_STYLE
5470 RTCCUINTREG iDummy;
5471 __asm__ __volatile__("idivl %3"
5472 : "=a" (iDummy), "=d"(i32)
5473 : "A" (i64), "r" (i32));
5474# else
5475 __asm
5476 {
5477 mov eax, dword ptr [i64]
5478 mov edx, dword ptr [i64 + 4]
5479 mov ecx, [i32]
5480 idiv ecx
5481 mov [i32], edx
5482 }
5483# endif
5484 return i32;
5485# endif /* !RT_ARCH_AMD64 */
5486}
5487#endif
5488
5489
5490/**
5491 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
5492 * using a 96 bit intermediate result.
5493 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5494 * __udivdi3 and __umoddi3 even if this inline function is not used.
5495 *
5496 * @returns (u64A * u32B) / u32C.
5497 * @param u64A The 64-bit value.
5498 * @param   u64A    The 64-bit value.
5499 * @param   u32B    The 32-bit value to multiply A by.
5500 * @param   u32C    The 32-bit value to divide A*B by.
5500 */
5501#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5502DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5503#else
5504DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5505{
5506# if RT_INLINE_ASM_GNU_STYLE
5507# ifdef RT_ARCH_AMD64
5508 uint64_t u64Result, u64Spill;
5509 __asm__ __volatile__("mulq %2\n\t"
5510 "divq %3\n\t"
5511 : "=a" (u64Result),
5512 "=d" (u64Spill)
5513 : "r" ((uint64_t)u32B),
5514 "r" ((uint64_t)u32C),
5515 "0" (u64A),
5516 "1" (0));
5517 return u64Result;
5518# else
5519 uint32_t u32Dummy;
5520 uint64_t u64Result;
5521 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5522 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5523 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5524 eax = u64A.hi */
5525 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5526 edx = u32C */
5527 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5528 edx = u32B */
5529 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5530 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5531 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5532 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5533 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5534 edx = u64Hi % u32C */
5535 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5536 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5537 "divl %%ecx \n\t" /* u64Result.lo */
5538 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5539 : "=A"(u64Result), "=c"(u32Dummy),
5540 "=S"(u32Dummy), "=D"(u32Dummy)
5541 : "a"((uint32_t)u64A),
5542 "S"((uint32_t)(u64A >> 32)),
5543 "c"(u32B),
5544 "D"(u32C));
5545 return u64Result;
5546# endif
5547# else
5548 RTUINT64U u;
5549 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5550 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5551 u64Hi += (u64Lo >> 32);
5552 u.s.Hi = (uint32_t)(u64Hi / u32C);
5553 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5554 return u.u;
5555# endif
5556}
5557#endif
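/* Usage sketch (illustrative only): scaling a 64-bit tick count to
 * nanoseconds without overflowing the intermediate product; cTicks and
 * uTicksPerSec are made-up names and the result must still fit in 64 bits.
 * @code
 *      uint64_t cNanoSecs = ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uTicksPerSec);
 * @endcode
 */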
5558
5559
5560/**
5561 * Probes a byte pointer for read access.
5562 *
5563 * While the function will fault if the byte is not read accessible,
5564 * the idea is to do this in a safe place like before acquiring locks
5565 * and such like.
5566 *
5567 * Also, this function guarantees that an eager compiler is not going
5568 * to optimize the probing away.
5569 *
5570 * @param pvByte Pointer to the byte.
5571 */
5572#if RT_INLINE_ASM_EXTERNAL
5573DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5574#else
5575DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5576{
5577 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5578 uint8_t u8;
5579# if RT_INLINE_ASM_GNU_STYLE
5580 __asm__ __volatile__("movb (%1), %0\n\t"
5581 : "=r" (u8)
5582 : "r" (pvByte));
5583# else
5584 __asm
5585 {
5586# ifdef RT_ARCH_AMD64
5587 mov rax, [pvByte]
5588 mov al, [rax]
5589# else
5590 mov eax, [pvByte]
5591 mov al, [eax]
5592# endif
5593 mov [u8], al
5594 }
5595# endif
5596 return u8;
5597}
5598#endif
5599
5600/**
5601 * Probes a buffer for read access page by page.
5602 *
5603 * While the function will fault if the buffer is not fully read
5604 * accessible, the idea is to do this in a safe place like before
5605 * acquiring locks and such like.
5606 *
5607 * Also, this function guarantees that an eager compiler is not going
5608 * to optimize the probing away.
5609 *
5610 * @param pvBuf Pointer to the buffer.
5611 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5612 */
5613DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5614{
5615 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5616 /* the first byte */
5617 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5618 ASMProbeReadByte(pu8);
5619
5620    /* the pages in between. */
5621 while (cbBuf > /*PAGE_SIZE*/0x1000)
5622 {
5623 ASMProbeReadByte(pu8);
5624 cbBuf -= /*PAGE_SIZE*/0x1000;
5625 pu8 += /*PAGE_SIZE*/0x1000;
5626 }
5627
5628 /* the last byte */
5629 ASMProbeReadByte(pu8 + cbBuf - 1);
5630}
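/* Usage sketch (illustrative only): probing a caller supplied request
 * structure before taking a spinlock, so that any page fault happens in a
 * safe context; pReq is a made-up name.
 * @code
 *      ASMProbeReadBuffer(pReq, sizeof(*pReq));
 *      // ... enter the critical section and use *pReq ...
 * @endcode
 */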
5631
5632
5633/** @def ASMBreakpoint
5634 * Debugger Breakpoint.
5635 * @remark In the gnu world we add a nop instruction after the int3 to
5636 * force gdb to remain at the int3 source line.
5637 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5638 * @internal
5639 */
5640#if RT_INLINE_ASM_GNU_STYLE
5641# ifndef __L4ENV__
5642# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5643# else
5644# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5645# endif
5646#else
5647# define ASMBreakpoint() __debugbreak()
5648#endif
5649
5650
5651
5652/** @defgroup grp_inline_bits Bit Operations
5653 * @{
5654 */
5655
5656
5657/**
5658 * Sets a bit in a bitmap.
5659 *
5660 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5661 * @param iBit The bit to set.
5662 *
5663 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5664 * However, doing so will yield better performance as well as avoiding
5665 * traps accessing the last bits in the bitmap.
5666 */
5667#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5668DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5669#else
5670DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5671{
5672# if RT_INLINE_ASM_USES_INTRIN
5673 _bittestandset((long *)pvBitmap, iBit);
5674
5675# elif RT_INLINE_ASM_GNU_STYLE
5676 __asm__ __volatile__("btsl %1, %0"
5677 : "=m" (*(volatile long *)pvBitmap)
5678 : "Ir" (iBit),
5679 "m" (*(volatile long *)pvBitmap)
5680 : "memory");
5681# else
5682 __asm
5683 {
5684# ifdef RT_ARCH_AMD64
5685 mov rax, [pvBitmap]
5686 mov edx, [iBit]
5687 bts [rax], edx
5688# else
5689 mov eax, [pvBitmap]
5690 mov edx, [iBit]
5691 bts [eax], edx
5692# endif
5693 }
5694# endif
5695}
5696#endif
5697
5698
5699/**
5700 * Atomically sets a bit in a bitmap, ordered.
5701 *
5702 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5703 * the memory access isn't atomic!
5704 * @param iBit The bit to set.
5705 */
5706#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5707DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5708#else
5709DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5710{
5711 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5712# if RT_INLINE_ASM_USES_INTRIN
5713 _interlockedbittestandset((long *)pvBitmap, iBit);
5714# elif RT_INLINE_ASM_GNU_STYLE
5715 __asm__ __volatile__("lock; btsl %1, %0"
5716 : "=m" (*(volatile long *)pvBitmap)
5717 : "Ir" (iBit),
5718 "m" (*(volatile long *)pvBitmap)
5719 : "memory");
5720# else
5721 __asm
5722 {
5723# ifdef RT_ARCH_AMD64
5724 mov rax, [pvBitmap]
5725 mov edx, [iBit]
5726 lock bts [rax], edx
5727# else
5728 mov eax, [pvBitmap]
5729 mov edx, [iBit]
5730 lock bts [eax], edx
5731# endif
5732 }
5733# endif
5734}
5735#endif
5736
5737
5738/**
5739 * Clears a bit in a bitmap.
5740 *
5741 * @param pvBitmap Pointer to the bitmap.
5742 * @param iBit The bit to clear.
5743 *
5744 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5745 * However, doing so will yield better performance as well as avoiding
5746 * traps accessing the last bits in the bitmap.
5747 */
5748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5749DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5750#else
5751DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5752{
5753# if RT_INLINE_ASM_USES_INTRIN
5754 _bittestandreset((long *)pvBitmap, iBit);
5755
5756# elif RT_INLINE_ASM_GNU_STYLE
5757 __asm__ __volatile__("btrl %1, %0"
5758 : "=m" (*(volatile long *)pvBitmap)
5759 : "Ir" (iBit),
5760 "m" (*(volatile long *)pvBitmap)
5761 : "memory");
5762# else
5763 __asm
5764 {
5765# ifdef RT_ARCH_AMD64
5766 mov rax, [pvBitmap]
5767 mov edx, [iBit]
5768 btr [rax], edx
5769# else
5770 mov eax, [pvBitmap]
5771 mov edx, [iBit]
5772 btr [eax], edx
5773# endif
5774 }
5775# endif
5776}
5777#endif
5778
5779
5780/**
5781 * Atomically clears a bit in a bitmap, ordered.
5782 *
5783 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5784 * the memory access isn't atomic!
5785 * @param   iBit        The bit to clear.
5786 * @remarks No memory barrier, take care on smp.
5787 */
5788#if RT_INLINE_ASM_EXTERNAL
5789DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5790#else
5791DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5792{
5793 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5794# if RT_INLINE_ASM_GNU_STYLE
5795 __asm__ __volatile__("lock; btrl %1, %0"
5796 : "=m" (*(volatile long *)pvBitmap)
5797 : "Ir" (iBit),
5798 "m" (*(volatile long *)pvBitmap)
5799 : "memory");
5800# else
5801 __asm
5802 {
5803# ifdef RT_ARCH_AMD64
5804 mov rax, [pvBitmap]
5805 mov edx, [iBit]
5806 lock btr [rax], edx
5807# else
5808 mov eax, [pvBitmap]
5809 mov edx, [iBit]
5810 lock btr [eax], edx
5811# endif
5812 }
5813# endif
5814}
5815#endif
5816
5817
5818/**
5819 * Toggles a bit in a bitmap.
5820 *
5821 * @param pvBitmap Pointer to the bitmap.
5822 * @param iBit The bit to toggle.
5823 *
5824 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5825 * However, doing so will yield better performance as well as avoiding
5826 * traps accessing the last bits in the bitmap.
5827 */
5828#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5829DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5830#else
5831DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5832{
5833# if RT_INLINE_ASM_USES_INTRIN
5834 _bittestandcomplement((long *)pvBitmap, iBit);
5835# elif RT_INLINE_ASM_GNU_STYLE
5836 __asm__ __volatile__("btcl %1, %0"
5837 : "=m" (*(volatile long *)pvBitmap)
5838 : "Ir" (iBit),
5839 "m" (*(volatile long *)pvBitmap)
5840 : "memory");
5841# else
5842 __asm
5843 {
5844# ifdef RT_ARCH_AMD64
5845 mov rax, [pvBitmap]
5846 mov edx, [iBit]
5847 btc [rax], edx
5848# else
5849 mov eax, [pvBitmap]
5850 mov edx, [iBit]
5851 btc [eax], edx
5852# endif
5853 }
5854# endif
5855}
5856#endif
5857
5858
5859/**
5860 * Atomically toggles a bit in a bitmap, ordered.
5861 *
5862 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5863 * the memory access isn't atomic!
5864 * @param   iBit        The bit to toggle.
5865 */
5866#if RT_INLINE_ASM_EXTERNAL
5867DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5868#else
5869DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5870{
5871 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5872# if RT_INLINE_ASM_GNU_STYLE
5873 __asm__ __volatile__("lock; btcl %1, %0"
5874 : "=m" (*(volatile long *)pvBitmap)
5875 : "Ir" (iBit),
5876 "m" (*(volatile long *)pvBitmap)
5877 : "memory");
5878# else
5879 __asm
5880 {
5881# ifdef RT_ARCH_AMD64
5882 mov rax, [pvBitmap]
5883 mov edx, [iBit]
5884 lock btc [rax], edx
5885# else
5886 mov eax, [pvBitmap]
5887 mov edx, [iBit]
5888 lock btc [eax], edx
5889# endif
5890 }
5891# endif
5892}
5893#endif
5894
5895
5896/**
5897 * Tests and sets a bit in a bitmap.
5898 *
5899 * @returns true if the bit was set.
5900 * @returns false if the bit was clear.
5901 *
5902 * @param pvBitmap Pointer to the bitmap.
5903 * @param iBit The bit to test and set.
5904 *
5905 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5906 * However, doing so will yield better performance as well as avoiding
5907 * traps accessing the last bits in the bitmap.
5908 */
5909#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5910DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5911#else
5912DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5913{
5914 union { bool f; uint32_t u32; uint8_t u8; } rc;
5915# if RT_INLINE_ASM_USES_INTRIN
5916 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5917
5918# elif RT_INLINE_ASM_GNU_STYLE
5919 __asm__ __volatile__("btsl %2, %1\n\t"
5920 "setc %b0\n\t"
5921 "andl $1, %0\n\t"
5922 : "=q" (rc.u32),
5923 "=m" (*(volatile long *)pvBitmap)
5924 : "Ir" (iBit),
5925 "m" (*(volatile long *)pvBitmap)
5926 : "memory");
5927# else
5928 __asm
5929 {
5930 mov edx, [iBit]
5931# ifdef RT_ARCH_AMD64
5932 mov rax, [pvBitmap]
5933 bts [rax], edx
5934# else
5935 mov eax, [pvBitmap]
5936 bts [eax], edx
5937# endif
5938 setc al
5939 and eax, 1
5940 mov [rc.u32], eax
5941 }
5942# endif
5943 return rc.f;
5944}
5945#endif
5946
5947
5948/**
5949 * Atomically tests and sets a bit in a bitmap, ordered.
5950 *
5951 * @returns true if the bit was set.
5952 * @returns false if the bit was clear.
5953 *
5954 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5955 * the memory access isn't atomic!
5956 * @param   iBit        The bit to test and set.
5957 */
5958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5959DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5960#else
5961DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5962{
5963 union { bool f; uint32_t u32; uint8_t u8; } rc;
5964 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5965# if RT_INLINE_ASM_USES_INTRIN
5966 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5967# elif RT_INLINE_ASM_GNU_STYLE
5968 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5969 "setc %b0\n\t"
5970 "andl $1, %0\n\t"
5971 : "=q" (rc.u32),
5972 "=m" (*(volatile long *)pvBitmap)
5973 : "Ir" (iBit),
5974 "m" (*(volatile long *)pvBitmap)
5975 : "memory");
5976# else
5977 __asm
5978 {
5979 mov edx, [iBit]
5980# ifdef RT_ARCH_AMD64
5981 mov rax, [pvBitmap]
5982 lock bts [rax], edx
5983# else
5984 mov eax, [pvBitmap]
5985 lock bts [eax], edx
5986# endif
5987 setc al
5988 and eax, 1
5989 mov [rc.u32], eax
5990 }
5991# endif
5992 return rc.f;
5993}
5994#endif
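/* Usage sketch (illustrative only): claiming a slot in a shared allocation
 * bitmap; only the thread that flips the bit from clear to set owns the slot.
 * The names below are made up.
 * @code
 *      static uint32_t volatile s_au32Bitmap[4];   // 128 slots, 32-bit aligned
 *      int32_t iSlot = 42;                         // slot to try to claim
 *      if (!ASMAtomicBitTestAndSet(&s_au32Bitmap[0], iSlot))
 *      {
 *          // the bit was clear, the slot is now ours
 *      }
 * @endcode
 */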
5995
5996
5997/**
5998 * Tests and clears a bit in a bitmap.
5999 *
6000 * @returns true if the bit was set.
6001 * @returns false if the bit was clear.
6002 *
6003 * @param pvBitmap Pointer to the bitmap.
6004 * @param iBit The bit to test and clear.
6005 *
6006 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6007 * However, doing so will yield better performance as well as avoiding
6008 * traps accessing the last bits in the bitmap.
6009 */
6010#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6011DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6012#else
6013DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6014{
6015 union { bool f; uint32_t u32; uint8_t u8; } rc;
6016# if RT_INLINE_ASM_USES_INTRIN
6017 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
6018
6019# elif RT_INLINE_ASM_GNU_STYLE
6020 __asm__ __volatile__("btrl %2, %1\n\t"
6021 "setc %b0\n\t"
6022 "andl $1, %0\n\t"
6023 : "=q" (rc.u32),
6024 "=m" (*(volatile long *)pvBitmap)
6025 : "Ir" (iBit),
6026 "m" (*(volatile long *)pvBitmap)
6027 : "memory");
6028# else
6029 __asm
6030 {
6031 mov edx, [iBit]
6032# ifdef RT_ARCH_AMD64
6033 mov rax, [pvBitmap]
6034 btr [rax], edx
6035# else
6036 mov eax, [pvBitmap]
6037 btr [eax], edx
6038# endif
6039 setc al
6040 and eax, 1
6041 mov [rc.u32], eax
6042 }
6043# endif
6044 return rc.f;
6045}
6046#endif
6047
6048
6049/**
6050 * Atomically tests and clears a bit in a bitmap, ordered.
6051 *
6052 * @returns true if the bit was set.
6053 * @returns false if the bit was clear.
6054 *
6055 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6056 * the memory access isn't atomic!
6057 * @param iBit The bit to test and clear.
6058 *
6059 * @remarks No memory barrier, take care on smp.
6060 */
6061#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6062DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6063#else
6064DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6065{
6066 union { bool f; uint32_t u32; uint8_t u8; } rc;
6067 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6068# if RT_INLINE_ASM_USES_INTRIN
6069 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
6070
6071# elif RT_INLINE_ASM_GNU_STYLE
6072 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6073 "setc %b0\n\t"
6074 "andl $1, %0\n\t"
6075 : "=q" (rc.u32),
6076 "=m" (*(volatile long *)pvBitmap)
6077 : "Ir" (iBit),
6078 "m" (*(volatile long *)pvBitmap)
6079 : "memory");
6080# else
6081 __asm
6082 {
6083 mov edx, [iBit]
6084# ifdef RT_ARCH_AMD64
6085 mov rax, [pvBitmap]
6086 lock btr [rax], edx
6087# else
6088 mov eax, [pvBitmap]
6089 lock btr [eax], edx
6090# endif
6091 setc al
6092 and eax, 1
6093 mov [rc.u32], eax
6094 }
6095# endif
6096 return rc.f;
6097}
6098#endif
6099
6100
6101/**
6102 * Tests and toggles a bit in a bitmap.
6103 *
6104 * @returns true if the bit was set.
6105 * @returns false if the bit was clear.
6106 *
6107 * @param pvBitmap Pointer to the bitmap.
6108 * @param iBit The bit to test and toggle.
6109 *
6110 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6111 * However, doing so will yield better performance as well as avoiding
6112 * traps accessing the last bits in the bitmap.
6113 */
6114#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6115DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6116#else
6117DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6118{
6119 union { bool f; uint32_t u32; uint8_t u8; } rc;
6120# if RT_INLINE_ASM_USES_INTRIN
6121 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6122
6123# elif RT_INLINE_ASM_GNU_STYLE
6124 __asm__ __volatile__("btcl %2, %1\n\t"
6125 "setc %b0\n\t"
6126 "andl $1, %0\n\t"
6127 : "=q" (rc.u32),
6128 "=m" (*(volatile long *)pvBitmap)
6129 : "Ir" (iBit),
6130 "m" (*(volatile long *)pvBitmap)
6131 : "memory");
6132# else
6133 __asm
6134 {
6135 mov edx, [iBit]
6136# ifdef RT_ARCH_AMD64
6137 mov rax, [pvBitmap]
6138 btc [rax], edx
6139# else
6140 mov eax, [pvBitmap]
6141 btc [eax], edx
6142# endif
6143 setc al
6144 and eax, 1
6145 mov [rc.u32], eax
6146 }
6147# endif
6148 return rc.f;
6149}
6150#endif
6151
6152
6153/**
6154 * Atomically tests and toggles a bit in a bitmap, ordered.
6155 *
6156 * @returns true if the bit was set.
6157 * @returns false if the bit was clear.
6158 *
6159 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6160 * the memory access isn't atomic!
6161 * @param iBit The bit to test and toggle.
6162 */
6163#if RT_INLINE_ASM_EXTERNAL
6164DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6165#else
6166DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6167{
6168 union { bool f; uint32_t u32; uint8_t u8; } rc;
6169 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6170# if RT_INLINE_ASM_GNU_STYLE
6171 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6172 "setc %b0\n\t"
6173 "andl $1, %0\n\t"
6174 : "=q" (rc.u32),
6175 "=m" (*(volatile long *)pvBitmap)
6176 : "Ir" (iBit),
6177 "m" (*(volatile long *)pvBitmap)
6178 : "memory");
6179# else
6180 __asm
6181 {
6182 mov edx, [iBit]
6183# ifdef RT_ARCH_AMD64
6184 mov rax, [pvBitmap]
6185 lock btc [rax], edx
6186# else
6187 mov eax, [pvBitmap]
6188 lock btc [eax], edx
6189# endif
6190 setc al
6191 and eax, 1
6192 mov [rc.u32], eax
6193 }
6194# endif
6195 return rc.f;
6196}
6197#endif
6198
6199
6200/**
6201 * Tests if a bit in a bitmap is set.
6202 *
6203 * @returns true if the bit is set.
6204 * @returns false if the bit is clear.
6205 *
6206 * @param pvBitmap Pointer to the bitmap.
6207 * @param iBit The bit to test.
6208 *
6209 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6210 * However, doing so will yield better performance as well as avoiding
6211 * traps accessing the last bits in the bitmap.
6212 */
6213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6214DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6215#else
6216DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6217{
6218 union { bool f; uint32_t u32; uint8_t u8; } rc;
6219# if RT_INLINE_ASM_USES_INTRIN
6220 rc.u32 = _bittest((long *)pvBitmap, iBit);
6221# elif RT_INLINE_ASM_GNU_STYLE
6222
6223 __asm__ __volatile__("btl %2, %1\n\t"
6224 "setc %b0\n\t"
6225 "andl $1, %0\n\t"
6226 : "=q" (rc.u32)
6227 : "m" (*(const volatile long *)pvBitmap),
6228 "Ir" (iBit)
6229 : "memory");
6230# else
6231 __asm
6232 {
6233 mov edx, [iBit]
6234# ifdef RT_ARCH_AMD64
6235 mov rax, [pvBitmap]
6236 bt [rax], edx
6237# else
6238 mov eax, [pvBitmap]
6239 bt [eax], edx
6240# endif
6241 setc al
6242 and eax, 1
6243 mov [rc.u32], eax
6244 }
6245# endif
6246 return rc.f;
6247}
6248#endif
6249
6250
6251/**
6252 * Clears a bit range within a bitmap.
6253 *
6255 * @param   pvBitmap    Pointer to the bitmap.
6256 * @param   iBitStart   The first bit to clear.
6256 * @param iBitEnd The first bit not to clear.
6257 */
6258DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6259{
6260 if (iBitStart < iBitEnd)
6261 {
6262 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6263 int iStart = iBitStart & ~31;
6264 int iEnd = iBitEnd & ~31;
6265 if (iStart == iEnd)
6266 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6267 else
6268 {
6269 /* bits in first dword. */
6270 if (iBitStart & 31)
6271 {
6272 *pu32 &= (1 << (iBitStart & 31)) - 1;
6273 pu32++;
6274 iBitStart = iStart + 32;
6275 }
6276
6277 /* whole dword. */
6278 if (iBitStart != iEnd)
6279 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6280
6281 /* bits in last dword. */
6282 if (iBitEnd & 31)
6283 {
6284 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6285 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6286 }
6287 }
6288 }
6289}
6290
6291
6292/**
6293 * Sets a bit range within a bitmap.
6294 *
6295 * @param pvBitmap Pointer to the bitmap.
6296 * @param   iBitStart   The first bit to set.
6297 * @param iBitEnd The first bit not to set.
6298 */
6299DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6300{
6301 if (iBitStart < iBitEnd)
6302 {
6303 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6304 int iStart = iBitStart & ~31;
6305 int iEnd = iBitEnd & ~31;
6306 if (iStart == iEnd)
6307 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
6308 else
6309 {
6310 /* bits in first dword. */
6311 if (iBitStart & 31)
6312 {
6313 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6314 pu32++;
6315 iBitStart = iStart + 32;
6316 }
6317
6318 /* whole dword. */
6319 if (iBitStart != iEnd)
6320 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6321
6322 /* bits in last dword. */
6323 if (iBitEnd & 31)
6324 {
6325 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6326 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6327 }
6328 }
6329 }
6330}
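/* Usage sketch (illustrative only): iBitEnd is exclusive, so the calls below
 * first set bits 8 thru 15 and then clear bits 8 thru 11 again.
 * @code
 *      uint32_t au32Bitmap[2] = {0};
 *      ASMBitSetRange(au32Bitmap, 8, 16);      // sets bits 8..15
 *      ASMBitClearRange(au32Bitmap, 8, 12);    // clears bits 8..11
 * @endcode
 */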
6331
6332
6333/**
6334 * Finds the first clear bit in a bitmap.
6335 *
6336 * @returns Index of the first zero bit.
6337 * @returns -1 if no clear bit was found.
6338 * @param pvBitmap Pointer to the bitmap.
6339 * @param cBits The number of bits in the bitmap. Multiple of 32.
6340 */
6341#if RT_INLINE_ASM_EXTERNAL
6342DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6343#else
6344DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6345{
6346 if (cBits)
6347 {
6348 int32_t iBit;
6349# if RT_INLINE_ASM_GNU_STYLE
6350 RTCCUINTREG uEAX, uECX, uEDI;
6351 cBits = RT_ALIGN_32(cBits, 32);
6352 __asm__ __volatile__("repe; scasl\n\t"
6353 "je 1f\n\t"
6354# ifdef RT_ARCH_AMD64
6355 "lea -4(%%rdi), %%rdi\n\t"
6356 "xorl (%%rdi), %%eax\n\t"
6357 "subq %5, %%rdi\n\t"
6358# else
6359 "lea -4(%%edi), %%edi\n\t"
6360 "xorl (%%edi), %%eax\n\t"
6361 "subl %5, %%edi\n\t"
6362# endif
6363 "shll $3, %%edi\n\t"
6364 "bsfl %%eax, %%edx\n\t"
6365 "addl %%edi, %%edx\n\t"
6366 "1:\t\n"
6367 : "=d" (iBit),
6368 "=&c" (uECX),
6369 "=&D" (uEDI),
6370 "=&a" (uEAX)
6371 : "0" (0xffffffff),
6372 "mr" (pvBitmap),
6373 "1" (cBits >> 5),
6374 "2" (pvBitmap),
6375 "3" (0xffffffff));
6376# else
6377 cBits = RT_ALIGN_32(cBits, 32);
6378 __asm
6379 {
6380# ifdef RT_ARCH_AMD64
6381 mov rdi, [pvBitmap]
6382 mov rbx, rdi
6383# else
6384 mov edi, [pvBitmap]
6385 mov ebx, edi
6386# endif
6387 mov edx, 0ffffffffh
6388 mov eax, edx
6389 mov ecx, [cBits]
6390 shr ecx, 5
6391 repe scasd
6392 je done
6393
6394# ifdef RT_ARCH_AMD64
6395 lea rdi, [rdi - 4]
6396 xor eax, [rdi]
6397 sub rdi, rbx
6398# else
6399 lea edi, [edi - 4]
6400 xor eax, [edi]
6401 sub edi, ebx
6402# endif
6403 shl edi, 3
6404 bsf edx, eax
6405 add edx, edi
6406 done:
6407 mov [iBit], edx
6408 }
6409# endif
6410 return iBit;
6411 }
6412 return -1;
6413}
6414#endif
6415
6416
6417/**
6418 * Finds the next clear bit in a bitmap.
6419 *
6420 * @returns Index of the first zero bit.
6421 * @returns -1 if no clear bit was found.
6422 * @param pvBitmap Pointer to the bitmap.
6423 * @param cBits The number of bits in the bitmap. Multiple of 32.
6424 * @param iBitPrev The bit returned from the last search.
6425 * The search will start at iBitPrev + 1.
6426 */
6427#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6428DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6429#else
6430DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6431{
6432 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6433 int iBit = ++iBitPrev & 31;
6434 if (iBit)
6435 {
6436 /*
6437 * Inspect the 32-bit word containing the unaligned bit.
6438 */
6439 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6440
6441# if RT_INLINE_ASM_USES_INTRIN
6442 unsigned long ulBit = 0;
6443 if (_BitScanForward(&ulBit, u32))
6444 return ulBit + iBitPrev;
6445# else
6446# if RT_INLINE_ASM_GNU_STYLE
6447 __asm__ __volatile__("bsf %1, %0\n\t"
6448 "jnz 1f\n\t"
6449 "movl $-1, %0\n\t"
6450 "1:\n\t"
6451 : "=r" (iBit)
6452 : "r" (u32));
6453# else
6454 __asm
6455 {
6456 mov edx, [u32]
6457 bsf eax, edx
6458 jnz done
6459 mov eax, 0ffffffffh
6460 done:
6461 mov [iBit], eax
6462 }
6463# endif
6464 if (iBit >= 0)
6465 return iBit + iBitPrev;
6466# endif
6467
6468 /*
6469 * Skip ahead and see if there is anything left to search.
6470 */
6471 iBitPrev |= 31;
6472 iBitPrev++;
6473 if (cBits <= (uint32_t)iBitPrev)
6474 return -1;
6475 }
6476
6477 /*
6478 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6479 */
6480 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6481 if (iBit >= 0)
6482 iBit += iBitPrev;
6483 return iBit;
6484}
6485#endif
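/* Usage sketch (illustrative only): walking all clear bits of a 128-bit
 * bitmap; the bit count passed to the search functions must be a multiple
 * of 32 and au32Bitmap is a made-up name.
 * @code
 *      uint32_t au32Bitmap[4] = {0};
 *      int iBit = ASMBitFirstClear(&au32Bitmap[0], 128);
 *      while (iBit >= 0)
 *      {
 *          // iBit indexes a clear bit
 *          iBit = ASMBitNextClear(&au32Bitmap[0], 128, iBit);
 *      }
 * @endcode
 */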
6486
6487
6488/**
6489 * Finds the first set bit in a bitmap.
6490 *
6491 * @returns Index of the first set bit.
6492 * @returns -1 if no set bit was found.
6493 * @param pvBitmap Pointer to the bitmap.
6494 * @param cBits The number of bits in the bitmap. Multiple of 32.
6495 */
6496#if RT_INLINE_ASM_EXTERNAL
6497DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6498#else
6499DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6500{
6501 if (cBits)
6502 {
6503 int32_t iBit;
6504# if RT_INLINE_ASM_GNU_STYLE
6505 RTCCUINTREG uEAX, uECX, uEDI;
6506 cBits = RT_ALIGN_32(cBits, 32);
6507 __asm__ __volatile__("repe; scasl\n\t"
6508 "je 1f\n\t"
6509# ifdef RT_ARCH_AMD64
6510 "lea -4(%%rdi), %%rdi\n\t"
6511 "movl (%%rdi), %%eax\n\t"
6512 "subq %5, %%rdi\n\t"
6513# else
6514 "lea -4(%%edi), %%edi\n\t"
6515 "movl (%%edi), %%eax\n\t"
6516 "subl %5, %%edi\n\t"
6517# endif
6518 "shll $3, %%edi\n\t"
6519 "bsfl %%eax, %%edx\n\t"
6520 "addl %%edi, %%edx\n\t"
6521 "1:\t\n"
6522 : "=d" (iBit),
6523 "=&c" (uECX),
6524 "=&D" (uEDI),
6525 "=&a" (uEAX)
6526 : "0" (0xffffffff),
6527 "mr" (pvBitmap),
6528 "1" (cBits >> 5),
6529 "2" (pvBitmap),
6530 "3" (0));
6531# else
6532 cBits = RT_ALIGN_32(cBits, 32);
6533 __asm
6534 {
6535# ifdef RT_ARCH_AMD64
6536 mov rdi, [pvBitmap]
6537 mov rbx, rdi
6538# else
6539 mov edi, [pvBitmap]
6540 mov ebx, edi
6541# endif
6542 mov edx, 0ffffffffh
6543 xor eax, eax
6544 mov ecx, [cBits]
6545 shr ecx, 5
6546 repe scasd
6547 je done
6548# ifdef RT_ARCH_AMD64
6549 lea rdi, [rdi - 4]
6550 mov eax, [rdi]
6551 sub rdi, rbx
6552# else
6553 lea edi, [edi - 4]
6554 mov eax, [edi]
6555 sub edi, ebx
6556# endif
6557 shl edi, 3
6558 bsf edx, eax
6559 add edx, edi
6560 done:
6561 mov [iBit], edx
6562 }
6563# endif
6564 return iBit;
6565 }
6566 return -1;
6567}
6568#endif
6569
6570
6571/**
6572 * Finds the next set bit in a bitmap.
6573 *
6574 * @returns Index of the next set bit.
6575 * @returns -1 if no set bit was found.
6576 * @param pvBitmap Pointer to the bitmap.
6577 * @param cBits The number of bits in the bitmap. Multiple of 32.
6578 * @param iBitPrev The bit returned from the last search.
6579 * The search will start at iBitPrev + 1.
6580 */
6581#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6582DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6583#else
6584DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6585{
6586 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6587 int iBit = ++iBitPrev & 31;
6588 if (iBit)
6589 {
6590 /*
6591 * Inspect the 32-bit word containing the unaligned bit.
6592 */
6593 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6594
6595# if RT_INLINE_ASM_USES_INTRIN
6596 unsigned long ulBit = 0;
6597 if (_BitScanForward(&ulBit, u32))
6598 return ulBit + iBitPrev;
6599# else
6600# if RT_INLINE_ASM_GNU_STYLE
6601 __asm__ __volatile__("bsf %1, %0\n\t"
6602 "jnz 1f\n\t"
6603 "movl $-1, %0\n\t"
6604 "1:\n\t"
6605 : "=r" (iBit)
6606 : "r" (u32));
6607# else
6608 __asm
6609 {
6610 mov edx, [u32]
6611 bsf eax, edx
6612 jnz done
6613 mov eax, 0ffffffffh
6614 done:
6615 mov [iBit], eax
6616 }
6617# endif
6618 if (iBit >= 0)
6619 return iBit + iBitPrev;
6620# endif
6621
6622 /*
6623 * Skip ahead and see if there is anything left to search.
6624 */
6625 iBitPrev |= 31;
6626 iBitPrev++;
6627 if (cBits <= (uint32_t)iBitPrev)
6628 return -1;
6629 }
6630
6631 /*
6632 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6633 */
6634 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6635 if (iBit >= 0)
6636 iBit += iBitPrev;
6637 return iBit;
6638}
6639#endif
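
/*
 * Usage sketch (illustrative): iterating every set bit, e.g. to dispatch all
 * pending items in a bitmap.  Names and the 64-bit size are assumptions made
 * for the example.
 *
 *      uint32_t au32Pending[2] = { UINT32_C(0x80000001), UINT32_C(0x00000002) };
 *      int      iBit = ASMBitFirstSet(&au32Pending[0], 64);
 *      while (iBit >= 0)
 *      {
 *          // visits bits 0, 31 and 33 for the values above.
 *          iBit = ASMBitNextSet(&au32Pending[0], 64, (uint32_t)iBit);
 *      }
 */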
6640
6641
6642/**
6643 * Finds the first bit which is set in the given 32-bit integer.
6644 * Bits are numbered from 1 (least significant) to 32.
6645 *
6646 * @returns index [1..32] of the first set bit.
6647 * @returns 0 if all bits are cleared.
6648 * @param u32 Integer to search for set bits.
6649 * @remark Similar to ffs() in BSD.
6650 */
6651DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6652{
6653# if RT_INLINE_ASM_USES_INTRIN
6654 unsigned long iBit;
6655 if (_BitScanForward(&iBit, u32))
6656 iBit++;
6657 else
6658 iBit = 0;
6659# elif RT_INLINE_ASM_GNU_STYLE
6660 uint32_t iBit;
6661 __asm__ __volatile__("bsf %1, %0\n\t"
6662 "jnz 1f\n\t"
6663 "xorl %0, %0\n\t"
6664 "jmp 2f\n"
6665 "1:\n\t"
6666 "incl %0\n"
6667 "2:\n\t"
6668 : "=r" (iBit)
6669 : "rm" (u32));
6670# else
6671 uint32_t iBit;
6672 _asm
6673 {
6674 bsf eax, [u32]
6675 jnz found
6676 xor eax, eax
6677 jmp done
6678 found:
6679 inc eax
6680 done:
6681 mov [iBit], eax
6682 }
6683# endif
6684 return iBit;
6685}
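
/*
 * Usage sketch (illustrative): unlike the bitmap searches above, the U32/S32
 * variants use 1-based bit indices and return 0 when no bit is set, matching
 * BSD ffs() semantics.
 *
 *      unsigned iBit;
 *      iBit = ASMBitFirstSetU32(0);                     // 0  (no bit set)
 *      iBit = ASMBitFirstSetU32(UINT32_C(0x00000001));  // 1  (bit 0)
 *      iBit = ASMBitFirstSetU32(UINT32_C(0x00000500));  // 9  (bit 8)
 */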
6686
6687
6688/**
6689 * Finds the first bit which is set in the given 32-bit integer.
6690 * Bits are numbered from 1 (least significant) to 32.
6691 *
6692 * @returns index [1..32] of the first set bit.
6693 * @returns 0 if all bits are cleared.
6694 * @param i32 Integer to search for set bits.
6695 * @remark Similar to ffs() in BSD.
6696 */
6697DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6698{
6699 return ASMBitFirstSetU32((uint32_t)i32);
6700}
6701
6702
6703/**
6704 * Finds the last bit which is set in the given 32-bit integer.
6705 * Bits are numbered from 1 (least significant) to 32.
6706 *
6707 * @returns index [1..32] of the last set bit.
6708 * @returns 0 if all bits are cleared.
6709 * @param u32 Integer to search for set bits.
6710 * @remark Similar to fls() in BSD.
6711 */
6712DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6713{
6714# if RT_INLINE_ASM_USES_INTRIN
6715 unsigned long iBit;
6716 if (_BitScanReverse(&iBit, u32))
6717 iBit++;
6718 else
6719 iBit = 0;
6720# elif RT_INLINE_ASM_GNU_STYLE
6721 uint32_t iBit;
6722 __asm__ __volatile__("bsrl %1, %0\n\t"
6723 "jnz 1f\n\t"
6724 "xorl %0, %0\n\t"
6725 "jmp 2f\n"
6726 "1:\n\t"
6727 "incl %0\n"
6728 "2:\n\t"
6729 : "=r" (iBit)
6730 : "rm" (u32));
6731# else
6732 uint32_t iBit;
6733 _asm
6734 {
6735 bsr eax, [u32]
6736 jnz found
6737 xor eax, eax
6738 jmp done
6739 found:
6740 inc eax
6741 done:
6742 mov [iBit], eax
6743 }
6744# endif
6745 return iBit;
6746}
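
/*
 * Usage sketch (illustrative): ASMBitLastSetU32 returns the 1-based index of
 * the highest set bit (0 for a zero input, matching BSD fls()), which makes it
 * convenient for computing how many bits a value occupies.
 *
 *      unsigned cBitsUsed = ASMBitLastSetU32(UINT32_C(0x00000500));  // 11 (bit 10)
 *      unsigned cBitsZero = ASMBitLastSetU32(0);                     // 0
 */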
6747
6748
6749/**
6750 * Finds the last bit which is set in the given 32-bit integer.
6751 * Bits are numbered from 1 (least significant) to 32.
6752 *
6753 * @returns index [1..32] of the last set bit.
6754 * @returns 0 if all bits are cleared.
6755 * @param i32 Integer to search for set bits.
6756 * @remark Similar to fls() in BSD.
6757 */
6758DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6759{
6760 return ASMBitLastSetU32((uint32_t)i32);
6761}
6762
6763/**
6764 * Reverse the byte order of the given 16-bit integer.
6765 *
6766 * @returns The byte-swapped value.
6767 * @param u16 16-bit integer value.
6768 */
6769DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6770{
6771#if RT_INLINE_ASM_USES_INTRIN
6772 u16 = _byteswap_ushort(u16);
6773#elif RT_INLINE_ASM_GNU_STYLE
6774 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6775#else
6776 _asm
6777 {
6778 mov ax, [u16]
6779 ror ax, 8
6780 mov [u16], ax
6781 }
6782#endif
6783 return u16;
6784}
6785
6786/**
6787 * Reverse the byte order of the given 32-bit integer.
6788 *
6789 * @returns The byte-swapped value.
6790 * @param u32 32-bit integer value.
6791 */
6792DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6793{
6794#if RT_INLINE_ASM_USES_INTRIN
6795 u32 = _byteswap_ulong(u32);
6796#elif RT_INLINE_ASM_GNU_STYLE
6797 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6798#else
6799 _asm
6800 {
6801 mov eax, [u32]
6802 bswap eax
6803 mov [u32], eax
6804 }
6805#endif
6806 return u32;
6807}
6808
6809
6810/**
6811 * Reverse the byte order of the given 64-bit integer.
6812 *
6813 * @returns The byte-swapped value.
6814 * @param u64 64-bit integer value.
6815 */
6816DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6817{
6818#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6819 u64 = _byteswap_uint64(u64);
6820#else
6821 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6822 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6823#endif
6824 return u64;
6825}
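
/*
 * Usage sketch (illustrative): converting between host byte order and a fixed
 * wire format on a little-endian host.  The values are assumptions for the
 * example; on a big-endian host no swap would be needed.
 *
 *      uint16_t u16Port = ASMByteSwapU16(UINT16_C(0x1234));             // 0x3412
 *      uint32_t u32Addr = ASMByteSwapU32(UINT32_C(0x12345678));         // 0x78563412
 *      uint64_t u64Id   = ASMByteSwapU64(UINT64_C(0x1122334455667788)); // 0x8877665544332211
 */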
6826
6827
6828/** @} */
6829
6830
6831/** @} */
6832#endif
6833