VirtualBox

source: vbox/trunk/include/iprt/asm.h@59670

Last change on this file since 59670 was 59533, checked in by vboxsync, 9 years ago

iprt/asm.h: build fix

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Microsoft Visual C++ intrinsics (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
98 * @remarks The difference between ordered and unordered atomic operations is that
99 * the former will complete outstanding reads and writes before continuing,
100 * while the latter doesn't make any promises about the order. Ordered
101 * operations don't, it seems, make any 100% promise with respect to whether
102 * the operation will complete before any subsequent memory access.
103 * (Please correct this if wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
114 * static inline uint32_t rdmsr_low(int idx)
115 * {
116 * uint32_t low;
117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
118 * return low;
 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some 4.3.x register allocation issues in this version of
137 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
138#ifdef __GNUC__
139# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
140#endif
141#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
142# define RT_INLINE_ASM_GCC_4_3_X_X86 0
143#endif
144
145/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
146 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
147 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
148 * mode, x86.
149 *
150 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
151 * when in PIC mode on x86.
152 */
153#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
155# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
156# else
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
158 ( (defined(PIC) || defined(__PIC__)) \
159 && defined(RT_ARCH_X86) \
160 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
161 || defined(RT_OS_DARWIN)) )
162# endif
163#endif
164
165
166/** @def ASMReturnAddress
167 * Gets the return address of the current (or calling if you like) function or method.
168 */
169#ifdef _MSC_VER
170# ifdef __cplusplus
171extern "C"
172# endif
173void * _ReturnAddress(void);
174# pragma intrinsic(_ReturnAddress)
175# define ASMReturnAddress() _ReturnAddress()
176#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
177# define ASMReturnAddress() __builtin_return_address(0)
178#elif defined(__WATCOMC__)
179# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
180#else
181# error "Unsupported compiler."
182#endif
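/* Usage sketch (added for illustration, not from the original header): capturing
 * the caller's address for a debug trace; LogFlow and the surrounding helper are
 * assumptions for the example, not requirements of ASMReturnAddress().
 * @code
 *     void * const pvCaller = ASMReturnAddress();
 *     LogFlow(("SomeHelper: called from %p\n", pvCaller));
 * @endcode
 */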
183
184
185/**
186 * Compiler memory barrier.
187 *
188 * Ensure that the compiler does not use any cached (register/tmp stack) memory
189 * values or any outstanding writes when returning from this function.
190 *
191 * This function must be used if non-volatile data is modified by a
192 * device or the VMM. Typical cases are port access, MMIO access,
193 * trapping instructions, etc.
194 */
195#if RT_INLINE_ASM_GNU_STYLE
196# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
197#elif RT_INLINE_ASM_USES_INTRIN
198# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
199#elif defined(__WATCOMC__)
200void ASMCompilerBarrier(void);
201#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
202DECLINLINE(void) ASMCompilerBarrier(void)
203{
204 __asm
205 {
206 }
207}
208#endif
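/* Usage sketch (added for illustration, not from the original header): a typical
 * spot where the compiler barrier matters is when poking a device register
 * through a plain, non-volatile pointer; pu32MmioReg, uCommand and the busy bit
 * are made up for this example.
 * @code
 *     *pu32MmioReg = uCommand;   // start the (hypothetical) device operation
 *     ASMCompilerBarrier();      // don't let the compiler cache or defer the accesses
 *     while (*pu32MmioReg & UINT32_C(0x80000000))
 *         ASMNopPause();         // spin until the hypothetical busy bit clears
 * @endcode
 */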
209
210
211/** @def ASMBreakpoint
212 * Debugger Breakpoint.
213 * @deprecated Use RT_BREAKPOINT instead.
214 * @internal
215 */
216#define ASMBreakpoint() RT_BREAKPOINT()
217
218
219/**
220 * Spinloop hint for platforms that have these, empty function on the other
221 * platforms.
222 *
223 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
224 * spin locks.
225 */
226#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
227DECLASM(void) ASMNopPause(void);
228#else
229DECLINLINE(void) ASMNopPause(void)
230{
231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
232# if RT_INLINE_ASM_GNU_STYLE
233 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
234# else
235 __asm {
236 _emit 0f3h
237 _emit 090h
238 }
239# endif
240# else
241 /* dummy */
242# endif
243}
244#endif
245
246
247/**
248 * Atomically Exchange an unsigned 8-bit value, ordered.
249 *
250 * @returns Current *pu8 value
251 * @param pu8 Pointer to the 8-bit variable to update.
252 * @param u8 The 8-bit value to assign to *pu8.
253 */
254#if RT_INLINE_ASM_EXTERNAL
255DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
256#else
257DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
258{
259# if RT_INLINE_ASM_GNU_STYLE
260 __asm__ __volatile__("xchgb %0, %1\n\t"
261 : "=m" (*pu8),
262 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
263 : "1" (u8),
264 "m" (*pu8));
265# else
266 __asm
267 {
268# ifdef RT_ARCH_AMD64
269 mov rdx, [pu8]
270 mov al, [u8]
271 xchg [rdx], al
272 mov [u8], al
273# else
274 mov edx, [pu8]
275 mov al, [u8]
276 xchg [edx], al
277 mov [u8], al
278# endif
279 }
280# endif
281 return u8;
282}
283#endif
284
285
286/**
287 * Atomically Exchange a signed 8-bit value, ordered.
288 *
289 * @returns Current *pi8 value
290 * @param pi8 Pointer to the 8-bit variable to update.
291 * @param i8 The 8-bit value to assign to *pi8.
292 */
293DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
294{
295 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
296}
297
298
299/**
300 * Atomically Exchange a bool value, ordered.
301 *
302 * @returns Current *pf value
303 * @param pf Pointer to the 8-bit variable to update.
304 * @param f The boolean value to assign to *pf.
305 */
306DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
307{
308#ifdef _MSC_VER
309 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
310#else
311 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
312#endif
313}
314
315
316/**
317 * Atomically Exchange an unsigned 16-bit value, ordered.
318 *
319 * @returns Current *pu16 value
320 * @param pu16 Pointer to the 16-bit variable to update.
321 * @param u16 The 16-bit value to assign to *pu16.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
325#else
326DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
327{
328# if RT_INLINE_ASM_GNU_STYLE
329 __asm__ __volatile__("xchgw %0, %1\n\t"
330 : "=m" (*pu16),
331 "=r" (u16)
332 : "1" (u16),
333 "m" (*pu16));
334# else
335 __asm
336 {
337# ifdef RT_ARCH_AMD64
338 mov rdx, [pu16]
339 mov ax, [u16]
340 xchg [rdx], ax
341 mov [u16], ax
342# else
343 mov edx, [pu16]
344 mov ax, [u16]
345 xchg [edx], ax
346 mov [u16], ax
347# endif
348 }
349# endif
350 return u16;
351}
352#endif
353
354
355/**
356 * Atomically Exchange a signed 16-bit value, ordered.
357 *
358 * @returns Current *pi16 value
359 * @param pi16 Pointer to the 16-bit variable to update.
360 * @param i16 The 16-bit value to assign to *pi16.
361 */
362DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
363{
364 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
365}
366
367
368/**
369 * Atomically Exchange an unsigned 32-bit value, ordered.
370 *
371 * @returns Current *pu32 value
372 * @param pu32 Pointer to the 32-bit variable to update.
373 * @param u32 The 32-bit value to assign to *pu32.
374 *
375 * @remarks Does not work on 286 and earlier.
376 */
377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
378DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
379#else
380DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
381{
382# if RT_INLINE_ASM_GNU_STYLE
383 __asm__ __volatile__("xchgl %0, %1\n\t"
384 : "=m" (*pu32),
385 "=r" (u32)
386 : "1" (u32),
387 "m" (*pu32));
388
389# elif RT_INLINE_ASM_USES_INTRIN
390 u32 = _InterlockedExchange((long *)pu32, u32);
391
392# else
393 __asm
394 {
395# ifdef RT_ARCH_AMD64
396 mov rdx, [pu32]
397 mov eax, u32
398 xchg [rdx], eax
399 mov [u32], eax
400# else
401 mov edx, [pu32]
402 mov eax, u32
403 xchg [edx], eax
404 mov [u32], eax
405# endif
406 }
407# endif
408 return u32;
409}
410#endif
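/* Usage sketch (added for illustration, not part of the original docs): a minimal
 * test-and-set spin lock built on ASMAtomicXchgU32 and ASMNopPause; s_fSpinLock
 * is a made-up variable.
 * @code
 *     static uint32_t volatile s_fSpinLock = 0;
 *
 *     while (ASMAtomicXchgU32(&s_fSpinLock, 1) != 0)
 *         ASMNopPause();                      // somebody else holds it, spin politely
 *     // ... critical section ...
 *     ASMAtomicXchgU32(&s_fSpinLock, 0);      // release (ordered)
 * @endcode
 */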
411
412
413/**
414 * Atomically Exchange a signed 32-bit value, ordered.
415 *
416 * @returns Current *pi32 value
417 * @param pi32 Pointer to the 32-bit variable to update.
418 * @param i32 The 32-bit value to assign to *pi32.
419 */
420DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
421{
422 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
423}
424
425
426/**
427 * Atomically Exchange an unsigned 64-bit value, ordered.
428 *
429 * @returns Current *pu64 value
430 * @param pu64 Pointer to the 64-bit variable to update.
431 * @param u64 The 64-bit value to assign to *pu64.
432 *
433 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
434 */
435#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
436 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
437DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
438#else
439DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
440{
441# if defined(RT_ARCH_AMD64)
442# if RT_INLINE_ASM_USES_INTRIN
443 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
444
445# elif RT_INLINE_ASM_GNU_STYLE
446 __asm__ __volatile__("xchgq %0, %1\n\t"
447 : "=m" (*pu64),
448 "=r" (u64)
449 : "1" (u64),
450 "m" (*pu64));
451# else
452 __asm
453 {
454 mov rdx, [pu64]
455 mov rax, [u64]
456 xchg [rdx], rax
457 mov [u64], rax
458 }
459# endif
460# else /* !RT_ARCH_AMD64 */
461# if RT_INLINE_ASM_GNU_STYLE
462# if defined(PIC) || defined(__PIC__)
463 uint32_t u32EBX = (uint32_t)u64;
464 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
465 "xchgl %%ebx, %3\n\t"
466 "1:\n\t"
467 "lock; cmpxchg8b (%5)\n\t"
468 "jnz 1b\n\t"
469 "movl %3, %%ebx\n\t"
470 /*"xchgl %%esi, %5\n\t"*/
471 : "=A" (u64),
472 "=m" (*pu64)
473 : "0" (*pu64),
474 "m" ( u32EBX ),
475 "c" ( (uint32_t)(u64 >> 32) ),
476 "S" (pu64));
477# else /* !PIC */
478 __asm__ __volatile__("1:\n\t"
479 "lock; cmpxchg8b %1\n\t"
480 "jnz 1b\n\t"
481 : "=A" (u64),
482 "=m" (*pu64)
483 : "0" (*pu64),
484 "b" ( (uint32_t)u64 ),
485 "c" ( (uint32_t)(u64 >> 32) ));
486# endif
487# else
488 __asm
489 {
490 mov ebx, dword ptr [u64]
491 mov ecx, dword ptr [u64 + 4]
492 mov edi, pu64
493 mov eax, dword ptr [edi]
494 mov edx, dword ptr [edi + 4]
495 retry:
496 lock cmpxchg8b [edi]
497 jnz retry
498 mov dword ptr [u64], eax
499 mov dword ptr [u64 + 4], edx
500 }
501# endif
502# endif /* !RT_ARCH_AMD64 */
503 return u64;
504}
505#endif
506
507
508/**
509 * Atomically Exchange a signed 64-bit value, ordered.
510 *
511 * @returns Current *pi64 value
512 * @param pi64 Pointer to the 64-bit variable to update.
513 * @param i64 The 64-bit value to assign to *pi64.
514 */
515DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
516{
517 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
518}
519
520
521/**
522 * Atomically Exchange a pointer value, ordered.
523 *
524 * @returns Current *ppv value
525 * @param ppv Pointer to the pointer variable to update.
526 * @param pv The pointer value to assign to *ppv.
527 */
528DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
529{
530#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
531 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
532#elif ARCH_BITS == 64
533 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
534#else
535# error "ARCH_BITS is bogus"
536#endif
537}
538
539
540/**
541 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
542 *
543 * @returns Current *pv value
544 * @param ppv Pointer to the pointer variable to update.
545 * @param pv The pointer value to assign to *ppv.
546 * @param Type The type of *ppv, sans volatile.
547 */
548#ifdef __GNUC__
549# define ASMAtomicXchgPtrT(ppv, pv, Type) \
550 __extension__ \
551 ({\
552 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
553 Type const pvTypeChecked = (pv); \
554 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
555 pvTypeCheckedRet; \
556 })
557#else
558# define ASMAtomicXchgPtrT(ppv, pv, Type) \
559 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
560#endif
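/* Usage sketch (added for illustration): swapping out a pointer member without
 * the void-pointer casts; MYDATA, pThis->pData and the RTMemFree cleanup are
 * assumptions for the example.
 * @code
 *     MYDATA *pOld = ASMAtomicXchgPtrT(&pThis->pData, pNewData, MYDATA *);
 *     if (pOld)
 *         RTMemFree(pOld);
 * @endcode
 */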
561
562
563/**
564 * Atomically Exchange a raw-mode context pointer value, ordered.
565 *
566 * @returns Current *ppvRC value
567 * @param ppvRC Pointer to the pointer variable to update.
568 * @param pvRC The pointer value to assign to *ppvRC.
569 */
570DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
571{
572 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
573}
574
575
576/**
577 * Atomically Exchange a ring-0 pointer value, ordered.
578 *
579 * @returns Current *ppvR0 value
580 * @param ppvR0 Pointer to the pointer variable to update.
581 * @param pvR0 The pointer value to assign to *ppvR0.
582 */
583DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
584{
585#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
586 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
587#elif R0_ARCH_BITS == 64
588 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
589#else
590# error "R0_ARCH_BITS is bogus"
591#endif
592}
593
594
595/**
596 * Atomically Exchange a ring-3 pointer value, ordered.
597 *
598 * @returns Current *ppvR3 value
599 * @param ppvR3 Pointer to the pointer variable to update.
600 * @param pvR3 The pointer value to assign to *ppvR3.
601 */
602DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
603{
604#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
605 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
606#elif R3_ARCH_BITS == 64
607 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
608#else
609# error "R3_ARCH_BITS is bogus"
610#endif
611}
612
613
614/** @def ASMAtomicXchgHandle
615 * Atomically Exchange a typical IPRT handle value, ordered.
616 *
617 * @param ph Pointer to the value to update.
618 * @param hNew The new value to assign to *ph.
619 * @param phRes Where to store the current *ph value.
620 *
621 * @remarks This doesn't currently work for all handles (like RTFILE).
622 */
623#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
624# define ASMAtomicXchgHandle(ph, hNew, phRes) \
625 do { \
626 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
627 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
628 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
629 } while (0)
630#elif HC_ARCH_BITS == 64
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
635 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
636 } while (0)
637#else
638# error HC_ARCH_BITS
639#endif
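/* Usage sketch (added for illustration, not from the original docs): the common
 * teardown pattern of atomically taking over a handle member before destroying
 * it; pThis->hEvent is a made-up RTSEMEVENT member.
 * @code
 *     RTSEMEVENT hEvent;
 *     ASMAtomicXchgHandle(&pThis->hEvent, NIL_RTSEMEVENT, &hEvent);
 *     if (hEvent != NIL_RTSEMEVENT)
 *         RTSemEventDestroy(hEvent);
 * @endcode
 */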
640
641
642/**
643 * Atomically Exchange a value whose size might differ
644 * between platforms or compilers, ordered.
645 *
646 * @param pu Pointer to the variable to update.
647 * @param uNew The value to assign to *pu.
648 * @todo This is busted as it's missing the result argument.
649 */
650#define ASMAtomicXchgSize(pu, uNew) \
651 do { \
652 switch (sizeof(*(pu))) { \
653 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
654 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
655 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
656 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
657 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
658 } \
659 } while (0)
660
661/**
662 * Atomically Exchange a value whose size might differ
663 * between platforms or compilers, ordered.
664 *
665 * @param pu Pointer to the variable to update.
666 * @param uNew The value to assign to *pu.
667 * @param puRes Where to store the current *pu value.
668 */
669#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
670 do { \
671 switch (sizeof(*(pu))) { \
672 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
673 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
674 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
675 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
676 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
677 } \
678 } while (0)
679
680
681
682/**
683 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
684 *
685 * @returns true if xchg was done.
686 * @returns false if xchg wasn't done.
687 *
688 * @param pu8 Pointer to the value to update.
689 * @param u8New The new value to assign to *pu8.
690 * @param u8Old The old value to compare *pu8 with.
691 *
692 * @remarks x86: Requires a 486 or later.
693 */
694#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
695DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
696#else
697DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
698{
699 uint8_t u8Ret;
700 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
701 "setz %1\n\t"
702 : "=m" (*pu8),
703 "=qm" (u8Ret),
704 "=a" (u8Old)
705 : "q" (u8New),
706 "2" (u8Old),
707 "m" (*pu8));
708 return (bool)u8Ret;
709}
710#endif
711
712
713/**
714 * Atomically Compare and Exchange a signed 8-bit value, ordered.
715 *
716 * @returns true if xchg was done.
717 * @returns false if xchg wasn't done.
718 *
719 * @param pi8 Pointer to the value to update.
720 * @param i8New The new value to assign to *pi8.
721 * @param i8Old The old value to compare *pi8 with.
722 *
723 * @remarks x86: Requires a 486 or later.
724 */
725DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
726{
727 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
728}
729
730
731/**
732 * Atomically Compare and Exchange a bool value, ordered.
733 *
734 * @returns true if xchg was done.
735 * @returns false if xchg wasn't done.
736 *
737 * @param pf Pointer to the value to update.
738 * @param fNew The new value to assign to *pf.
739 * @param fOld The old value to compare *pf with.
740 *
741 * @remarks x86: Requires a 486 or later.
742 */
743DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
744{
745 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
746}
747
748
749/**
750 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
751 *
752 * @returns true if xchg was done.
753 * @returns false if xchg wasn't done.
754 *
755 * @param pu32 Pointer to the value to update.
756 * @param u32New The new value to assign to *pu32.
757 * @param u32Old The old value to compare *pu32 with.
758 *
759 * @remarks x86: Requires a 486 or later.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
763#else
764DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
765{
766# if RT_INLINE_ASM_GNU_STYLE
767 uint8_t u8Ret;
768 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
769 "setz %1\n\t"
770 : "=m" (*pu32),
771 "=qm" (u8Ret),
772 "=a" (u32Old)
773 : "r" (u32New),
774 "2" (u32Old),
775 "m" (*pu32));
776 return (bool)u8Ret;
777
778# elif RT_INLINE_ASM_USES_INTRIN
779 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
780
781# else
782 uint32_t u32Ret;
783 __asm
784 {
785# ifdef RT_ARCH_AMD64
786 mov rdx, [pu32]
787# else
788 mov edx, [pu32]
789# endif
790 mov eax, [u32Old]
791 mov ecx, [u32New]
792# ifdef RT_ARCH_AMD64
793 lock cmpxchg [rdx], ecx
794# else
795 lock cmpxchg [edx], ecx
796# endif
797 setz al
798 movzx eax, al
799 mov [u32Ret], eax
800 }
801 return !!u32Ret;
802# endif
803}
804#endif
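/* Usage sketch (added for illustration): the classic compare-and-exchange retry
 * loop, here setting a flag bit in a shared 32-bit word; g_fFlags is made up for
 * the example.
 * @code
 *     uint32_t fOld;
 *     do
 *         fOld = ASMAtomicReadU32(&g_fFlags);
 *     while (!ASMAtomicCmpXchgU32(&g_fFlags, fOld | UINT32_C(1), fOld));
 * @endcode
 */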
805
806
807/**
808 * Atomically Compare and Exchange a signed 32-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pi32 Pointer to the value to update.
814 * @param i32New The new value to assign to *pi32.
815 * @param i32Old The old value to compare *pi32 with.
816 *
817 * @remarks x86: Requires a 486 or later.
818 */
819DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
820{
821 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
822}
823
824
825/**
826 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
827 *
828 * @returns true if xchg was done.
829 * @returns false if xchg wasn't done.
830 *
831 * @param pu64 Pointer to the 64-bit variable to update.
832 * @param u64New The 64-bit value to assign to *pu64.
833 * @param u64Old The value to compare with.
834 *
835 * @remarks x86: Requires a Pentium or later.
836 */
837#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
838 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
839DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
840#else
841DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
842{
843# if RT_INLINE_ASM_USES_INTRIN
844 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
845
846# elif defined(RT_ARCH_AMD64)
847# if RT_INLINE_ASM_GNU_STYLE
848 uint8_t u8Ret;
849 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
850 "setz %1\n\t"
851 : "=m" (*pu64),
852 "=qm" (u8Ret),
853 "=a" (u64Old)
854 : "r" (u64New),
855 "2" (u64Old),
856 "m" (*pu64));
857 return (bool)u8Ret;
858# else
859 bool fRet;
860 __asm
861 {
862 mov rdx, [pu64]
863 mov rax, [u64Old]
864 mov rcx, [u64New]
865 lock cmpxchg [rdx], rcx
866 setz al
867 mov [fRet], al
868 }
869 return fRet;
870# endif
871# else /* !RT_ARCH_AMD64 */
872 uint32_t u32Ret;
873# if RT_INLINE_ASM_GNU_STYLE
874# if defined(PIC) || defined(__PIC__)
875 uint32_t u32EBX = (uint32_t)u64New;
876 uint32_t u32Spill;
877 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
878 "lock; cmpxchg8b (%6)\n\t"
879 "setz %%al\n\t"
880 "movl %4, %%ebx\n\t"
881 "movzbl %%al, %%eax\n\t"
882 : "=a" (u32Ret),
883 "=d" (u32Spill),
884# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
885 "+m" (*pu64)
886# else
887 "=m" (*pu64)
888# endif
889 : "A" (u64Old),
890 "m" ( u32EBX ),
891 "c" ( (uint32_t)(u64New >> 32) ),
892 "S" (pu64));
893# else /* !PIC */
894 uint32_t u32Spill;
895 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
896 "setz %%al\n\t"
897 "movzbl %%al, %%eax\n\t"
898 : "=a" (u32Ret),
899 "=d" (u32Spill),
900 "+m" (*pu64)
901 : "A" (u64Old),
902 "b" ( (uint32_t)u64New ),
903 "c" ( (uint32_t)(u64New >> 32) ));
904# endif
905 return (bool)u32Ret;
906# else
907 __asm
908 {
909 mov ebx, dword ptr [u64New]
910 mov ecx, dword ptr [u64New + 4]
911 mov edi, [pu64]
912 mov eax, dword ptr [u64Old]
913 mov edx, dword ptr [u64Old + 4]
914 lock cmpxchg8b [edi]
915 setz al
916 movzx eax, al
917 mov dword ptr [u32Ret], eax
918 }
919 return !!u32Ret;
920# endif
921# endif /* !RT_ARCH_AMD64 */
922}
923#endif
924
925
926/**
927 * Atomically Compare and exchange a signed 64-bit value, ordered.
928 *
929 * @returns true if xchg was done.
930 * @returns false if xchg wasn't done.
931 *
932 * @param pi64 Pointer to the 64-bit variable to update.
933 * @param i64 The 64-bit value to assign to *pi64.
934 * @param i64Old The value to compare with.
935 *
936 * @remarks x86: Requires a Pentium or later.
937 */
938DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
939{
940 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
941}
942
943
944/**
945 * Atomically Compare and Exchange a pointer value, ordered.
946 *
947 * @returns true if xchg was done.
948 * @returns false if xchg wasn't done.
949 *
950 * @param ppv Pointer to the value to update.
951 * @param pvNew The new value to assign to *ppv.
952 * @param pvOld The old value to compare *ppv with.
953 *
954 * @remarks x86: Requires a 486 or later.
955 */
956DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
957{
958#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
959 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
960#elif ARCH_BITS == 64
961 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
962#else
963# error "ARCH_BITS is bogus"
964#endif
965}
966
967
968/**
969 * Atomically Compare and Exchange a pointer value, ordered.
970 *
971 * @returns true if xchg was done.
972 * @returns false if xchg wasn't done.
973 *
974 * @param ppv Pointer to the value to update.
975 * @param pvNew The new value to assign to *ppv.
976 * @param pvOld The old value to compare *ppv with.
977 *
978 * @remarks This is relatively type safe on GCC platforms.
979 * @remarks x86: Requires a 486 or later.
980 */
981#ifdef __GNUC__
982# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
983 __extension__ \
984 ({\
985 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
986 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
987 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
988 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
989 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
990 fMacroRet; \
991 })
992#else
993# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
994 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
995#endif
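/* Usage sketch (added for illustration): lock-free push onto a singly linked
 * list; MYNODE, g_pHead and pNew are made up for the example.
 * @code
 *     MYNODE *pHead;
 *     do
 *     {
 *         pHead = ASMAtomicReadPtrT(&g_pHead, MYNODE *);
 *         pNew->pNext = pHead;
 *     } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNew, pHead));
 * @endcode
 */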
996
997
998/** @def ASMAtomicCmpXchgHandle
999 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1000 *
1001 * @param ph Pointer to the value to update.
1002 * @param hNew The new value to assign to *ph.
1003 * @param hOld The old value to compare *ph with.
1004 * @param fRc Where to store the result.
1005 *
1006 * @remarks This doesn't currently work for all handles (like RTFILE).
1007 * @remarks x86: Requires a 486 or later.
1008 */
1009#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1010# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1011 do { \
1012 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1013 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1014 } while (0)
1015#elif HC_ARCH_BITS == 64
1016# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1017 do { \
1018 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1019 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1020 } while (0)
1021#else
1022# error HC_ARCH_BITS
1023#endif
1024
1025
1026/** @def ASMAtomicCmpXchgSize
1027 * Atomically Compare and Exchange a value whose size might differ
1028 * between platforms or compilers, ordered.
1029 *
1030 * @param pu Pointer to the value to update.
1031 * @param uNew The new value to assign to *pu.
1032 * @param uOld The old value to compare *pu with.
1033 * @param fRc Where to store the result.
1034 *
1035 * @remarks x86: Requires a 486 or later.
1036 */
1037#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1038 do { \
1039 switch (sizeof(*(pu))) { \
1040 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1041 break; \
1042 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1043 break; \
1044 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1045 (fRc) = false; \
1046 break; \
1047 } \
1048 } while (0)
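/* Usage sketch (added for illustration): note that the result comes back through
 * the fRc argument rather than a return value; pThis->enmState and the state
 * constants are made up for the example.
 * @code
 *     bool fRc;
 *     ASMAtomicCmpXchgSize(&pThis->enmState, kStateRunning, kStateStarting, fRc);
 *     if (!fRc)
 *         return VERR_WRONG_ORDER;
 * @endcode
 */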
1049
1050
1051/**
1052 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1053 * passes back old value, ordered.
1054 *
1055 * @returns true if xchg was done.
1056 * @returns false if xchg wasn't done.
1057 *
1058 * @param pu32 Pointer to the value to update.
1059 * @param u32New The new value to assign to *pu32.
1060 * @param u32Old The old value to compare *pu32 with.
1061 * @param pu32Old Pointer to store the old value at.
1062 *
1063 * @remarks x86: Requires a 486 or later.
1064 */
1065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1066DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1067#else
1068DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1069{
1070# if RT_INLINE_ASM_GNU_STYLE
1071 uint8_t u8Ret;
1072 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1073 "setz %1\n\t"
1074 : "=m" (*pu32),
1075 "=qm" (u8Ret),
1076 "=a" (*pu32Old)
1077 : "r" (u32New),
1078 "a" (u32Old),
1079 "m" (*pu32));
1080 return (bool)u8Ret;
1081
1082# elif RT_INLINE_ASM_USES_INTRIN
1083 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1084
1085# else
1086 uint32_t u32Ret;
1087 __asm
1088 {
1089# ifdef RT_ARCH_AMD64
1090 mov rdx, [pu32]
1091# else
1092 mov edx, [pu32]
1093# endif
1094 mov eax, [u32Old]
1095 mov ecx, [u32New]
1096# ifdef RT_ARCH_AMD64
1097 lock cmpxchg [rdx], ecx
1098 mov rdx, [pu32Old]
1099 mov [rdx], eax
1100# else
1101 lock cmpxchg [edx], ecx
1102 mov edx, [pu32Old]
1103 mov [edx], eax
1104# endif
1105 setz al
1106 movzx eax, al
1107 mov [u32Ret], eax
1108 }
1109 return !!u32Ret;
1110# endif
1111}
1112#endif
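/* Usage sketch (added for illustration): the Ex variant hands back the value it
 * found, so a retry loop doesn't have to re-read the variable; g_u32State is
 * made up for the example.
 * @code
 *     uint32_t uOld = ASMAtomicUoReadU32(&g_u32State);
 *     uint32_t uNew;
 *     do
 *         uNew = uOld | UINT32_C(1);
 *     while (!ASMAtomicCmpXchgExU32(&g_u32State, uNew, uOld, &uOld));
 * @endcode
 */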
1113
1114
1115/**
1116 * Atomically Compare and Exchange a signed 32-bit value, additionally
1117 * passes back old value, ordered.
1118 *
1119 * @returns true if xchg was done.
1120 * @returns false if xchg wasn't done.
1121 *
1122 * @param pi32 Pointer to the value to update.
1123 * @param i32New The new value to assign to *pi32.
1124 * @param i32Old The old value to compare *pi32 with.
1125 * @param pi32Old Pointer to store the old value at.
1126 *
1127 * @remarks x86: Requires a 486 or later.
1128 */
1129DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1130{
1131 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1132}
1133
1134
1135/**
1136 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1137 * passing back old value, ordered.
1138 *
1139 * @returns true if xchg was done.
1140 * @returns false if xchg wasn't done.
1141 *
1142 * @param pu64 Pointer to the 64-bit variable to update.
1143 * @param u64New The 64-bit value to assign to *pu64.
1144 * @param u64Old The value to compare with.
1145 * @param pu64Old Pointer to store the old value at.
1146 *
1147 * @remarks x86: Requires a Pentium or later.
1148 */
1149#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1150 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1151DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1152#else
1153DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1154{
1155# if RT_INLINE_ASM_USES_INTRIN
1156 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1157
1158# elif defined(RT_ARCH_AMD64)
1159# if RT_INLINE_ASM_GNU_STYLE
1160 uint8_t u8Ret;
1161 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1162 "setz %1\n\t"
1163 : "=m" (*pu64),
1164 "=qm" (u8Ret),
1165 "=a" (*pu64Old)
1166 : "r" (u64New),
1167 "a" (u64Old),
1168 "m" (*pu64));
1169 return (bool)u8Ret;
1170# else
1171 bool fRet;
1172 __asm
1173 {
1174 mov rdx, [pu64]
1175 mov rax, [u64Old]
1176 mov rcx, [u64New]
1177 lock cmpxchg [rdx], rcx
1178 mov rdx, [pu64Old]
1179 mov [rdx], rax
1180 setz al
1181 mov [fRet], al
1182 }
1183 return fRet;
1184# endif
1185# else /* !RT_ARCH_AMD64 */
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint64_t u64Ret;
1188# if defined(PIC) || defined(__PIC__)
1189 /* NB: this code uses a memory clobber description, because the clean
1190 * solution with an output value for *pu64 makes gcc run out of registers.
1191 * This will cause suboptimal code, and anyone with a better solution is
1192 * welcome to improve this. */
1193 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1194 "lock; cmpxchg8b %3\n\t"
1195 "xchgl %%ebx, %1\n\t"
1196 : "=A" (u64Ret)
1197 : "DS" ((uint32_t)u64New),
1198 "c" ((uint32_t)(u64New >> 32)),
1199 "m" (*pu64),
1200 "0" (u64Old)
1201 : "memory" );
1202# else /* !PIC */
1203 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1204 : "=A" (u64Ret),
1205 "=m" (*pu64)
1206 : "b" ((uint32_t)u64New),
1207 "c" ((uint32_t)(u64New >> 32)),
1208 "m" (*pu64),
1209 "0" (u64Old));
1210# endif
1211 *pu64Old = u64Ret;
1212 return u64Ret == u64Old;
1213# else
1214 uint32_t u32Ret;
1215 __asm
1216 {
1217 mov ebx, dword ptr [u64New]
1218 mov ecx, dword ptr [u64New + 4]
1219 mov edi, [pu64]
1220 mov eax, dword ptr [u64Old]
1221 mov edx, dword ptr [u64Old + 4]
1222 lock cmpxchg8b [edi]
1223 mov ebx, [pu64Old]
1224 mov [ebx], eax
1225 setz al
1226 movzx eax, al
1227 add ebx, 4
1228 mov [ebx], edx
1229 mov dword ptr [u32Ret], eax
1230 }
1231 return !!u32Ret;
1232# endif
1233# endif /* !RT_ARCH_AMD64 */
1234}
1235#endif
1236
1237
1238/**
1239 * Atomically Compare and exchange a signed 64-bit value, additionally
1240 * passing back old value, ordered.
1241 *
1242 * @returns true if xchg was done.
1243 * @returns false if xchg wasn't done.
1244 *
1245 * @param pi64 Pointer to the 64-bit variable to update.
1246 * @param i64 The 64-bit value to assign to *pi64.
1247 * @param i64Old The value to compare with.
1248 * @param pi64Old Pointer to store the old value at.
1249 *
1250 * @remarks x86: Requires a Pentium or later.
1251 */
1252DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1253{
1254 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1255}
1256
1257/** @def ASMAtomicCmpXchgExHandle
1258 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1259 *
1260 * @param ph Pointer to the value to update.
1261 * @param hNew The new value to assign to *ph.
1262 * @param hOld The old value to compare *ph with.
1263 * @param fRc Where to store the result.
1264 * @param phOldVal Pointer to where to store the old value.
1265 *
1266 * @remarks This doesn't currently work for all handles (like RTFILE).
1267 */
1268#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1269# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1270 do { \
1271 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1272 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1273 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1274 } while (0)
1275#elif HC_ARCH_BITS == 64
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1279 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1281 } while (0)
1282#else
1283# error HC_ARCH_BITS
1284#endif
1285
1286
1287/** @def ASMAtomicCmpXchgExSize
1288 * Atomically Compare and Exchange a value whose size might differ
1289 * between platforms or compilers. Additionally passes back old value.
1290 *
1291 * @param pu Pointer to the value to update.
1292 * @param uNew The new value to assign to *pu.
1293 * @param uOld The old value to compare *pu with.
1294 * @param fRc Where to store the result.
1295 * @param puOldVal Pointer to where to store the old value.
1296 *
1297 * @remarks x86: Requires a 486 or later.
1298 */
1299#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1300 do { \
1301 switch (sizeof(*(pu))) { \
1302 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1303 break; \
1304 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1305 break; \
1306 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1307 (fRc) = false; \
1308 *(puOldVal) = 0; \
1309 break; \
1310 } \
1311 } while (0)
1312
1313
1314/**
1315 * Atomically Compare and Exchange a pointer value, additionally
1316 * passing back old value, ordered.
1317 *
1318 * @returns true if xchg was done.
1319 * @returns false if xchg wasn't done.
1320 *
1321 * @param ppv Pointer to the value to update.
1322 * @param pvNew The new value to assign to *ppv.
1323 * @param pvOld The old value to compare *ppv with.
1324 * @param ppvOld Pointer to store the old value at.
1325 *
1326 * @remarks x86: Requires a 486 or later.
1327 */
1328DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1329{
1330#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1331 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1332#elif ARCH_BITS == 64
1333 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1334#else
1335# error "ARCH_BITS is bogus"
1336#endif
1337}
1338
1339
1340/**
1341 * Atomically Compare and Exchange a pointer value, additionally
1342 * passing back old value, ordered.
1343 *
1344 * @returns true if xchg was done.
1345 * @returns false if xchg wasn't done.
1346 *
1347 * @param ppv Pointer to the value to update.
1348 * @param pvNew The new value to assign to *ppv.
1349 * @param pvOld The old value to compare *ppv with.
1350 * @param ppvOld Pointer to store the old value at.
1351 *
1352 * @remarks This is relatively type safe on GCC platforms.
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355#ifdef __GNUC__
1356# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1357 __extension__ \
1358 ({\
1359 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1360 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1361 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1362 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1363 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1364 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1365 (void **)ppvOldTypeChecked); \
1366 fMacroRet; \
1367 })
1368#else
1369# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1370 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1371#endif
1372
1373
1374/**
1375 * Virtualization unfriendly serializing instruction, always exits.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMSerializeInstructionCpuId(void);
1379#else
1380DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1381{
1382# if RT_INLINE_ASM_GNU_STYLE
1383 RTCCUINTREG xAX = 0;
1384# ifdef RT_ARCH_AMD64
1385 __asm__ __volatile__ ("cpuid"
1386 : "=a" (xAX)
1387 : "0" (xAX)
1388 : "rbx", "rcx", "rdx", "memory");
1389# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1390 __asm__ __volatile__ ("push %%ebx\n\t"
1391 "cpuid\n\t"
1392 "pop %%ebx\n\t"
1393 : "=a" (xAX)
1394 : "0" (xAX)
1395 : "ecx", "edx", "memory");
1396# else
1397 __asm__ __volatile__ ("cpuid"
1398 : "=a" (xAX)
1399 : "0" (xAX)
1400 : "ebx", "ecx", "edx", "memory");
1401# endif
1402
1403# elif RT_INLINE_ASM_USES_INTRIN
1404 int aInfo[4];
1405 _ReadWriteBarrier();
1406 __cpuid(aInfo, 0);
1407
1408# else
1409 __asm
1410 {
1411 push ebx
1412 xor eax, eax
1413 cpuid
1414 pop ebx
1415 }
1416# endif
1417}
1418#endif
1419
1420/**
1421 * Virtualization friendly serializing instruction, though more expensive.
1422 */
1423#if RT_INLINE_ASM_EXTERNAL || defined(_MSC_VER) /** @todo fix 32-bit inline MSC asm */
1424DECLASM(void) ASMSerializeInstructionIRet(void);
1425#else
1426DECLINLINE(void) ASMSerializeInstructionIRet(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429# ifdef RT_ARCH_AMD64
1430 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1431 "subq $128, %%rsp\n\t" /*redzone*/
1432 "mov %%ss, %%eax\n\t"
1433 "pushq %%rax\n\t"
1434 "pushq %%r10\n\t"
1435 "pushfq\n\t"
1436 "movl %%cs, %%eax\n\t"
1437 "pushq %%rax\n\t"
1438 "leaq 1f(%%rip), %%rax\n\t"
1439 "pushq %%rax\n\t"
1440 "iretq\n\t"
1441 "1:\n\t"
1442 ::: "rax", "r10", "memory");
1443# else
1444 __asm__ __volatile__ ("pushfl\n\t"
1445 "pushl %%cs\n\t"
1446 "pushl $1f\n\t"
1447 "iretl\n\t"
1448 "1:\n\t"
1449 ::: "memory");
1450# endif
1451
1452# else
1453 __asm
1454 {
1455 pushfd
1456 push cs
1457 push la_ret
1458 iretd
1459 la_ret:
1460 }
1461# endif
1462}
1463#endif
1464
1465/**
1466 * Virtualization friendlier serializing instruction, may still cause exits.
1467 */
1468#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1469DECLASM(void) ASMSerializeInstructionRdTscp(void);
1470#else
1471DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1472{
1473# if RT_INLINE_ASM_GNU_STYLE
1474 /* rdtscp is not supported by ancient linux build VM of course :-( */
1475# ifdef RT_ARCH_AMD64
1476 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1477 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1478# else
1479 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1480 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1481# endif
1482# else
1483# if RT_INLINE_ASM_USES_INTRIN >= 15
1484 uint32_t uIgnore;
1485 _ReadWriteBarrier();
1486 (void)__rdtscp(&uIgnore);
1487 (void)uIgnore;
1488# else
1489 __asm
1490 {
1491 rdtscp
1492 }
1493# endif
1494# endif
1495}
1496#endif
1497
1498
1499/**
1500 * Serialize Instruction.
1501 */
1502#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1503# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1504#else
1505# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1506#endif
1507
1508
1509/**
1510 * Memory fence, waits for any pending writes and reads to complete.
1511 */
1512DECLINLINE(void) ASMMemoryFence(void)
1513{
1514 /** @todo use mfence? check if all cpus we care for support it. */
1515 uint32_t volatile u32;
1516 ASMAtomicXchgU32(&u32, 0);
1517}
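/* Usage sketch (added for illustration): publishing data to another CPU with an
 * explicit fence between the payload store and the "ready" flag; pShared and its
 * members are made up for the example.
 * @code
 *     pShared->u64Payload = u64Value;          // produce the data
 *     ASMMemoryFence();                        // make sure it is visible before the flag
 *     ASMAtomicXchgBool(&pShared->fReady, true);
 * @endcode
 */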
1518
1519
1520/**
1521 * Write fence, waits for any pending writes to complete.
1522 */
1523DECLINLINE(void) ASMWriteFence(void)
1524{
1525 /** @todo use sfence? check if all cpus we care for support it. */
1526 ASMMemoryFence();
1527}
1528
1529
1530/**
1531 * Read fence, waits for any pending reads to complete.
1532 */
1533DECLINLINE(void) ASMReadFence(void)
1534{
1535 /** @todo use lfence? check if all cpus we care for support it. */
1536 ASMMemoryFence();
1537}
1538
1539
1540/**
1541 * Atomically reads an unsigned 8-bit value, ordered.
1542 *
1543 * @returns Current *pu8 value
1544 * @param pu8 Pointer to the 8-bit variable to read.
1545 */
1546DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1547{
1548 ASMMemoryFence();
1549 return *pu8; /* byte reads are atomic on x86 */
1550}
1551
1552
1553/**
1554 * Atomically reads an unsigned 8-bit value, unordered.
1555 *
1556 * @returns Current *pu8 value
1557 * @param pu8 Pointer to the 8-bit variable to read.
1558 */
1559DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1560{
1561 return *pu8; /* byte reads are atomic on x86 */
1562}
1563
1564
1565/**
1566 * Atomically reads a signed 8-bit value, ordered.
1567 *
1568 * @returns Current *pi8 value
1569 * @param pi8 Pointer to the 8-bit variable to read.
1570 */
1571DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1572{
1573 ASMMemoryFence();
1574 return *pi8; /* byte reads are atomic on x86 */
1575}
1576
1577
1578/**
1579 * Atomically reads a signed 8-bit value, unordered.
1580 *
1581 * @returns Current *pi8 value
1582 * @param pi8 Pointer to the 8-bit variable to read.
1583 */
1584DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1585{
1586 return *pi8; /* byte reads are atomic on x86 */
1587}
1588
1589
1590/**
1591 * Atomically reads an unsigned 16-bit value, ordered.
1592 *
1593 * @returns Current *pu16 value
1594 * @param pu16 Pointer to the 16-bit variable to read.
1595 */
1596DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1597{
1598 ASMMemoryFence();
1599 Assert(!((uintptr_t)pu16 & 1));
1600 return *pu16;
1601}
1602
1603
1604/**
1605 * Atomically reads an unsigned 16-bit value, unordered.
1606 *
1607 * @returns Current *pu16 value
1608 * @param pu16 Pointer to the 16-bit variable to read.
1609 */
1610DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1611{
1612 Assert(!((uintptr_t)pu16 & 1));
1613 return *pu16;
1614}
1615
1616
1617/**
1618 * Atomically reads a signed 16-bit value, ordered.
1619 *
1620 * @returns Current *pi16 value
1621 * @param pi16 Pointer to the 16-bit variable to read.
1622 */
1623DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1624{
1625 ASMMemoryFence();
1626 Assert(!((uintptr_t)pi16 & 1));
1627 return *pi16;
1628}
1629
1630
1631/**
1632 * Atomically reads a signed 16-bit value, unordered.
1633 *
1634 * @returns Current *pi16 value
1635 * @param pi16 Pointer to the 16-bit variable to read.
1636 */
1637DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1638{
1639 Assert(!((uintptr_t)pi16 & 1));
1640 return *pi16;
1641}
1642
1643
1644/**
1645 * Atomically reads an unsigned 32-bit value, ordered.
1646 *
1647 * @returns Current *pu32 value
1648 * @param pu32 Pointer to the 32-bit variable to read.
1649 */
1650DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1651{
1652 ASMMemoryFence();
1653 Assert(!((uintptr_t)pu32 & 3));
1654 return *pu32;
1655}
1656
1657
1658/**
1659 * Atomically reads an unsigned 32-bit value, unordered.
1660 *
1661 * @returns Current *pu32 value
1662 * @param pu32 Pointer to the 32-bit variable to read.
1663 */
1664DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1665{
1666 Assert(!((uintptr_t)pu32 & 3));
1667 return *pu32;
1668}
1669
1670
1671/**
1672 * Atomically reads a signed 32-bit value, ordered.
1673 *
1674 * @returns Current *pi32 value
1675 * @param pi32 Pointer to the 32-bit variable to read.
1676 */
1677DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1678{
1679 ASMMemoryFence();
1680 Assert(!((uintptr_t)pi32 & 3));
1681 return *pi32;
1682}
1683
1684
1685/**
1686 * Atomically reads a signed 32-bit value, unordered.
1687 *
1688 * @returns Current *pi32 value
1689 * @param pi32 Pointer to the 32-bit variable to read.
1690 */
1691DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1692{
1693 Assert(!((uintptr_t)pi32 & 3));
1694 return *pi32;
1695}
1696
1697
1698/**
1699 * Atomically reads an unsigned 64-bit value, ordered.
1700 *
1701 * @returns Current *pu64 value
1702 * @param pu64 Pointer to the 64-bit variable to read.
1703 * The memory pointed to must be writable.
1704 *
1705 * @remarks This may fault if the memory is read-only!
1706 * @remarks x86: Requires a Pentium or later.
1707 */
1708#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1709 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1710DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1711#else
1712DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1713{
1714 uint64_t u64;
1715# ifdef RT_ARCH_AMD64
1716 Assert(!((uintptr_t)pu64 & 7));
1717/*# if RT_INLINE_ASM_GNU_STYLE
1718 __asm__ __volatile__( "mfence\n\t"
1719 "movq %1, %0\n\t"
1720 : "=r" (u64)
1721 : "m" (*pu64));
1722# else
1723 __asm
1724 {
1725 mfence
1726 mov rdx, [pu64]
1727 mov rax, [rdx]
1728 mov [u64], rax
1729 }
1730# endif*/
1731 ASMMemoryFence();
1732 u64 = *pu64;
1733# else /* !RT_ARCH_AMD64 */
1734# if RT_INLINE_ASM_GNU_STYLE
1735# if defined(PIC) || defined(__PIC__)
1736 uint32_t u32EBX = 0;
1737 Assert(!((uintptr_t)pu64 & 7));
1738 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1739 "lock; cmpxchg8b (%5)\n\t"
1740 "movl %3, %%ebx\n\t"
1741 : "=A" (u64),
1742# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1743 "+m" (*pu64)
1744# else
1745 "=m" (*pu64)
1746# endif
1747 : "0" (0ULL),
1748 "m" (u32EBX),
1749 "c" (0),
1750 "S" (pu64));
1751# else /* !PIC */
1752 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1753 : "=A" (u64),
1754 "+m" (*pu64)
1755 : "0" (0ULL),
1756 "b" (0),
1757 "c" (0));
1758# endif
1759# else
1760 Assert(!((uintptr_t)pu64 & 7));
1761 __asm
1762 {
1763 xor eax, eax
1764 xor edx, edx
1765 mov edi, pu64
1766 xor ecx, ecx
1767 xor ebx, ebx
1768 lock cmpxchg8b [edi]
1769 mov dword ptr [u64], eax
1770 mov dword ptr [u64 + 4], edx
1771 }
1772# endif
1773# endif /* !RT_ARCH_AMD64 */
1774 return u64;
1775}
1776#endif
1777
1778
1779/**
1780 * Atomically reads an unsigned 64-bit value, unordered.
1781 *
1782 * @returns Current *pu64 value
1783 * @param pu64 Pointer to the 64-bit variable to read.
1784 * The memory pointed to must be writable.
1785 *
1786 * @remarks This may fault if the memory is read-only!
1787 * @remarks x86: Requires a Pentium or later.
1788 */
1789#if !defined(RT_ARCH_AMD64) \
1790 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1791 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1792DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1793#else
1794DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1795{
1796 uint64_t u64;
1797# ifdef RT_ARCH_AMD64
1798 Assert(!((uintptr_t)pu64 & 7));
1799/*# if RT_INLINE_ASM_GNU_STYLE
1800 Assert(!((uintptr_t)pu64 & 7));
1801 __asm__ __volatile__("movq %1, %0\n\t"
1802 : "=r" (u64)
1803 : "m" (*pu64));
1804# else
1805 __asm
1806 {
1807 mov rdx, [pu64]
1808 mov rax, [rdx]
1809 mov [u64], rax
1810 }
1811# endif */
1812 u64 = *pu64;
1813# else /* !RT_ARCH_AMD64 */
1814# if RT_INLINE_ASM_GNU_STYLE
1815# if defined(PIC) || defined(__PIC__)
1816 uint32_t u32EBX = 0;
1817 uint32_t u32Spill;
1818 Assert(!((uintptr_t)pu64 & 7));
1819 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1820 "xor %%ecx,%%ecx\n\t"
1821 "xor %%edx,%%edx\n\t"
1822 "xchgl %%ebx, %3\n\t"
1823 "lock; cmpxchg8b (%4)\n\t"
1824 "movl %3, %%ebx\n\t"
1825 : "=A" (u64),
1826# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1827 "+m" (*pu64),
1828# else
1829 "=m" (*pu64),
1830# endif
1831 "=c" (u32Spill)
1832 : "m" (u32EBX),
1833 "S" (pu64));
1834# else /* !PIC */
1835 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1836 : "=A" (u64),
1837 "+m" (*pu64)
1838 : "0" (0ULL),
1839 "b" (0),
1840 "c" (0));
1841# endif
1842# else
1843 Assert(!((uintptr_t)pu64 & 7));
1844 __asm
1845 {
1846 xor eax, eax
1847 xor edx, edx
1848 mov edi, pu64
1849 xor ecx, ecx
1850 xor ebx, ebx
1851 lock cmpxchg8b [edi]
1852 mov dword ptr [u64], eax
1853 mov dword ptr [u64 + 4], edx
1854 }
1855# endif
1856# endif /* !RT_ARCH_AMD64 */
1857 return u64;
1858}
1859#endif
1860
1861
1862/**
1863 * Atomically reads a signed 64-bit value, ordered.
1864 *
1865 * @returns Current *pi64 value
1866 * @param pi64 Pointer to the 64-bit variable to read.
1867 * The memory pointed to must be writable.
1868 *
1869 * @remarks This may fault if the memory is read-only!
1870 * @remarks x86: Requires a Pentium or later.
1871 */
1872DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1873{
1874 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1875}
1876
1877
1878/**
1879 * Atomically reads a signed 64-bit value, unordered.
1880 *
1881 * @returns Current *pi64 value
1882 * @param pi64 Pointer to the 64-bit variable to read.
1883 * The memory pointed to must be writable.
1884 *
1885 * @remarks This will fault if the memory is read-only!
1886 * @remarks x86: Requires a Pentium or later.
1887 */
1888DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1889{
1890 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1891}
1892
1893
1894/**
1895 * Atomically reads a size_t value, ordered.
1896 *
1897 * @returns Current *pcb value
1898 * @param pcb Pointer to the size_t variable to read.
1899 */
1900DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1901{
1902#if ARCH_BITS == 64
1903 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1904#elif ARCH_BITS == 32
1905 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1906#elif ARCH_BITS == 16
1907 AssertCompileSize(size_t, 2);
1908 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1909#else
1910# error "Unsupported ARCH_BITS value"
1911#endif
1912}
1913
1914
1915/**
1916 * Atomically reads a size_t value, unordered.
1917 *
1918 * @returns Current *pcb value
1919 * @param pcb Pointer to the size_t variable to read.
1920 */
1921DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1922{
1923#if ARCH_BITS == 64
1924 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1925#elif ARCH_BITS == 32
1926 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1927#elif ARCH_BITS == 16
1928 AssertCompileSize(size_t, 2);
1929 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1930#else
1931# error "Unsupported ARCH_BITS value"
1932#endif
1933}
1934
1935
1936/**
1937 * Atomically reads a pointer value, ordered.
1938 *
1939 * @returns Current *pv value
1940 * @param ppv Pointer to the pointer variable to read.
1941 *
1942 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
1943 * requires less typing (no casts).
1944 */
1945DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1946{
1947#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1948 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1949#elif ARCH_BITS == 64
1950 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1951#else
1952# error "ARCH_BITS is bogus"
1953#endif
1954}
1955
1956/**
1957 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1958 *
1959 * @returns Current *pv value
1960 * @param ppv Pointer to the pointer variable to read.
1961 * @param Type The type of *ppv, sans volatile.
1962 */
1963#ifdef __GNUC__
1964# define ASMAtomicReadPtrT(ppv, Type) \
1965 __extension__ \
1966 ({\
1967 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1968 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1969 pvTypeChecked; \
1970 })
1971#else
1972# define ASMAtomicReadPtrT(ppv, Type) \
1973 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1974#endif
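/*
 * Usage sketch (informative only; RTEXAMPLENODE and g_pExampleHead are made
 * up for illustration): the typed variant spares the caller the casts that
 * the raw void pointer reader requires.
 *
 *      typedef struct RTEXAMPLENODE *PRTEXAMPLENODE;
 *      static PRTEXAMPLENODE volatile g_pExampleHead;
 *
 *      PRTEXAMPLENODE pHead = ASMAtomicReadPtrT(&g_pExampleHead, PRTEXAMPLENODE);
 *      void          *pvRaw = ASMAtomicReadPtr((void * volatile *)&g_pExampleHead);
 */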
1975
1976
1977/**
1978 * Atomically reads a pointer value, unordered.
1979 *
1980 * @returns Current *pv value
1981 * @param ppv Pointer to the pointer variable to read.
1982 *
1983 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
1984 * requires less typing (no casts).
1985 */
1986DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1987{
1988#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1989 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1990#elif ARCH_BITS == 64
1991 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1992#else
1993# error "ARCH_BITS is bogus"
1994#endif
1995}
1996
1997
1998/**
1999 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2000 *
2001 * @returns Current *pv value
2002 * @param ppv Pointer to the pointer variable to read.
2003 * @param Type The type of *ppv, sans volatile.
2004 */
2005#ifdef __GNUC__
2006# define ASMAtomicUoReadPtrT(ppv, Type) \
2007 __extension__ \
2008 ({\
2009 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2010 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2011 pvTypeChecked; \
2012 })
2013#else
2014# define ASMAtomicUoReadPtrT(ppv, Type) \
2015 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2016#endif
2017
2018
2019/**
2020 * Atomically reads a boolean value, ordered.
2021 *
2022 * @returns Current *pf value
2023 * @param pf Pointer to the boolean variable to read.
2024 */
2025DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2026{
2027 ASMMemoryFence();
2028 return *pf; /* byte reads are atomic on x86 */
2029}
2030
2031
2032/**
2033 * Atomically reads a boolean value, unordered.
2034 *
2035 * @returns Current *pf value
2036 * @param pf Pointer to the boolean variable to read.
2037 */
2038DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2039{
2040 return *pf; /* byte reads are atomic on x86 */
2041}
2042
2043
2044/**
2045 * Atomically read a typical IPRT handle value, ordered.
2046 *
2047 * @param ph Pointer to the handle variable to read.
2048 * @param phRes Where to store the result.
2049 *
2050 * @remarks This doesn't currently work for all handles (like RTFILE).
2051 */
2052#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2053# define ASMAtomicReadHandle(ph, phRes) \
2054 do { \
2055 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2056 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2057 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2058 } while (0)
2059#elif HC_ARCH_BITS == 64
2060# define ASMAtomicReadHandle(ph, phRes) \
2061 do { \
2062 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2063 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2064 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2065 } while (0)
2066#else
2067# error HC_ARCH_BITS
2068#endif
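/*
 * Usage sketch (informative only; the event variables are made up): the
 * AssertCompile checks in the macro ensure the handle really is pointer
 * sized before it is read as a 32-bit or 64-bit integer.
 *
 *      RTSEMEVENT volatile hEventShared = NIL_RTSEMEVENT;
 *      RTSEMEVENT          hEvent;
 *      ASMAtomicReadHandle(&hEventShared, &hEvent);
 */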
2069
2070
2071/**
2072 * Atomically read a typical IPRT handle value, unordered.
2073 *
2074 * @param ph Pointer to the handle variable to read.
2075 * @param phRes Where to store the result.
2076 *
2077 * @remarks This doesn't currently work for all handles (like RTFILE).
2078 */
2079#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2080# define ASMAtomicUoReadHandle(ph, phRes) \
2081 do { \
2082 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2083 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2084 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2085 } while (0)
2086#elif HC_ARCH_BITS == 64
2087# define ASMAtomicUoReadHandle(ph, phRes) \
2088 do { \
2089 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2090 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2091 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2092 } while (0)
2093#else
2094# error HC_ARCH_BITS
2095#endif
2096
2097
2098/**
2099 * Atomically read a value whose size might differ
2100 * between platforms or compilers, ordered.
2101 *
2102 * @param pu Pointer to the variable to read.
2103 * @param puRes Where to store the result.
2104 */
2105#define ASMAtomicReadSize(pu, puRes) \
2106 do { \
2107 switch (sizeof(*(pu))) { \
2108 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2109 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2110 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2111 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2112 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2113 } \
2114 } while (0)
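/*
 * Usage sketch (informative only; the structure is hypothetical): the macro
 * dispatches on sizeof(*(pu)), which is convenient for fields whose width
 * depends on the host architecture or compiler.
 *
 *      struct { long volatile lValue; } ExampleState;
 *      long lValue;
 *      ASMAtomicReadSize(&ExampleState.lValue, &lValue);
 */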
2115
2116
2117/**
2118 * Atomically read a value whose size might differ
2119 * between platforms or compilers, unordered.
2120 *
2121 * @param pu Pointer to the variable to read.
2122 * @param puRes Where to store the result.
2123 */
2124#define ASMAtomicUoReadSize(pu, puRes) \
2125 do { \
2126 switch (sizeof(*(pu))) { \
2127 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2128 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2129 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2130 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2131        default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2132 } \
2133 } while (0)
2134
2135
2136/**
2137 * Atomically writes an unsigned 8-bit value, ordered.
2138 *
2139 * @param pu8 Pointer to the 8-bit variable.
2140 * @param u8 The 8-bit value to assign to *pu8.
2141 */
2142DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2143{
2144 ASMAtomicXchgU8(pu8, u8);
2145}
2146
2147
2148/**
2149 * Atomically writes an unsigned 8-bit value, unordered.
2150 *
2151 * @param pu8 Pointer to the 8-bit variable.
2152 * @param u8 The 8-bit value to assign to *pu8.
2153 */
2154DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2155{
2156 *pu8 = u8; /* byte writes are atomic on x86 */
2157}
2158
2159
2160/**
2161 * Atomically writes a signed 8-bit value, ordered.
2162 *
2163 * @param pi8 Pointer to the 8-bit variable to write.
2164 * @param i8 The 8-bit value to assign to *pi8.
2165 */
2166DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2167{
2168 ASMAtomicXchgS8(pi8, i8);
2169}
2170
2171
2172/**
2173 * Atomically writes a signed 8-bit value, unordered.
2174 *
2175 * @param pi8 Pointer to the 8-bit variable to write.
2176 * @param i8 The 8-bit value to assign to *pi8.
2177 */
2178DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2179{
2180 *pi8 = i8; /* byte writes are atomic on x86 */
2181}
2182
2183
2184/**
2185 * Atomically writes an unsigned 16-bit value, ordered.
2186 *
2187 * @param pu16 Pointer to the 16-bit variable to write.
2188 * @param u16 The 16-bit value to assign to *pu16.
2189 */
2190DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2191{
2192 ASMAtomicXchgU16(pu16, u16);
2193}
2194
2195
2196/**
2197 * Atomically writes an unsigned 16-bit value, unordered.
2198 *
2199 * @param pu16 Pointer to the 16-bit variable to write.
2200 * @param u16 The 16-bit value to assign to *pu16.
2201 */
2202DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2203{
2204 Assert(!((uintptr_t)pu16 & 1));
2205 *pu16 = u16;
2206}
2207
2208
2209/**
2210 * Atomically writes a signed 16-bit value, ordered.
2211 *
2212 * @param pi16 Pointer to the 16-bit variable to write.
2213 * @param i16 The 16-bit value to assign to *pi16.
2214 */
2215DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2216{
2217 ASMAtomicXchgS16(pi16, i16);
2218}
2219
2220
2221/**
2222 * Atomically writes a signed 16-bit value, unordered.
2223 *
2224 * @param pi16 Pointer to the 16-bit variable to write.
2225 * @param i16 The 16-bit value to assign to *pi16.
2226 */
2227DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2228{
2229 Assert(!((uintptr_t)pi16 & 1));
2230 *pi16 = i16;
2231}
2232
2233
2234/**
2235 * Atomically writes an unsigned 32-bit value, ordered.
2236 *
2237 * @param pu32 Pointer to the 32-bit variable to write.
2238 * @param u32 The 32-bit value to assign to *pu32.
2239 */
2240DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2241{
2242 ASMAtomicXchgU32(pu32, u32);
2243}
2244
2245
2246/**
2247 * Atomically writes an unsigned 32-bit value, unordered.
2248 *
2249 * @param pu32 Pointer to the 32-bit variable to write.
2250 * @param u32 The 32-bit value to assign to *pu32.
2251 */
2252DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2253{
2254 Assert(!((uintptr_t)pu32 & 3));
2255 *pu32 = u32;
2256}
2257
2258
2259/**
2260 * Atomically writes a signed 32-bit value, ordered.
2261 *
2262 * @param pi32 Pointer to the 32-bit variable to write.
2263 * @param i32 The 32-bit value to assign to *pi32.
2264 */
2265DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2266{
2267 ASMAtomicXchgS32(pi32, i32);
2268}
2269
2270
2271/**
2272 * Atomically writes a signed 32-bit value, unordered.
2273 *
2274 * @param pi32 Pointer to the 32-bit variable to write.
2275 * @param i32 The 32-bit value to assign to *pi32.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2278{
2279 Assert(!((uintptr_t)pi32 & 3));
2280 *pi32 = i32;
2281}
2282
2283
2284/**
2285 * Atomically writes an unsigned 64-bit value, ordered.
2286 *
2287 * @param pu64 Pointer to the 64-bit variable to write.
2288 * @param u64 The 64-bit value to assign to *pu64.
2289 */
2290DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2291{
2292 ASMAtomicXchgU64(pu64, u64);
2293}
2294
2295
2296/**
2297 * Atomically writes an unsigned 64-bit value, unordered.
2298 *
2299 * @param pu64 Pointer to the 64-bit variable to write.
2300 * @param u64 The 64-bit value to assign to *pu64.
2301 */
2302DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2303{
2304 Assert(!((uintptr_t)pu64 & 7));
2305#if ARCH_BITS == 64
2306 *pu64 = u64;
2307#else
2308 ASMAtomicXchgU64(pu64, u64);
2309#endif
2310}
2311
2312
2313/**
2314 * Atomically writes a signed 64-bit value, ordered.
2315 *
2316 * @param pi64 Pointer to the 64-bit variable to write.
2317 * @param i64 The 64-bit value to assign to *pi64.
2318 */
2319DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2320{
2321 ASMAtomicXchgS64(pi64, i64);
2322}
2323
2324
2325/**
2326 * Atomically writes a signed 64-bit value, unordered.
2327 *
2328 * @param pi64 Pointer to the 64-bit variable to write.
2329 * @param i64 The 64-bit value to assign to *pi64.
2330 */
2331DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2332{
2333 Assert(!((uintptr_t)pi64 & 7));
2334#if ARCH_BITS == 64
2335 *pi64 = i64;
2336#else
2337 ASMAtomicXchgS64(pi64, i64);
2338#endif
2339}
2340
2341
2342/**
2343 * Atomically writes a boolean value, ordered.
2344 *
2345 * @param pf Pointer to the boolean variable to write.
2346 * @param f The boolean value to assign to *pf.
2347 */
2348DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2349{
2350 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2351}
2352
2353
2354/**
2355 * Atomically writes a boolean value, unordered.
2356 *
2357 * @param pf Pointer to the boolean variable to write.
2358 * @param f The boolean value to assign to *pf.
2359 */
2360DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2361{
2362 *pf = f; /* byte writes are atomic on x86 */
2363}
2364
2365
2366/**
2367 * Atomically writes a pointer value, ordered.
2368 *
2369 * @param ppv Pointer to the pointer variable to write.
2370 * @param pv The pointer value to assign to *ppv.
2371 */
2372DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2373{
2374#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2375 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2376#elif ARCH_BITS == 64
2377 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2378#else
2379# error "ARCH_BITS is bogus"
2380#endif
2381}
2382
2383
2384/**
2385 * Atomically writes a pointer value, ordered.
2386 *
2387 * @param ppv Pointer to the pointer variable to write.
2388 * @param pv The pointer value to assign to *ppv. If NULL use
2389 * ASMAtomicWriteNullPtr or you'll land in trouble.
2390 *
2391 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2392 * NULL.
2393 */
2394#ifdef __GNUC__
2395# define ASMAtomicWritePtr(ppv, pv) \
2396 do \
2397 { \
2398 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2399 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2400 \
2401 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2402 AssertCompile(sizeof(pv) == sizeof(void *)); \
2403 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2404 \
2405 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2406 } while (0)
2407#else
2408# define ASMAtomicWritePtr(ppv, pv) \
2409 do \
2410 { \
2411 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2412 AssertCompile(sizeof(pv) == sizeof(void *)); \
2413 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2414 \
2415 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2416 } while (0)
2417#endif
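/*
 * Usage sketch (informative only, reusing the hypothetical g_pExampleHead
 * from above): the ordered pointer write publishes a fully initialized
 * object, so readers using ASMAtomicReadPtrT never see a half-constructed
 * node.
 *
 *      PRTEXAMPLENODE pNew = rtExampleAllocNode();     // hypothetical helper
 *      pNew->u64Payload = 42;
 *      ASMAtomicWritePtr(&g_pExampleHead, pNew);
 */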
2418
2419
2420/**
2421 * Atomically sets a pointer to NULL, ordered.
2422 *
2423 * @param ppv Pointer to the pointer variable that should be set to NULL.
2424 *
2425 * @remarks This is relatively type safe on GCC platforms.
2426 */
2427#ifdef __GNUC__
2428# define ASMAtomicWriteNullPtr(ppv) \
2429 do \
2430 { \
2431 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2432 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2433 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2434 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2435 } while (0)
2436#else
2437# define ASMAtomicWriteNullPtr(ppv) \
2438 do \
2439 { \
2440 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2441 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2442 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2443 } while (0)
2444#endif
2445
2446
2447/**
2448 * Atomically writes a pointer value, unordered.
2449 *
2451 * @param ppv Pointer to the pointer variable.
2452 * @param pv The pointer value to assign to *ppv. If NULL use
2453 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2454 *
2455 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2456 * NULL.
2457 */
2458#ifdef __GNUC__
2459# define ASMAtomicUoWritePtr(ppv, pv) \
2460 do \
2461 { \
2462 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2463 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2464 \
2465 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2466 AssertCompile(sizeof(pv) == sizeof(void *)); \
2467 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2468 \
2469 *(ppvTypeChecked) = pvTypeChecked; \
2470 } while (0)
2471#else
2472# define ASMAtomicUoWritePtr(ppv, pv) \
2473 do \
2474 { \
2475 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2476 AssertCompile(sizeof(pv) == sizeof(void *)); \
2477 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2478 *(ppv) = pv; \
2479 } while (0)
2480#endif
2481
2482
2483/**
2484 * Atomically sets a pointer to NULL, unordered.
2485 *
2486 * @param ppv Pointer to the pointer variable that should be set to NULL.
2487 *
2488 * @remarks This is relatively type safe on GCC platforms.
2489 */
2490#ifdef __GNUC__
2491# define ASMAtomicUoWriteNullPtr(ppv) \
2492 do \
2493 { \
2494 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2495 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2496 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2497 *(ppvTypeChecked) = NULL; \
2498 } while (0)
2499#else
2500# define ASMAtomicUoWriteNullPtr(ppv) \
2501 do \
2502 { \
2503 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2504 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2505 *(ppv) = NULL; \
2506 } while (0)
2507#endif
2508
2509
2510/**
2511 * Atomically write a typical IPRT handle value, ordered.
2512 *
2513 * @param ph Pointer to the variable to update.
2514 * @param hNew The value to assign to *ph.
2515 *
2516 * @remarks This doesn't currently work for all handles (like RTFILE).
2517 */
2518#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2519# define ASMAtomicWriteHandle(ph, hNew) \
2520 do { \
2521 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2522 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2523 } while (0)
2524#elif HC_ARCH_BITS == 64
2525# define ASMAtomicWriteHandle(ph, hNew) \
2526 do { \
2527 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2528 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2529 } while (0)
2530#else
2531# error HC_ARCH_BITS
2532#endif
2533
2534
2535/**
2536 * Atomically write a typical IPRT handle value, unordered.
2537 *
2538 * @param ph Pointer to the variable to update.
2539 * @param hNew The value to assign to *ph.
2540 *
2541 * @remarks This doesn't currently work for all handles (like RTFILE).
2542 */
2543#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2544# define ASMAtomicUoWriteHandle(ph, hNew) \
2545 do { \
2546 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2547 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2548 } while (0)
2549#elif HC_ARCH_BITS == 64
2550# define ASMAtomicUoWriteHandle(ph, hNew) \
2551 do { \
2552 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2553 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2554 } while (0)
2555#else
2556# error HC_ARCH_BITS
2557#endif
2558
2559
2560/**
2561 * Atomically write a value whose size might differ
2562 * between platforms or compilers, ordered.
2563 *
2564 * @param pu Pointer to the variable to update.
2565 * @param uNew The value to assign to *pu.
2566 */
2567#define ASMAtomicWriteSize(pu, uNew) \
2568 do { \
2569 switch (sizeof(*(pu))) { \
2570 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2571 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2572 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2573 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2574 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2575 } \
2576 } while (0)
2577
2578/**
2579 * Atomically write a value whose size might differ
2580 * between platforms or compilers, unordered.
2581 *
2582 * @param pu Pointer to the variable to update.
2583 * @param uNew The value to assign to *pu.
2584 */
2585#define ASMAtomicUoWriteSize(pu, uNew) \
2586 do { \
2587 switch (sizeof(*(pu))) { \
2588 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2589 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2590 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2591 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2592        default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2593 } \
2594 } while (0)
2595
2596
2597
2598/**
2599 * Atomically exchanges and adds to a 16-bit value, ordered.
2600 *
2601 * @returns The old value.
2602 * @param pu16 Pointer to the value.
2603 * @param u16 Number to add.
2604 *
2605 * @remarks Currently not implemented, just to make 16-bit code happy.
2606 * @remarks x86: Requires a 486 or later.
2607 */
2608DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2609
2610
2611/**
2612 * Atomically exchanges and adds to a 32-bit value, ordered.
2613 *
2614 * @returns The old value.
2615 * @param pu32 Pointer to the value.
2616 * @param u32 Number to add.
2617 *
2618 * @remarks x86: Requires a 486 or later.
2619 */
2620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2621DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2622#else
2623DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2624{
2625# if RT_INLINE_ASM_USES_INTRIN
2626 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2627 return u32;
2628
2629# elif RT_INLINE_ASM_GNU_STYLE
2630 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2631 : "=r" (u32),
2632 "=m" (*pu32)
2633 : "0" (u32),
2634 "m" (*pu32)
2635 : "memory");
2636 return u32;
2637# else
2638 __asm
2639 {
2640 mov eax, [u32]
2641# ifdef RT_ARCH_AMD64
2642 mov rdx, [pu32]
2643 lock xadd [rdx], eax
2644# else
2645 mov edx, [pu32]
2646 lock xadd [edx], eax
2647# endif
2648 mov [u32], eax
2649 }
2650 return u32;
2651# endif
2652}
2653#endif
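/*
 * Usage sketch (informative only; the ticket counter is made up):
 * ASMAtomicAddU32 returns the value *before* the addition, which is what
 * makes fetch-and-add suitable for handing out tickets or array indexes.
 *
 *      static uint32_t volatile g_uNextTicket;
 *      uint32_t uMyTicket = ASMAtomicAddU32(&g_uNextTicket, 1);  // old value
 */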
2654
2655
2656/**
2657 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2658 *
2659 * @returns The old value.
2660 * @param pi32 Pointer to the value.
2661 * @param i32 Number to add.
2662 *
2663 * @remarks x86: Requires a 486 or later.
2664 */
2665DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2666{
2667 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2668}
2669
2670
2671/**
2672 * Atomically exchanges and adds to a 64-bit value, ordered.
2673 *
2674 * @returns The old value.
2675 * @param pu64 Pointer to the value.
2676 * @param u64 Number to add.
2677 *
2678 * @remarks x86: Requires a Pentium or later.
2679 */
2680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2681DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2682#else
2683DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2684{
2685# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2686 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2687 return u64;
2688
2689# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2690 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2691 : "=r" (u64),
2692 "=m" (*pu64)
2693 : "0" (u64),
2694 "m" (*pu64)
2695 : "memory");
2696 return u64;
2697# else
2698 uint64_t u64Old;
2699 for (;;)
2700 {
2701 uint64_t u64New;
2702 u64Old = ASMAtomicUoReadU64(pu64);
2703 u64New = u64Old + u64;
2704 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2705 break;
2706 ASMNopPause();
2707 }
2708 return u64Old;
2709# endif
2710}
2711#endif
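/*
 * Usage sketch (informative only; the counter is made up): on 32-bit hosts
 * the addition above falls back to a compare-and-swap retry loop, but the
 * caller side looks the same everywhere, e.g. for a 64-bit byte counter.
 *
 *      static uint64_t volatile g_cbWritten;
 *      uint64_t const cbChunk = 4096;
 *      uint64_t cbBefore = ASMAtomicAddU64(&g_cbWritten, cbChunk);
 */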
2712
2713
2714/**
2715 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2716 *
2717 * @returns The old value.
2718 * @param pi64 Pointer to the value.
2719 * @param i64 Number to add.
2720 *
2721 * @remarks x86: Requires a Pentium or later.
2722 */
2723DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2724{
2725 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2726}
2727
2728
2729/**
2730 * Atomically exchanges and adds to a size_t value, ordered.
2731 *
2732 * @returns The old value.
2733 * @param pcb Pointer to the size_t value.
2734 * @param cb Number to add.
2735 */
2736DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2737{
2738#if ARCH_BITS == 64
2739 AssertCompileSize(size_t, 8);
2740 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2741#elif ARCH_BITS == 32
2742 AssertCompileSize(size_t, 4);
2743 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2744#elif ARCH_BITS == 16
2745 AssertCompileSize(size_t, 2);
2746 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2747#else
2748# error "Unsupported ARCH_BITS value"
2749#endif
2750}
2751
2752
2753/**
2754 * Atomically exchanges and adds a value whose size might differ between
2755 * platforms or compilers, ordered.
2756 *
2757 * @param pu Pointer to the variable to update.
2758 * @param uNew The value to add to *pu.
2759 * @param puOld Where to store the old value.
2760 */
2761#define ASMAtomicAddSize(pu, uNew, puOld) \
2762 do { \
2763 switch (sizeof(*(pu))) { \
2764 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2765 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2766 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2767 } \
2768 } while (0)
2769
2770
2771
2772/**
2773 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2774 *
2775 * @returns The old value.
2776 * @param pu16 Pointer to the value.
2777 * @param u16 Number to subtract.
2778 *
2779 * @remarks x86: Requires a 486 or later.
2780 */
2781DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2782{
2783 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2784}
2785
2786
2787/**
2788 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2789 *
2790 * @returns The old value.
2791 * @param pi16 Pointer to the value.
2792 * @param i16 Number to subtract.
2793 *
2794 * @remarks x86: Requires a 486 or later.
2795 */
2796DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2797{
2798 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2799}
2800
2801
2802/**
2803 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2804 *
2805 * @returns The old value.
2806 * @param pu32 Pointer to the value.
2807 * @param u32 Number to subtract.
2808 *
2809 * @remarks x86: Requires a 486 or later.
2810 */
2811DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2812{
2813 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2814}
2815
2816
2817/**
2818 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2819 *
2820 * @returns The old value.
2821 * @param pi32 Pointer to the value.
2822 * @param i32 Number to subtract.
2823 *
2824 * @remarks x86: Requires a 486 or later.
2825 */
2826DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2827{
2828 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2829}
2830
2831
2832/**
2833 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2834 *
2835 * @returns The old value.
2836 * @param pu64 Pointer to the value.
2837 * @param u64 Number to subtract.
2838 *
2839 * @remarks x86: Requires a Pentium or later.
2840 */
2841DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2842{
2843 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2844}
2845
2846
2847/**
2848 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2849 *
2850 * @returns The old value.
2851 * @param pi64 Pointer to the value.
2852 * @param i64 Number to subtract.
2853 *
2854 * @remarks x86: Requires a Pentium or later.
2855 */
2856DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2857{
2858 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2859}
2860
2861
2862/**
2863 * Atomically exchanges and subtracts from a size_t value, ordered.
2864 *
2865 * @returns The old value.
2866 * @param pcb Pointer to the size_t value.
2867 * @param cb Number to subtract.
2868 *
2869 * @remarks x86: Requires a 486 or later.
2870 */
2871DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2872{
2873#if ARCH_BITS == 64
2874 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2875#elif ARCH_BITS == 32
2876 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2877#elif ARCH_BITS == 16
2878 AssertCompileSize(size_t, 2);
2879 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2880#else
2881# error "Unsupported ARCH_BITS value"
2882#endif
2883}
2884
2885
2886/**
2887 * Atomically exchanges and subtracts a value whose size might differ between
2888 * platforms or compilers, ordered.
2889 *
2890 * @param pu Pointer to the variable to update.
2891 * @param uNew The value to subtract from *pu.
2892 * @param puOld Where to store the old value.
2893 *
2894 * @remarks x86: Requires a 486 or later.
2895 */
2896#define ASMAtomicSubSize(pu, uNew, puOld) \
2897 do { \
2898 switch (sizeof(*(pu))) { \
2899 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2900 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2901 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2902 } \
2903 } while (0)
2904
2905
2906
2907/**
2908 * Atomically increment a 16-bit value, ordered.
2909 *
2910 * @returns The new value.
2911 * @param pu16 Pointer to the value to increment.
2912 * @remarks Not implemented. Just to make 16-bit code happy.
2913 *
2914 * @remarks x86: Requires a 486 or later.
2915 */
2916DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2917
2918
2919/**
2920 * Atomically increment a 32-bit value, ordered.
2921 *
2922 * @returns The new value.
2923 * @param pu32 Pointer to the value to increment.
2924 *
2925 * @remarks x86: Requires a 486 or later.
2926 */
2927#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2928DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2929#else
2930DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2931{
2932 uint32_t u32;
2933# if RT_INLINE_ASM_USES_INTRIN
2934 u32 = _InterlockedIncrement((long *)pu32);
2935 return u32;
2936
2937# elif RT_INLINE_ASM_GNU_STYLE
2938 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2939 : "=r" (u32),
2940 "=m" (*pu32)
2941 : "0" (1),
2942 "m" (*pu32)
2943 : "memory");
2944 return u32+1;
2945# else
2946 __asm
2947 {
2948 mov eax, 1
2949# ifdef RT_ARCH_AMD64
2950 mov rdx, [pu32]
2951 lock xadd [rdx], eax
2952# else
2953 mov edx, [pu32]
2954 lock xadd [edx], eax
2955# endif
2956 mov u32, eax
2957 }
2958 return u32+1;
2959# endif
2960}
2961#endif
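/*
 * Usage sketch (informative only; the reference counter is made up):
 * ASMAtomicIncU32 returns the new value, which the retaining side typically
 * only uses for sanity assertions.
 *
 *      static uint32_t volatile g_cExampleRefs = 1;
 *      uint32_t cRefs = ASMAtomicIncU32(&g_cExampleRefs);
 *      Assert(cRefs > 1);
 */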
2962
2963
2964/**
2965 * Atomically increment a signed 32-bit value, ordered.
2966 *
2967 * @returns The new value.
2968 * @param pi32 Pointer to the value to increment.
2969 *
2970 * @remarks x86: Requires a 486 or later.
2971 */
2972DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2973{
2974 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2975}
2976
2977
2978/**
2979 * Atomically increment a 64-bit value, ordered.
2980 *
2981 * @returns The new value.
2982 * @param pu64 Pointer to the value to increment.
2983 *
2984 * @remarks x86: Requires a Pentium or later.
2985 */
2986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2987DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2988#else
2989DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2990{
2991# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2992 uint64_t u64;
2993 u64 = _InterlockedIncrement64((__int64 *)pu64);
2994 return u64;
2995
2996# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2997 uint64_t u64;
2998 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2999 : "=r" (u64),
3000 "=m" (*pu64)
3001 : "0" (1),
3002 "m" (*pu64)
3003 : "memory");
3004 return u64 + 1;
3005# else
3006 return ASMAtomicAddU64(pu64, 1) + 1;
3007# endif
3008}
3009#endif
3010
3011
3012/**
3013 * Atomically increment a signed 64-bit value, ordered.
3014 *
3015 * @returns The new value.
3016 * @param pi64 Pointer to the value to increment.
3017 *
3018 * @remarks x86: Requires a Pentium or later.
3019 */
3020DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3021{
3022 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3023}
3024
3025
3026/**
3027 * Atomically increment a size_t value, ordered.
3028 *
3029 * @returns The new value.
3030 * @param pcb Pointer to the value to increment.
3031 *
3032 * @remarks x86: Requires a 486 or later.
3033 */
3034DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
3035{
3036#if ARCH_BITS == 64
3037 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3038#elif ARCH_BITS == 32
3039 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3040#elif ARCH_BITS == 16
3041 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3042#else
3043# error "Unsupported ARCH_BITS value"
3044#endif
3045}
3046
3047
3048
3049/**
3050 * Atomically decrement an unsigned 16-bit value, ordered.
3051 *
3052 * @returns The new value.
3053 * @param pu16 Pointer to the value to decrement.
3054 * @remarks Not implemented. Just to make 16-bit code happy.
3055 *
3056 * @remarks x86: Requires a 486 or later.
3057 */
3058DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3059
3060
3061/**
3062 * Atomically decrement an unsigned 32-bit value, ordered.
3063 *
3064 * @returns The new value.
3065 * @param pu32 Pointer to the value to decrement.
3066 *
3067 * @remarks x86: Requires a 486 or later.
3068 */
3069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3070DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3071#else
3072DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3073{
3074 uint32_t u32;
3075# if RT_INLINE_ASM_USES_INTRIN
3076 u32 = _InterlockedDecrement((long *)pu32);
3077 return u32;
3078
3079# elif RT_INLINE_ASM_GNU_STYLE
3080 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3081 : "=r" (u32),
3082 "=m" (*pu32)
3083 : "0" (-1),
3084 "m" (*pu32)
3085 : "memory");
3086 return u32-1;
3087# else
3088 __asm
3089 {
3090 mov eax, -1
3091# ifdef RT_ARCH_AMD64
3092 mov rdx, [pu32]
3093 lock xadd [rdx], eax
3094# else
3095 mov edx, [pu32]
3096 lock xadd [edx], eax
3097# endif
3098 mov u32, eax
3099 }
3100 return u32-1;
3101# endif
3102}
3103#endif
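/*
 * Usage sketch (informative only, continuing the hypothetical reference
 * counter above): the new value returned by ASMAtomicDecU32 tells the
 * releasing thread whether it dropped the last reference.
 *
 *      if (ASMAtomicDecU32(&g_cExampleRefs) == 0)
 *          rtExampleDestroy();                         // hypothetical helper
 */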
3104
3105
3106/**
3107 * Atomically decrement a signed 32-bit value, ordered.
3108 *
3109 * @returns The new value.
3110 * @param pi32 Pointer to the value to decrement.
3111 *
3112 * @remarks x86: Requires a 486 or later.
3113 */
3114DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3115{
3116 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3117}
3118
3119
3120/**
3121 * Atomically decrement an unsigned 64-bit value, ordered.
3122 *
3123 * @returns The new value.
3124 * @param pu64 Pointer to the value to decrement.
3125 *
3126 * @remarks x86: Requires a Pentium or later.
3127 */
3128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3129DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3130#else
3131DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3132{
3133# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3134 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3135 return u64;
3136
3137# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3138 uint64_t u64;
3139 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3140 : "=r" (u64),
3141 "=m" (*pu64)
3142 : "0" (~(uint64_t)0),
3143 "m" (*pu64)
3144 : "memory");
3145 return u64-1;
3146# else
3147 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3148# endif
3149}
3150#endif
3151
3152
3153/**
3154 * Atomically decrement a signed 64-bit value, ordered.
3155 *
3156 * @returns The new value.
3157 * @param pi64 Pointer to the value to decrement.
3158 *
3159 * @remarks x86: Requires a Pentium or later.
3160 */
3161DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3162{
3163 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3164}
3165
3166
3167/**
3168 * Atomically decrement a size_t value, ordered.
3169 *
3170 * @returns The new value.
3171 * @param pcb Pointer to the value to decrement.
3172 *
3173 * @remarks x86: Requires a 486 or later.
3174 */
3175DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
3176{
3177#if ARCH_BITS == 64
3178 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3179#elif ARCH_BITS == 32
3180 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3181#elif ARCH_BITS == 16
3182 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3183#else
3184# error "Unsupported ARCH_BITS value"
3185#endif
3186}
3187
3188
3189/**
3190 * Atomically Or an unsigned 32-bit value, ordered.
3191 *
3192 * @param pu32 Pointer to the variable to OR u32 with.
3193 * @param u32 The value to OR *pu32 with.
3194 *
3195 * @remarks x86: Requires a 386 or later.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3198DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3199#else
3200DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3201{
3202# if RT_INLINE_ASM_USES_INTRIN
3203 _InterlockedOr((long volatile *)pu32, (long)u32);
3204
3205# elif RT_INLINE_ASM_GNU_STYLE
3206 __asm__ __volatile__("lock; orl %1, %0\n\t"
3207 : "=m" (*pu32)
3208 : "ir" (u32),
3209 "m" (*pu32));
3210# else
3211 __asm
3212 {
3213 mov eax, [u32]
3214# ifdef RT_ARCH_AMD64
3215 mov rdx, [pu32]
3216 lock or [rdx], eax
3217# else
3218 mov edx, [pu32]
3219 lock or [edx], eax
3220# endif
3221 }
3222# endif
3223}
3224#endif
3225
3226
3227/**
3228 * Atomically Or a signed 32-bit value, ordered.
3229 *
3230 * @param pi32 Pointer to the variable to OR i32 with.
3231 * @param i32 The value to OR *pi32 with.
3232 *
3233 * @remarks x86: Requires a 386 or later.
3234 */
3235DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3236{
3237 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3238}
3239
3240
3241/**
3242 * Atomically Or an unsigned 64-bit value, ordered.
3243 *
3244 * @param pu64 Pointer to the variable to OR u64 with.
3245 * @param u64 The value to OR *pu64 with.
3246 *
3247 * @remarks x86: Requires a Pentium or later.
3248 */
3249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3250DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3251#else
3252DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3253{
3254# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3255 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3256
3257# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3258 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3259 : "=m" (*pu64)
3260 : "r" (u64),
3261 "m" (*pu64));
3262# else
3263 for (;;)
3264 {
3265 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3266 uint64_t u64New = u64Old | u64;
3267 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3268 break;
3269 ASMNopPause();
3270 }
3271# endif
3272}
3273#endif
3274
3275
3276/**
3277 * Atomically Or a signed 64-bit value, ordered.
3278 *
3279 * @param pi64 Pointer to the variable to OR i64 with.
3280 * @param i64 The value to OR *pi64 with.
3281 *
3282 * @remarks x86: Requires a Pentium or later.
3283 */
3284DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3285{
3286 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3287}
3288
3289
3290/**
3291 * Atomically And an unsigned 32-bit value, ordered.
3292 *
3293 * @param pu32 Pointer to the variable to AND u32 with.
3294 * @param u32 The value to AND *pu32 with.
3295 *
3296 * @remarks x86: Requires a 386 or later.
3297 */
3298#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3299DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3300#else
3301DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3302{
3303# if RT_INLINE_ASM_USES_INTRIN
3304 _InterlockedAnd((long volatile *)pu32, u32);
3305
3306# elif RT_INLINE_ASM_GNU_STYLE
3307 __asm__ __volatile__("lock; andl %1, %0\n\t"
3308 : "=m" (*pu32)
3309 : "ir" (u32),
3310 "m" (*pu32));
3311# else
3312 __asm
3313 {
3314 mov eax, [u32]
3315# ifdef RT_ARCH_AMD64
3316 mov rdx, [pu32]
3317 lock and [rdx], eax
3318# else
3319 mov edx, [pu32]
3320 lock and [edx], eax
3321# endif
3322 }
3323# endif
3324}
3325#endif
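/*
 * Usage sketch (informative only; the flag is made up): ASMAtomicOrU32 sets
 * bits and ASMAtomicAndU32 clears bits without disturbing the rest of the
 * word, which is the usual way of maintaining shared flag fields.
 *
 *      #define EXAMPLE_FLAG_BUSY   RT_BIT_32(0)
 *      static uint32_t volatile g_fExampleFlags;
 *
 *      ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_FLAG_BUSY);    // set
 *      ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_FLAG_BUSY);  // clear
 */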
3326
3327
3328/**
3329 * Atomically And a signed 32-bit value, ordered.
3330 *
3331 * @param pi32 Pointer to the variable to AND i32 with.
3332 * @param i32 The value to AND *pi32 with.
3333 *
3334 * @remarks x86: Requires a 386 or later.
3335 */
3336DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3337{
3338 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3339}
3340
3341
3342/**
3343 * Atomically And an unsigned 64-bit value, ordered.
3344 *
3345 * @param pu64 Pointer to the variable to AND u64 with.
3346 * @param u64 The value to AND *pu64 with.
3347 *
3348 * @remarks x86: Requires a Pentium or later.
3349 */
3350#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3351DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3352#else
3353DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3354{
3355# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3356 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3357
3358# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3359 __asm__ __volatile__("lock; andq %1, %0\n\t"
3360 : "=m" (*pu64)
3361 : "r" (u64),
3362 "m" (*pu64));
3363# else
3364 for (;;)
3365 {
3366 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3367 uint64_t u64New = u64Old & u64;
3368 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3369 break;
3370 ASMNopPause();
3371 }
3372# endif
3373}
3374#endif
3375
3376
3377/**
3378 * Atomically And a signed 64-bit value, ordered.
3379 *
3380 * @param pi64 Pointer to the variable to AND i64 with.
3381 * @param i64 The value to AND *pi64 with.
3382 *
3383 * @remarks x86: Requires a Pentium or later.
3384 */
3385DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3386{
3387 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3388}
3389
3390
3391/**
3392 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3393 *
3394 * @param pu32 Pointer to the variable to OR u32 with.
3395 * @param u32 The value to OR *pu32 with.
3396 *
3397 * @remarks x86: Requires a 386 or later.
3398 */
3399#if RT_INLINE_ASM_EXTERNAL
3400DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3401#else
3402DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3403{
3404# if RT_INLINE_ASM_GNU_STYLE
3405 __asm__ __volatile__("orl %1, %0\n\t"
3406 : "=m" (*pu32)
3407 : "ir" (u32),
3408 "m" (*pu32));
3409# else
3410 __asm
3411 {
3412 mov eax, [u32]
3413# ifdef RT_ARCH_AMD64
3414 mov rdx, [pu32]
3415 or [rdx], eax
3416# else
3417 mov edx, [pu32]
3418 or [edx], eax
3419# endif
3420 }
3421# endif
3422}
3423#endif
3424
3425
3426/**
3427 * Atomically OR a signed 32-bit value, unordered.
3428 *
3429 * @param pi32 Pointer to the variable to OR i32 with.
3430 * @param i32 The value to OR *pi32 with.
3431 *
3432 * @remarks x86: Requires a 386 or later.
3433 */
3434DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3435{
3436 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3437}
3438
3439
3440/**
3441 * Atomically OR an unsigned 64-bit value, unordered.
3442 *
3443 * @param pu64 Pointer to the variable to OR u64 with.
3444 * @param u64 The value to OR *pu64 with.
3445 *
3446 * @remarks x86: Requires a Pentium or later.
3447 */
3448#if RT_INLINE_ASM_EXTERNAL
3449DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3450#else
3451DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3452{
3453# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3454 __asm__ __volatile__("orq %1, %q0\n\t"
3455 : "=m" (*pu64)
3456 : "r" (u64),
3457 "m" (*pu64));
3458# else
3459 for (;;)
3460 {
3461 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3462 uint64_t u64New = u64Old | u64;
3463 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3464 break;
3465 ASMNopPause();
3466 }
3467# endif
3468}
3469#endif
3470
3471
3472/**
3473 * Atomically Or a signed 64-bit value, unordered.
3474 *
3475 * @param pi64 Pointer to the variable to OR i64 with.
3476 * @param i64 The value to OR *pi64 with.
3477 *
3478 * @remarks x86: Requires a Pentium or later.
3479 */
3480DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3481{
3482 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3483}
3484
3485
3486/**
3487 * Atomically And an unsigned 32-bit value, unordered.
3488 *
3489 * @param pu32 Pointer to the variable to AND u32 with.
3490 * @param u32 The value to AND *pu32 with.
3491 *
3492 * @remarks x86: Requires a 386 or later.
3493 */
3494#if RT_INLINE_ASM_EXTERNAL
3495DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3496#else
3497DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3498{
3499# if RT_INLINE_ASM_GNU_STYLE
3500 __asm__ __volatile__("andl %1, %0\n\t"
3501 : "=m" (*pu32)
3502 : "ir" (u32),
3503 "m" (*pu32));
3504# else
3505 __asm
3506 {
3507 mov eax, [u32]
3508# ifdef RT_ARCH_AMD64
3509 mov rdx, [pu32]
3510 and [rdx], eax
3511# else
3512 mov edx, [pu32]
3513 and [edx], eax
3514# endif
3515 }
3516# endif
3517}
3518#endif
3519
3520
3521/**
3522 * Atomically And a signed 32-bit value, unordered.
3523 *
3524 * @param pi32 Pointer to the variable to AND i32 with.
3525 * @param i32 The value to AND *pi32 with.
3526 *
3527 * @remarks x86: Requires a 386 or later.
3528 */
3529DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3530{
3531 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3532}
3533
3534
3535/**
3536 * Atomically And an unsigned 64-bit value, unordered.
3537 *
3538 * @param pu64 Pointer to the variable to AND u64 with.
3539 * @param u64 The value to AND *pu64 with.
3540 *
3541 * @remarks x86: Requires a Pentium or later.
3542 */
3543#if RT_INLINE_ASM_EXTERNAL
3544DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3545#else
3546DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3547{
3548# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3549 __asm__ __volatile__("andq %1, %0\n\t"
3550 : "=m" (*pu64)
3551 : "r" (u64),
3552 "m" (*pu64));
3553# else
3554 for (;;)
3555 {
3556 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3557 uint64_t u64New = u64Old & u64;
3558 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3559 break;
3560 ASMNopPause();
3561 }
3562# endif
3563}
3564#endif
3565
3566
3567/**
3568 * Atomically And a signed 64-bit value, unordered.
3569 *
3570 * @param pi64 Pointer to the variable to AND i64 with.
3571 * @param i64 The value to AND *pi64 with.
3572 *
3573 * @remarks x86: Requires a Pentium or later.
3574 */
3575DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3576{
3577 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3578}
3579
3580
3581/**
3582 * Atomically increment an unsigned 32-bit value, unordered.
3583 *
3584 * @returns the new value.
3585 * @param pu32 Pointer to the variable to increment.
3586 *
3587 * @remarks x86: Requires a 486 or later.
3588 */
3589#if RT_INLINE_ASM_EXTERNAL
3590DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3591#else
3592DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3593{
3594 uint32_t u32;
3595# if RT_INLINE_ASM_GNU_STYLE
3596 __asm__ __volatile__("xaddl %0, %1\n\t"
3597 : "=r" (u32),
3598 "=m" (*pu32)
3599 : "0" (1),
3600 "m" (*pu32)
3601 : "memory");
3602 return u32 + 1;
3603# else
3604 __asm
3605 {
3606 mov eax, 1
3607# ifdef RT_ARCH_AMD64
3608 mov rdx, [pu32]
3609 xadd [rdx], eax
3610# else
3611 mov edx, [pu32]
3612 xadd [edx], eax
3613# endif
3614 mov u32, eax
3615 }
3616 return u32 + 1;
3617# endif
3618}
3619#endif
3620
3621
3622/**
3623 * Atomically decrement an unsigned 32-bit value, unordered.
3624 *
3625 * @returns the new value.
3626 * @param pu32 Pointer to the variable to decrement.
3627 *
3628 * @remarks x86: Requires a 486 or later.
3629 */
3630#if RT_INLINE_ASM_EXTERNAL
3631DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3632#else
3633DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3634{
3635 uint32_t u32;
3636# if RT_INLINE_ASM_GNU_STYLE
3637 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3638 : "=r" (u32),
3639 "=m" (*pu32)
3640 : "0" (-1),
3641 "m" (*pu32)
3642 : "memory");
3643 return u32 - 1;
3644# else
3645 __asm
3646 {
3647 mov eax, -1
3648# ifdef RT_ARCH_AMD64
3649 mov rdx, [pu32]
3650 xadd [rdx], eax
3651# else
3652 mov edx, [pu32]
3653 xadd [edx], eax
3654# endif
3655 mov u32, eax
3656 }
3657 return u32 - 1;
3658# endif
3659}
3660#endif
3661
3662
3663/** @def RT_ASM_PAGE_SIZE
3664 * We try to avoid dragging in iprt/param.h here.
3665 * @internal
3666 */
3667#if defined(RT_ARCH_SPARC64)
3668# define RT_ASM_PAGE_SIZE 0x2000
3669# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3670# if PAGE_SIZE != 0x2000
3671# error "PAGE_SIZE is not 0x2000!"
3672# endif
3673# endif
3674#else
3675# define RT_ASM_PAGE_SIZE 0x1000
3676# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3677# if PAGE_SIZE != 0x1000
3678# error "PAGE_SIZE is not 0x1000!"
3679# endif
3680# endif
3681#endif
3682
3683/**
3684 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3685 *
3686 * @param pv Pointer to the memory block. This must be page aligned.
3687 */
3688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3689DECLASM(void) ASMMemZeroPage(volatile void *pv);
3690# else
3691DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3692{
3693# if RT_INLINE_ASM_USES_INTRIN
3694# ifdef RT_ARCH_AMD64
3695 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3696# else
3697 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3698# endif
3699
3700# elif RT_INLINE_ASM_GNU_STYLE
3701 RTCCUINTREG uDummy;
3702# ifdef RT_ARCH_AMD64
3703 __asm__ __volatile__("rep stosq"
3704 : "=D" (pv),
3705 "=c" (uDummy)
3706 : "0" (pv),
3707 "c" (RT_ASM_PAGE_SIZE >> 3),
3708 "a" (0)
3709 : "memory");
3710# else
3711 __asm__ __volatile__("rep stosl"
3712 : "=D" (pv),
3713 "=c" (uDummy)
3714 : "0" (pv),
3715 "c" (RT_ASM_PAGE_SIZE >> 2),
3716 "a" (0)
3717 : "memory");
3718# endif
3719# else
3720 __asm
3721 {
3722# ifdef RT_ARCH_AMD64
3723 xor rax, rax
3724 mov ecx, 0200h
3725 mov rdi, [pv]
3726 rep stosq
3727# else
3728 xor eax, eax
3729 mov ecx, 0400h
3730 mov edi, [pv]
3731 rep stosd
3732# endif
3733 }
3734# endif
3735}
3736# endif
3737
3738
3739/**
3740 * Zeros a memory block with a 32-bit aligned size.
3741 *
3742 * @param pv Pointer to the memory block.
3743 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3744 */
3745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3746DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3747#else
3748DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3749{
3750# if RT_INLINE_ASM_USES_INTRIN
3751# ifdef RT_ARCH_AMD64
3752 if (!(cb & 7))
3753 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3754 else
3755# endif
3756 __stosd((unsigned long *)pv, 0, cb / 4);
3757
3758# elif RT_INLINE_ASM_GNU_STYLE
3759 __asm__ __volatile__("rep stosl"
3760 : "=D" (pv),
3761 "=c" (cb)
3762 : "0" (pv),
3763 "1" (cb >> 2),
3764 "a" (0)
3765 : "memory");
3766# else
3767 __asm
3768 {
3769 xor eax, eax
3770# ifdef RT_ARCH_AMD64
3771 mov rcx, [cb]
3772 shr rcx, 2
3773 mov rdi, [pv]
3774# else
3775 mov ecx, [cb]
3776 shr ecx, 2
3777 mov edi, [pv]
3778# endif
3779 rep stosd
3780 }
3781# endif
3782}
3783#endif
3784
3785
3786/**
3787 * Fills a memory block with a 32-bit aligned size.
3788 *
3789 * @param pv Pointer to the memory block.
3790 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3791 * @param u32 The value to fill with.
3792 */
3793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3794DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3795#else
3796DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3797{
3798# if RT_INLINE_ASM_USES_INTRIN
3799# ifdef RT_ARCH_AMD64
3800 if (!(cb & 7))
3801 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3802 else
3803# endif
3804 __stosd((unsigned long *)pv, u32, cb / 4);
3805
3806# elif RT_INLINE_ASM_GNU_STYLE
3807 __asm__ __volatile__("rep stosl"
3808 : "=D" (pv),
3809 "=c" (cb)
3810 : "0" (pv),
3811 "1" (cb >> 2),
3812 "a" (u32)
3813 : "memory");
3814# else
3815 __asm
3816 {
3817# ifdef RT_ARCH_AMD64
3818 mov rcx, [cb]
3819 shr rcx, 2
3820 mov rdi, [pv]
3821# else
3822 mov ecx, [cb]
3823 shr ecx, 2
3824 mov edi, [pv]
3825# endif
3826 mov eax, [u32]
3827 rep stosd
3828 }
3829# endif
3830}
3831#endif
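/*
 * Usage sketch (informative only; the buffer is made up): both helpers
 * require the byte count to be a multiple of four.
 *
 *      uint32_t au32Example[64];
 *      ASMMemZero32(au32Example, sizeof(au32Example));
 *      ASMMemFill32(au32Example, sizeof(au32Example), UINT32_C(0xfeedface));
 */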
3832
3833
3834/**
3835 * Checks if a memory page is all zeros.
3836 *
3837 * @returns true / false.
3838 *
3839 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3840 * boundary
3841 */
3842DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3843{
3844# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3845 union { RTCCUINTREG r; bool f; } uAX;
3846 RTCCUINTREG xCX, xDI;
3847 Assert(!((uintptr_t)pvPage & 15));
3848 __asm__ __volatile__("repe; "
3849# ifdef RT_ARCH_AMD64
3850 "scasq\n\t"
3851# else
3852 "scasl\n\t"
3853# endif
3854 "setnc %%al\n\t"
3855 : "=&c" (xCX),
3856 "=&D" (xDI),
3857 "=&a" (uAX.r)
3858 : "mr" (pvPage),
3859# ifdef RT_ARCH_AMD64
3860 "0" (RT_ASM_PAGE_SIZE/8),
3861# else
3862 "0" (RT_ASM_PAGE_SIZE/4),
3863# endif
3864 "1" (pvPage),
3865 "2" (0));
3866 return uAX.f;
3867# else
3868 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3869 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3870 Assert(!((uintptr_t)pvPage & 15));
3871 for (;;)
3872 {
3873 if (puPtr[0]) return false;
3874 if (puPtr[4]) return false;
3875
3876 if (puPtr[2]) return false;
3877 if (puPtr[6]) return false;
3878
3879 if (puPtr[1]) return false;
3880 if (puPtr[5]) return false;
3881
3882 if (puPtr[3]) return false;
3883 if (puPtr[7]) return false;
3884
3885 if (!--cLeft)
3886 return true;
3887 puPtr += 8;
3888 }
3889 return true;
3890# endif
3891}
3892
3893
3894/**
3895 * Checks if a memory block is filled with the specified byte.
3896 *
3897 * This is a sort of inverted memchr.
3898 *
3899 * @returns Pointer to the byte which doesn't equal u8.
3900 * @returns NULL if all equal to u8.
3901 *
3902 * @param pv Pointer to the memory block.
3903 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3904 * @param u8 The value it's supposed to be filled with.
3905 *
3906 * @todo Fix name, it is a predicate function but it's not returning boolean!
3907 */
3908DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3909{
3910/** @todo rewrite this in inline assembly? */
3911 uint8_t const *pb = (uint8_t const *)pv;
3912 for (; cb; cb--, pb++)
3913 if (RT_LIKELY(*pb == u8))
3914 { /* likely */ }
3915 else
3916 return (void *)pb;
3917 return NULL;
3918}
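
/*
 * Usage sketch for ASMMemIsAll8 (illustrative; the names are hypothetical).
 * A NULL return means the whole block matches, so the call reads naturally
 * as a "find the first mismatching byte" helper.  Per the note above, keep
 * cbFlash a multiple of 4.
 */
#if 0
DECLINLINE(bool) asmSketchIsErased(void const *pvFlash, size_t cbFlash)
{
    /* Erased flash memory typically reads back as 0xff everywhere. */
    return ASMMemIsAll8(pvFlash, cbFlash, 0xff) == NULL;
}
#endif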
3919
3920
3921/**
3922 * Checks if a memory block is filled with the specified 32-bit value.
3923 *
3924 * This is a sort of inverted memchr.
3925 *
3926 * @returns Pointer to the first value which doesn't equal u32.
3927 * @returns NULL if all equal to u32.
3928 *
3929 * @param pv Pointer to the memory block.
3930 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3931 * @param u32 The value it's supposed to be filled with.
3932 *
3933 * @todo Fix name, it is a predicate function but it's not returning boolean!
3934 */
3935DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3936{
3937/** @todo rewrite this in inline assembly? */
3938 uint32_t const *pu32 = (uint32_t const *)pv;
3939 for (; cb; cb -= 4, pu32++)
3940 if (RT_LIKELY(*pu32 == u32))
3941 { /* likely */ }
3942 else
3943 return (uint32_t *)pu32;
3944 return NULL;
3945}
3946
3947
3948/**
3949 * Probes a byte pointer for read access.
3950 *
 3951 * While the function will fault if the byte is not read accessible,
3952 * the idea is to do this in a safe place like before acquiring locks
3953 * and such like.
3954 *
 3955 * Also, this function guarantees that an eager compiler is not going
3956 * to optimize the probing away.
3957 *
3958 * @param pvByte Pointer to the byte.
3959 */
3960#if RT_INLINE_ASM_EXTERNAL
3961DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3962#else
3963DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3964{
3965 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3966 uint8_t u8;
3967# if RT_INLINE_ASM_GNU_STYLE
3968 __asm__ __volatile__("movb (%1), %0\n\t"
3969 : "=r" (u8)
3970 : "r" (pvByte));
3971# else
3972 __asm
3973 {
3974# ifdef RT_ARCH_AMD64
3975 mov rax, [pvByte]
3976 mov al, [rax]
3977# else
3978 mov eax, [pvByte]
3979 mov al, [eax]
3980# endif
3981 mov [u8], al
3982 }
3983# endif
3984 return u8;
3985}
3986#endif
3987
3988/**
3989 * Probes a buffer for read access page by page.
3990 *
3991 * While the function will fault if the buffer is not fully read
3992 * accessible, the idea is to do this in a safe place like before
3993 * acquiring locks and such like.
3994 *
 3995 * Also, this function guarantees that an eager compiler is not going
3996 * to optimize the probing away.
3997 *
3998 * @param pvBuf Pointer to the buffer.
3999 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4000 */
4001DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4002{
4003 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4004 /* the first byte */
4005 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4006 ASMProbeReadByte(pu8);
4007
 4008 /* the whole pages in between. */
4009 while (cbBuf > RT_ASM_PAGE_SIZE)
4010 {
4011 ASMProbeReadByte(pu8);
4012 cbBuf -= RT_ASM_PAGE_SIZE;
4013 pu8 += RT_ASM_PAGE_SIZE;
4014 }
4015
4016 /* the last byte */
4017 ASMProbeReadByte(pu8 + cbBuf - 1);
4018}
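
/*
 * Usage sketch for the probe helpers (illustrative; the function name is
 * hypothetical).  The point is to take the potential page fault up front,
 * before entering code that must not fault.
 */
#if 0
DECLINLINE(void) asmSketchProbeBeforeLocking(void const *pvSrc, size_t cbSrc)
{
    /* May fault here, where it is still safe to handle... */
    ASMProbeReadBuffer(pvSrc, cbSrc);
    /* ...rather than later inside a spinlocked or interrupts-off section
       that reads the same buffer. */
}
#endif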
4019
4020
4021
4022/** @defgroup grp_inline_bits Bit Operations
4023 * @{
4024 */
4025
4026
4027/**
4028 * Sets a bit in a bitmap.
4029 *
4030 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4031 * @param iBit The bit to set.
4032 *
4033 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4034 * However, doing so will yield better performance as well as avoiding
4035 * traps accessing the last bits in the bitmap.
4036 */
4037#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4038DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4039#else
4040DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4041{
4042# if RT_INLINE_ASM_USES_INTRIN
4043 _bittestandset((long *)pvBitmap, iBit);
4044
4045# elif RT_INLINE_ASM_GNU_STYLE
4046 __asm__ __volatile__("btsl %1, %0"
4047 : "=m" (*(volatile long *)pvBitmap)
4048 : "Ir" (iBit),
4049 "m" (*(volatile long *)pvBitmap)
4050 : "memory");
4051# else
4052 __asm
4053 {
4054# ifdef RT_ARCH_AMD64
4055 mov rax, [pvBitmap]
4056 mov edx, [iBit]
4057 bts [rax], edx
4058# else
4059 mov eax, [pvBitmap]
4060 mov edx, [iBit]
4061 bts [eax], edx
4062# endif
4063 }
4064# endif
4065}
4066#endif
4067
4068
4069/**
4070 * Atomically sets a bit in a bitmap, ordered.
4071 *
4072 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4073 * the memory access isn't atomic!
4074 * @param iBit The bit to set.
4075 *
4076 * @remarks x86: Requires a 386 or later.
4077 */
4078#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4079DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4080#else
4081DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4082{
4083 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4084# if RT_INLINE_ASM_USES_INTRIN
4085 _interlockedbittestandset((long *)pvBitmap, iBit);
4086# elif RT_INLINE_ASM_GNU_STYLE
4087 __asm__ __volatile__("lock; btsl %1, %0"
4088 : "=m" (*(volatile long *)pvBitmap)
4089 : "Ir" (iBit),
4090 "m" (*(volatile long *)pvBitmap)
4091 : "memory");
4092# else
4093 __asm
4094 {
4095# ifdef RT_ARCH_AMD64
4096 mov rax, [pvBitmap]
4097 mov edx, [iBit]
4098 lock bts [rax], edx
4099# else
4100 mov eax, [pvBitmap]
4101 mov edx, [iBit]
4102 lock bts [eax], edx
4103# endif
4104 }
4105# endif
4106}
4107#endif
4108
4109
4110/**
4111 * Clears a bit in a bitmap.
4112 *
4113 * @param pvBitmap Pointer to the bitmap.
4114 * @param iBit The bit to clear.
4115 *
4116 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4117 * However, doing so will yield better performance as well as avoiding
4118 * traps accessing the last bits in the bitmap.
4119 */
4120#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4121DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4122#else
4123DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4124{
4125# if RT_INLINE_ASM_USES_INTRIN
4126 _bittestandreset((long *)pvBitmap, iBit);
4127
4128# elif RT_INLINE_ASM_GNU_STYLE
4129 __asm__ __volatile__("btrl %1, %0"
4130 : "=m" (*(volatile long *)pvBitmap)
4131 : "Ir" (iBit),
4132 "m" (*(volatile long *)pvBitmap)
4133 : "memory");
4134# else
4135 __asm
4136 {
4137# ifdef RT_ARCH_AMD64
4138 mov rax, [pvBitmap]
4139 mov edx, [iBit]
4140 btr [rax], edx
4141# else
4142 mov eax, [pvBitmap]
4143 mov edx, [iBit]
4144 btr [eax], edx
4145# endif
4146 }
4147# endif
4148}
4149#endif
4150
4151
4152/**
4153 * Atomically clears a bit in a bitmap, ordered.
4154 *
4155 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4156 * the memory access isn't atomic!
 4157 * @param iBit The bit to clear.
4158 *
4159 * @remarks No memory barrier, take care on smp.
4160 * @remarks x86: Requires a 386 or later.
4161 */
4162#if RT_INLINE_ASM_EXTERNAL
4163DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4164#else
4165DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4166{
4167 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4168# if RT_INLINE_ASM_GNU_STYLE
4169 __asm__ __volatile__("lock; btrl %1, %0"
4170 : "=m" (*(volatile long *)pvBitmap)
4171 : "Ir" (iBit),
4172 "m" (*(volatile long *)pvBitmap)
4173 : "memory");
4174# else
4175 __asm
4176 {
4177# ifdef RT_ARCH_AMD64
4178 mov rax, [pvBitmap]
4179 mov edx, [iBit]
4180 lock btr [rax], edx
4181# else
4182 mov eax, [pvBitmap]
4183 mov edx, [iBit]
4184 lock btr [eax], edx
4185# endif
4186 }
4187# endif
4188}
4189#endif
4190
4191
4192/**
4193 * Toggles a bit in a bitmap.
4194 *
4195 * @param pvBitmap Pointer to the bitmap.
4196 * @param iBit The bit to toggle.
4197 *
4198 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4199 * However, doing so will yield better performance as well as avoiding
4200 * traps accessing the last bits in the bitmap.
4201 */
4202#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4203DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4204#else
4205DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4206{
4207# if RT_INLINE_ASM_USES_INTRIN
4208 _bittestandcomplement((long *)pvBitmap, iBit);
4209# elif RT_INLINE_ASM_GNU_STYLE
4210 __asm__ __volatile__("btcl %1, %0"
4211 : "=m" (*(volatile long *)pvBitmap)
4212 : "Ir" (iBit),
4213 "m" (*(volatile long *)pvBitmap)
4214 : "memory");
4215# else
4216 __asm
4217 {
4218# ifdef RT_ARCH_AMD64
4219 mov rax, [pvBitmap]
4220 mov edx, [iBit]
4221 btc [rax], edx
4222# else
4223 mov eax, [pvBitmap]
4224 mov edx, [iBit]
4225 btc [eax], edx
4226# endif
4227 }
4228# endif
4229}
4230#endif
4231
4232
4233/**
4234 * Atomically toggles a bit in a bitmap, ordered.
4235 *
4236 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4237 * the memory access isn't atomic!
 4238 * @param iBit The bit to toggle.
4239 *
4240 * @remarks x86: Requires a 386 or later.
4241 */
4242#if RT_INLINE_ASM_EXTERNAL
4243DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4244#else
4245DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4246{
4247 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4248# if RT_INLINE_ASM_GNU_STYLE
4249 __asm__ __volatile__("lock; btcl %1, %0"
4250 : "=m" (*(volatile long *)pvBitmap)
4251 : "Ir" (iBit),
4252 "m" (*(volatile long *)pvBitmap)
4253 : "memory");
4254# else
4255 __asm
4256 {
4257# ifdef RT_ARCH_AMD64
4258 mov rax, [pvBitmap]
4259 mov edx, [iBit]
4260 lock btc [rax], edx
4261# else
4262 mov eax, [pvBitmap]
4263 mov edx, [iBit]
4264 lock btc [eax], edx
4265# endif
4266 }
4267# endif
4268}
4269#endif
4270
4271
4272/**
4273 * Tests and sets a bit in a bitmap.
4274 *
4275 * @returns true if the bit was set.
4276 * @returns false if the bit was clear.
4277 *
4278 * @param pvBitmap Pointer to the bitmap.
4279 * @param iBit The bit to test and set.
4280 *
4281 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4282 * However, doing so will yield better performance as well as avoiding
4283 * traps accessing the last bits in the bitmap.
4284 */
4285#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4286DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4287#else
4288DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4289{
4290 union { bool f; uint32_t u32; uint8_t u8; } rc;
4291# if RT_INLINE_ASM_USES_INTRIN
4292 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4293
4294# elif RT_INLINE_ASM_GNU_STYLE
4295 __asm__ __volatile__("btsl %2, %1\n\t"
4296 "setc %b0\n\t"
4297 "andl $1, %0\n\t"
4298 : "=q" (rc.u32),
4299 "=m" (*(volatile long *)pvBitmap)
4300 : "Ir" (iBit),
4301 "m" (*(volatile long *)pvBitmap)
4302 : "memory");
4303# else
4304 __asm
4305 {
4306 mov edx, [iBit]
4307# ifdef RT_ARCH_AMD64
4308 mov rax, [pvBitmap]
4309 bts [rax], edx
4310# else
4311 mov eax, [pvBitmap]
4312 bts [eax], edx
4313# endif
4314 setc al
4315 and eax, 1
4316 mov [rc.u32], eax
4317 }
4318# endif
4319 return rc.f;
4320}
4321#endif
4322
4323
4324/**
4325 * Atomically tests and sets a bit in a bitmap, ordered.
4326 *
4327 * @returns true if the bit was set.
4328 * @returns false if the bit was clear.
4329 *
4330 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4331 * the memory access isn't atomic!
 4332 * @param iBit The bit to test and set.
4333 *
4334 * @remarks x86: Requires a 386 or later.
4335 */
4336#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4337DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4338#else
4339DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4340{
4341 union { bool f; uint32_t u32; uint8_t u8; } rc;
4342 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4343# if RT_INLINE_ASM_USES_INTRIN
4344 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4345# elif RT_INLINE_ASM_GNU_STYLE
4346 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4347 "setc %b0\n\t"
4348 "andl $1, %0\n\t"
4349 : "=q" (rc.u32),
4350 "=m" (*(volatile long *)pvBitmap)
4351 : "Ir" (iBit),
4352 "m" (*(volatile long *)pvBitmap)
4353 : "memory");
4354# else
4355 __asm
4356 {
4357 mov edx, [iBit]
4358# ifdef RT_ARCH_AMD64
4359 mov rax, [pvBitmap]
4360 lock bts [rax], edx
4361# else
4362 mov eax, [pvBitmap]
4363 lock bts [eax], edx
4364# endif
4365 setc al
4366 and eax, 1
4367 mov [rc.u32], eax
4368 }
4369# endif
4370 return rc.f;
4371}
4372#endif
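
/*
 * Usage sketch for ASMAtomicBitTestAndSet (illustrative; the bitmap and
 * function names are hypothetical).  Because the operation is atomic, the
 * first caller to flip a clear bit wins, which makes it a handy building
 * block for claiming slots without taking a lock.
 */
#if 0
DECLINLINE(bool) asmSketchTryClaimSlot(volatile uint32_t *pau32SlotBitmap, int32_t iSlot)
{
    /* The previous bit value is returned, so false means the slot was free
       and is now ours; report success in that case. */
    return !ASMAtomicBitTestAndSet(pau32SlotBitmap, iSlot);
}
#endif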
4373
4374
4375/**
4376 * Tests and clears a bit in a bitmap.
4377 *
4378 * @returns true if the bit was set.
4379 * @returns false if the bit was clear.
4380 *
4381 * @param pvBitmap Pointer to the bitmap.
4382 * @param iBit The bit to test and clear.
4383 *
4384 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4385 * However, doing so will yield better performance as well as avoiding
4386 * traps accessing the last bits in the bitmap.
4387 */
4388#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4389DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4390#else
4391DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4392{
4393 union { bool f; uint32_t u32; uint8_t u8; } rc;
4394# if RT_INLINE_ASM_USES_INTRIN
4395 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4396
4397# elif RT_INLINE_ASM_GNU_STYLE
4398 __asm__ __volatile__("btrl %2, %1\n\t"
4399 "setc %b0\n\t"
4400 "andl $1, %0\n\t"
4401 : "=q" (rc.u32),
4402 "=m" (*(volatile long *)pvBitmap)
4403 : "Ir" (iBit),
4404 "m" (*(volatile long *)pvBitmap)
4405 : "memory");
4406# else
4407 __asm
4408 {
4409 mov edx, [iBit]
4410# ifdef RT_ARCH_AMD64
4411 mov rax, [pvBitmap]
4412 btr [rax], edx
4413# else
4414 mov eax, [pvBitmap]
4415 btr [eax], edx
4416# endif
4417 setc al
4418 and eax, 1
4419 mov [rc.u32], eax
4420 }
4421# endif
4422 return rc.f;
4423}
4424#endif
4425
4426
4427/**
4428 * Atomically tests and clears a bit in a bitmap, ordered.
4429 *
4430 * @returns true if the bit was set.
4431 * @returns false if the bit was clear.
4432 *
4433 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4434 * the memory access isn't atomic!
4435 * @param iBit The bit to test and clear.
4436 *
4437 * @remarks No memory barrier, take care on smp.
4438 * @remarks x86: Requires a 386 or later.
4439 */
4440#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4441DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4442#else
4443DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4444{
4445 union { bool f; uint32_t u32; uint8_t u8; } rc;
4446 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4447# if RT_INLINE_ASM_USES_INTRIN
4448 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4449
4450# elif RT_INLINE_ASM_GNU_STYLE
4451 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4452 "setc %b0\n\t"
4453 "andl $1, %0\n\t"
4454 : "=q" (rc.u32),
4455 "=m" (*(volatile long *)pvBitmap)
4456 : "Ir" (iBit),
4457 "m" (*(volatile long *)pvBitmap)
4458 : "memory");
4459# else
4460 __asm
4461 {
4462 mov edx, [iBit]
4463# ifdef RT_ARCH_AMD64
4464 mov rax, [pvBitmap]
4465 lock btr [rax], edx
4466# else
4467 mov eax, [pvBitmap]
4468 lock btr [eax], edx
4469# endif
4470 setc al
4471 and eax, 1
4472 mov [rc.u32], eax
4473 }
4474# endif
4475 return rc.f;
4476}
4477#endif
4478
4479
4480/**
4481 * Tests and toggles a bit in a bitmap.
4482 *
4483 * @returns true if the bit was set.
4484 * @returns false if the bit was clear.
4485 *
4486 * @param pvBitmap Pointer to the bitmap.
4487 * @param iBit The bit to test and toggle.
4488 *
4489 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4490 * However, doing so will yield better performance as well as avoiding
4491 * traps accessing the last bits in the bitmap.
4492 */
4493#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4494DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4495#else
4496DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4497{
4498 union { bool f; uint32_t u32; uint8_t u8; } rc;
4499# if RT_INLINE_ASM_USES_INTRIN
4500 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4501
4502# elif RT_INLINE_ASM_GNU_STYLE
4503 __asm__ __volatile__("btcl %2, %1\n\t"
4504 "setc %b0\n\t"
4505 "andl $1, %0\n\t"
4506 : "=q" (rc.u32),
4507 "=m" (*(volatile long *)pvBitmap)
4508 : "Ir" (iBit),
4509 "m" (*(volatile long *)pvBitmap)
4510 : "memory");
4511# else
4512 __asm
4513 {
4514 mov edx, [iBit]
4515# ifdef RT_ARCH_AMD64
4516 mov rax, [pvBitmap]
4517 btc [rax], edx
4518# else
4519 mov eax, [pvBitmap]
4520 btc [eax], edx
4521# endif
4522 setc al
4523 and eax, 1
4524 mov [rc.u32], eax
4525 }
4526# endif
4527 return rc.f;
4528}
4529#endif
4530
4531
4532/**
4533 * Atomically tests and toggles a bit in a bitmap, ordered.
4534 *
4535 * @returns true if the bit was set.
4536 * @returns false if the bit was clear.
4537 *
4538 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4539 * the memory access isn't atomic!
4540 * @param iBit The bit to test and toggle.
4541 *
4542 * @remarks x86: Requires a 386 or later.
4543 */
4544#if RT_INLINE_ASM_EXTERNAL
4545DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4546#else
4547DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4548{
4549 union { bool f; uint32_t u32; uint8_t u8; } rc;
4550 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4551# if RT_INLINE_ASM_GNU_STYLE
4552 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4553 "setc %b0\n\t"
4554 "andl $1, %0\n\t"
4555 : "=q" (rc.u32),
4556 "=m" (*(volatile long *)pvBitmap)
4557 : "Ir" (iBit),
4558 "m" (*(volatile long *)pvBitmap)
4559 : "memory");
4560# else
4561 __asm
4562 {
4563 mov edx, [iBit]
4564# ifdef RT_ARCH_AMD64
4565 mov rax, [pvBitmap]
4566 lock btc [rax], edx
4567# else
4568 mov eax, [pvBitmap]
4569 lock btc [eax], edx
4570# endif
4571 setc al
4572 and eax, 1
4573 mov [rc.u32], eax
4574 }
4575# endif
4576 return rc.f;
4577}
4578#endif
4579
4580
4581/**
4582 * Tests if a bit in a bitmap is set.
4583 *
4584 * @returns true if the bit is set.
4585 * @returns false if the bit is clear.
4586 *
4587 * @param pvBitmap Pointer to the bitmap.
4588 * @param iBit The bit to test.
4589 *
4590 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4591 * However, doing so will yield better performance as well as avoiding
4592 * traps accessing the last bits in the bitmap.
4593 */
4594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4595DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4596#else
4597DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4598{
4599 union { bool f; uint32_t u32; uint8_t u8; } rc;
4600# if RT_INLINE_ASM_USES_INTRIN
4601 rc.u32 = _bittest((long *)pvBitmap, iBit);
4602# elif RT_INLINE_ASM_GNU_STYLE
4603
4604 __asm__ __volatile__("btl %2, %1\n\t"
4605 "setc %b0\n\t"
4606 "andl $1, %0\n\t"
4607 : "=q" (rc.u32)
4608 : "m" (*(const volatile long *)pvBitmap),
4609 "Ir" (iBit)
4610 : "memory");
4611# else
4612 __asm
4613 {
4614 mov edx, [iBit]
4615# ifdef RT_ARCH_AMD64
4616 mov rax, [pvBitmap]
4617 bt [rax], edx
4618# else
4619 mov eax, [pvBitmap]
4620 bt [eax], edx
4621# endif
4622 setc al
4623 and eax, 1
4624 mov [rc.u32], eax
4625 }
4626# endif
4627 return rc.f;
4628}
4629#endif
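
/*
 * Usage sketch for the plain (non-atomic) bit operations above (illustrative;
 * the array name is hypothetical).  A bitmap is just an array of 32-bit
 * words: bit iBit lives in word iBit / 32.
 */
#if 0
DECLINLINE(void) asmSketchBitmapBasics(void)
{
    uint32_t au32Bitmap[256 / 32] = { 0 };      /* 256 bits, 32-bit aligned */
    ASMBitSet(au32Bitmap, 42);
    Assert( ASMBitTest(au32Bitmap, 42));
    ASMBitToggle(au32Bitmap, 42);               /* clear again */
    Assert(!ASMBitTest(au32Bitmap, 42));
    Assert(!ASMBitTestAndSet(au32Bitmap, 7));   /* was clear, so returns false */
    ASMBitClear(au32Bitmap, 7);
}
#endif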
4630
4631
4632/**
4633 * Clears a bit range within a bitmap.
4634 *
4635 * @param pvBitmap Pointer to the bitmap.
 4636 * @param iBitStart The first bit to clear.
4637 * @param iBitEnd The first bit not to clear.
4638 */
4639DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4640{
4641 if (iBitStart < iBitEnd)
4642 {
4643 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4644 int32_t iStart = iBitStart & ~31;
4645 int32_t iEnd = iBitEnd & ~31;
4646 if (iStart == iEnd)
4647 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4648 else
4649 {
4650 /* bits in first dword. */
4651 if (iBitStart & 31)
4652 {
4653 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4654 pu32++;
4655 iBitStart = iStart + 32;
4656 }
4657
4658 /* whole dword. */
4659 if (iBitStart != iEnd)
4660 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4661
4662 /* bits in last dword. */
4663 if (iBitEnd & 31)
4664 {
4665 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4666 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4667 }
4668 }
4669 }
4670}
4671
4672
4673/**
4674 * Sets a bit range within a bitmap.
4675 *
4676 * @param pvBitmap Pointer to the bitmap.
 4677 * @param iBitStart The first bit to set.
4678 * @param iBitEnd The first bit not to set.
4679 */
4680DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4681{
4682 if (iBitStart < iBitEnd)
4683 {
4684 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4685 int32_t iStart = iBitStart & ~31;
4686 int32_t iEnd = iBitEnd & ~31;
4687 if (iStart == iEnd)
4688 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4689 else
4690 {
4691 /* bits in first dword. */
4692 if (iBitStart & 31)
4693 {
4694 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4695 pu32++;
4696 iBitStart = iStart + 32;
4697 }
4698
4699 /* whole dword. */
4700 if (iBitStart != iEnd)
4701 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4702
4703 /* bits in last dword. */
4704 if (iBitEnd & 31)
4705 {
4706 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4707 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4708 }
4709 }
4710 }
4711}
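
/*
 * Usage sketch for the range operations (illustrative; the array name is
 * hypothetical).  The range is half open: iBitStart is included, iBitEnd is
 * not, so the first call below touches bits 8 through 15 only.
 */
#if 0
DECLINLINE(void) asmSketchBitRanges(void)
{
    uint32_t au32Bitmap[128 / 32] = { 0 };
    ASMBitSetRange(au32Bitmap, 8, 16);          /* sets bits 8..15 */
    ASMBitClearRange(au32Bitmap, 12, 14);       /* clears bits 12 and 13 again */
}
#endif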
4712
4713
4714/**
4715 * Finds the first clear bit in a bitmap.
4716 *
4717 * @returns Index of the first zero bit.
4718 * @returns -1 if no clear bit was found.
4719 * @param pvBitmap Pointer to the bitmap.
4720 * @param cBits The number of bits in the bitmap. Multiple of 32.
4721 */
4722#if RT_INLINE_ASM_EXTERNAL
4723DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4724#else
4725DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4726{
4727 if (cBits)
4728 {
4729 int32_t iBit;
4730# if RT_INLINE_ASM_GNU_STYLE
4731 RTCCUINTREG uEAX, uECX, uEDI;
4732 cBits = RT_ALIGN_32(cBits, 32);
4733 __asm__ __volatile__("repe; scasl\n\t"
4734 "je 1f\n\t"
4735# ifdef RT_ARCH_AMD64
4736 "lea -4(%%rdi), %%rdi\n\t"
4737 "xorl (%%rdi), %%eax\n\t"
4738 "subq %5, %%rdi\n\t"
4739# else
4740 "lea -4(%%edi), %%edi\n\t"
4741 "xorl (%%edi), %%eax\n\t"
4742 "subl %5, %%edi\n\t"
4743# endif
4744 "shll $3, %%edi\n\t"
4745 "bsfl %%eax, %%edx\n\t"
4746 "addl %%edi, %%edx\n\t"
4747 "1:\t\n"
4748 : "=d" (iBit),
4749 "=&c" (uECX),
4750 "=&D" (uEDI),
4751 "=&a" (uEAX)
4752 : "0" (0xffffffff),
4753 "mr" (pvBitmap),
4754 "1" (cBits >> 5),
4755 "2" (pvBitmap),
4756 "3" (0xffffffff));
4757# else
4758 cBits = RT_ALIGN_32(cBits, 32);
4759 __asm
4760 {
4761# ifdef RT_ARCH_AMD64
4762 mov rdi, [pvBitmap]
4763 mov rbx, rdi
4764# else
4765 mov edi, [pvBitmap]
4766 mov ebx, edi
4767# endif
4768 mov edx, 0ffffffffh
4769 mov eax, edx
4770 mov ecx, [cBits]
4771 shr ecx, 5
4772 repe scasd
4773 je done
4774
4775# ifdef RT_ARCH_AMD64
4776 lea rdi, [rdi - 4]
4777 xor eax, [rdi]
4778 sub rdi, rbx
4779# else
4780 lea edi, [edi - 4]
4781 xor eax, [edi]
4782 sub edi, ebx
4783# endif
4784 shl edi, 3
4785 bsf edx, eax
4786 add edx, edi
4787 done:
4788 mov [iBit], edx
4789 }
4790# endif
4791 return iBit;
4792 }
4793 return -1;
4794}
4795#endif
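
/*
 * Usage sketch for ASMBitFirstClear (illustrative; the names are
 * hypothetical).  A return of -1 means the bitmap is fully allocated;
 * anything else is the index of a free entry the caller can mark as used.
 * Remember that cEntries must be a multiple of 32.
 */
#if 0
DECLINLINE(int32_t) asmSketchAllocIndex(volatile uint32_t *pau32UsedBitmap, uint32_t cEntries)
{
    int32_t iFree = ASMBitFirstClear(pau32UsedBitmap, cEntries);
    if (iFree >= 0)
        ASMBitSet(pau32UsedBitmap, iFree);
    return iFree;
}
#endif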
4796
4797
4798/**
4799 * Finds the next clear bit in a bitmap.
4800 *
 4801 * @returns Index of the next clear bit.
4802 * @returns -1 if no clear bit was found.
4803 * @param pvBitmap Pointer to the bitmap.
4804 * @param cBits The number of bits in the bitmap. Multiple of 32.
4805 * @param iBitPrev The bit returned from the last search.
4806 * The search will start at iBitPrev + 1.
4807 */
4808#if RT_INLINE_ASM_EXTERNAL
4809DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4810#else
4811DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4812{
4813 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4814 int iBit = ++iBitPrev & 31;
4815 if (iBit)
4816 {
4817 /*
4818 * Inspect the 32-bit word containing the unaligned bit.
4819 */
4820 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4821
4822# if RT_INLINE_ASM_USES_INTRIN
4823 unsigned long ulBit = 0;
4824 if (_BitScanForward(&ulBit, u32))
4825 return ulBit + iBitPrev;
4826# else
4827# if RT_INLINE_ASM_GNU_STYLE
4828 __asm__ __volatile__("bsf %1, %0\n\t"
4829 "jnz 1f\n\t"
4830 "movl $-1, %0\n\t"
4831 "1:\n\t"
4832 : "=r" (iBit)
4833 : "r" (u32));
4834# else
4835 __asm
4836 {
4837 mov edx, [u32]
4838 bsf eax, edx
4839 jnz done
4840 mov eax, 0ffffffffh
4841 done:
4842 mov [iBit], eax
4843 }
4844# endif
4845 if (iBit >= 0)
4846 return iBit + iBitPrev;
4847# endif
4848
4849 /*
4850 * Skip ahead and see if there is anything left to search.
4851 */
4852 iBitPrev |= 31;
4853 iBitPrev++;
4854 if (cBits <= (uint32_t)iBitPrev)
4855 return -1;
4856 }
4857
4858 /*
4859 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4860 */
4861 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4862 if (iBit >= 0)
4863 iBit += iBitPrev;
4864 return iBit;
4865}
4866#endif
4867
4868
4869/**
4870 * Finds the first set bit in a bitmap.
4871 *
4872 * @returns Index of the first set bit.
 4873 * @returns -1 if no set bit was found.
4874 * @param pvBitmap Pointer to the bitmap.
4875 * @param cBits The number of bits in the bitmap. Multiple of 32.
4876 */
4877#if RT_INLINE_ASM_EXTERNAL
4878DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4879#else
4880DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4881{
4882 if (cBits)
4883 {
4884 int32_t iBit;
4885# if RT_INLINE_ASM_GNU_STYLE
4886 RTCCUINTREG uEAX, uECX, uEDI;
4887 cBits = RT_ALIGN_32(cBits, 32);
4888 __asm__ __volatile__("repe; scasl\n\t"
4889 "je 1f\n\t"
4890# ifdef RT_ARCH_AMD64
4891 "lea -4(%%rdi), %%rdi\n\t"
4892 "movl (%%rdi), %%eax\n\t"
4893 "subq %5, %%rdi\n\t"
4894# else
4895 "lea -4(%%edi), %%edi\n\t"
4896 "movl (%%edi), %%eax\n\t"
4897 "subl %5, %%edi\n\t"
4898# endif
4899 "shll $3, %%edi\n\t"
4900 "bsfl %%eax, %%edx\n\t"
4901 "addl %%edi, %%edx\n\t"
4902 "1:\t\n"
4903 : "=d" (iBit),
4904 "=&c" (uECX),
4905 "=&D" (uEDI),
4906 "=&a" (uEAX)
4907 : "0" (0xffffffff),
4908 "mr" (pvBitmap),
4909 "1" (cBits >> 5),
4910 "2" (pvBitmap),
4911 "3" (0));
4912# else
4913 cBits = RT_ALIGN_32(cBits, 32);
4914 __asm
4915 {
4916# ifdef RT_ARCH_AMD64
4917 mov rdi, [pvBitmap]
4918 mov rbx, rdi
4919# else
4920 mov edi, [pvBitmap]
4921 mov ebx, edi
4922# endif
4923 mov edx, 0ffffffffh
4924 xor eax, eax
4925 mov ecx, [cBits]
4926 shr ecx, 5
4927 repe scasd
4928 je done
4929# ifdef RT_ARCH_AMD64
4930 lea rdi, [rdi - 4]
4931 mov eax, [rdi]
4932 sub rdi, rbx
4933# else
4934 lea edi, [edi - 4]
4935 mov eax, [edi]
4936 sub edi, ebx
4937# endif
4938 shl edi, 3
4939 bsf edx, eax
4940 add edx, edi
4941 done:
4942 mov [iBit], edx
4943 }
4944# endif
4945 return iBit;
4946 }
4947 return -1;
4948}
4949#endif
4950
4951
4952/**
4953 * Finds the next set bit in a bitmap.
4954 *
4955 * @returns Index of the next set bit.
4956 * @returns -1 if no set bit was found.
4957 * @param pvBitmap Pointer to the bitmap.
4958 * @param cBits The number of bits in the bitmap. Multiple of 32.
4959 * @param iBitPrev The bit returned from the last search.
4960 * The search will start at iBitPrev + 1.
4961 */
4962#if RT_INLINE_ASM_EXTERNAL
4963DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4964#else
4965DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4966{
4967 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4968 int iBit = ++iBitPrev & 31;
4969 if (iBit)
4970 {
4971 /*
4972 * Inspect the 32-bit word containing the unaligned bit.
4973 */
4974 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4975
4976# if RT_INLINE_ASM_USES_INTRIN
4977 unsigned long ulBit = 0;
4978 if (_BitScanForward(&ulBit, u32))
4979 return ulBit + iBitPrev;
4980# else
4981# if RT_INLINE_ASM_GNU_STYLE
4982 __asm__ __volatile__("bsf %1, %0\n\t"
4983 "jnz 1f\n\t"
4984 "movl $-1, %0\n\t"
4985 "1:\n\t"
4986 : "=r" (iBit)
4987 : "r" (u32));
4988# else
4989 __asm
4990 {
4991 mov edx, [u32]
4992 bsf eax, edx
4993 jnz done
4994 mov eax, 0ffffffffh
4995 done:
4996 mov [iBit], eax
4997 }
4998# endif
4999 if (iBit >= 0)
5000 return iBit + iBitPrev;
5001# endif
5002
5003 /*
5004 * Skip ahead and see if there is anything left to search.
5005 */
5006 iBitPrev |= 31;
5007 iBitPrev++;
5008 if (cBits <= (uint32_t)iBitPrev)
5009 return -1;
5010 }
5011
5012 /*
 5013 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5014 */
5015 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5016 if (iBit >= 0)
5017 iBit += iBitPrev;
5018 return iBit;
5019}
5020#endif
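
/*
 * Usage sketch for ASMBitFirstSet / ASMBitNextSet (illustrative; the function
 * name is hypothetical).  This is the usual pattern for walking all set bits
 * in a bitmap.
 */
#if 0
DECLINLINE(uint32_t) asmSketchCountSetBits(const volatile void *pvBitmap, uint32_t cBits)
{
    uint32_t cSet = 0;
    int32_t  iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        cSet++;
        iBit = ASMBitNextSet(pvBitmap, cBits, (uint32_t)iBit);
    }
    return cSet;
}
#endif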
5021
5022
5023/**
5024 * Finds the first bit which is set in the given 32-bit integer.
5025 * Bits are numbered from 1 (least significant) to 32.
5026 *
5027 * @returns index [1..32] of the first set bit.
5028 * @returns 0 if all bits are cleared.
5029 * @param u32 Integer to search for set bits.
5030 * @remarks Similar to ffs() in BSD.
5031 */
5032#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5033DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5034#else
5035DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5036{
5037# if RT_INLINE_ASM_USES_INTRIN
5038 unsigned long iBit;
5039 if (_BitScanForward(&iBit, u32))
5040 iBit++;
5041 else
5042 iBit = 0;
5043# elif RT_INLINE_ASM_GNU_STYLE
5044 uint32_t iBit;
5045 __asm__ __volatile__("bsf %1, %0\n\t"
5046 "jnz 1f\n\t"
5047 "xorl %0, %0\n\t"
5048 "jmp 2f\n"
5049 "1:\n\t"
5050 "incl %0\n"
5051 "2:\n\t"
5052 : "=r" (iBit)
5053 : "rm" (u32));
5054# else
5055 uint32_t iBit;
5056 _asm
5057 {
5058 bsf eax, [u32]
5059 jnz found
5060 xor eax, eax
5061 jmp done
5062 found:
5063 inc eax
5064 done:
5065 mov [iBit], eax
5066 }
5067# endif
5068 return iBit;
5069}
5070#endif
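
/*
 * Usage sketch for ASMBitFirstSetU32 (illustrative).  Note the 1-based
 * result: bit 0 set yields 1 and a zero input yields 0, unlike the 0-based
 * bitmap searches above.
 */
#if 0
DECLINLINE(void) asmSketchFfsU32(void)
{
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000001)) == 1);
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000080)) == 8);
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000000)) == 0);
}
#endif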
5071
5072
5073/**
5074 * Finds the first bit which is set in the given 32-bit integer.
5075 * Bits are numbered from 1 (least significant) to 32.
5076 *
5077 * @returns index [1..32] of the first set bit.
5078 * @returns 0 if all bits are cleared.
5079 * @param i32 Integer to search for set bits.
5080 * @remark Similar to ffs() in BSD.
5081 */
5082DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5083{
5084 return ASMBitFirstSetU32((uint32_t)i32);
5085}
5086
5087
5088/**
5089 * Finds the first bit which is set in the given 64-bit integer.
5090 *
5091 * Bits are numbered from 1 (least significant) to 64.
5092 *
5093 * @returns index [1..64] of the first set bit.
5094 * @returns 0 if all bits are cleared.
5095 * @param u64 Integer to search for set bits.
5096 * @remarks Similar to ffs() in BSD.
5097 */
5098#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5099DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5100#else
5101DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5102{
5103# if RT_INLINE_ASM_USES_INTRIN
5104 unsigned long iBit;
5105# if ARCH_BITS == 64
5106 if (_BitScanForward64(&iBit, u64))
5107 iBit++;
5108 else
5109 iBit = 0;
5110# else
5111 if (_BitScanForward(&iBit, (uint32_t)u64))
5112 iBit++;
5113 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5114 iBit += 33;
5115 else
5116 iBit = 0;
5117# endif
5118# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5119 uint64_t iBit;
5120 __asm__ __volatile__("bsfq %1, %0\n\t"
5121 "jnz 1f\n\t"
5122 "xorl %0, %0\n\t"
5123 "jmp 2f\n"
5124 "1:\n\t"
5125 "incl %0\n"
5126 "2:\n\t"
5127 : "=r" (iBit)
5128 : "rm" (u64));
5129# else
5130 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5131 if (!iBit)
5132 {
5133 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5134 if (iBit)
5135 iBit += 32;
5136 }
5137# endif
5138 return (unsigned)iBit;
5139}
5140#endif
5141
5142
5143/**
5144 * Finds the first bit which is set in the given 16-bit integer.
5145 *
5146 * Bits are numbered from 1 (least significant) to 16.
5147 *
5148 * @returns index [1..16] of the first set bit.
5149 * @returns 0 if all bits are cleared.
5150 * @param u16 Integer to search for set bits.
5151 * @remarks For 16-bit bs3kit code.
5152 */
5153#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5154DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5155#else
5156DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5157{
5158 return ASMBitFirstSetU32((uint32_t)u16);
5159}
5160#endif
5161
5162
5163/**
5164 * Finds the last bit which is set in the given 32-bit integer.
5165 * Bits are numbered from 1 (least significant) to 32.
5166 *
5167 * @returns index [1..32] of the last set bit.
5168 * @returns 0 if all bits are cleared.
5169 * @param u32 Integer to search for set bits.
5170 * @remark Similar to fls() in BSD.
5171 */
5172#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5173DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5174#else
5175DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5176{
5177# if RT_INLINE_ASM_USES_INTRIN
5178 unsigned long iBit;
5179 if (_BitScanReverse(&iBit, u32))
5180 iBit++;
5181 else
5182 iBit = 0;
5183# elif RT_INLINE_ASM_GNU_STYLE
5184 uint32_t iBit;
5185 __asm__ __volatile__("bsrl %1, %0\n\t"
5186 "jnz 1f\n\t"
5187 "xorl %0, %0\n\t"
5188 "jmp 2f\n"
5189 "1:\n\t"
5190 "incl %0\n"
5191 "2:\n\t"
5192 : "=r" (iBit)
5193 : "rm" (u32));
5194# else
5195 uint32_t iBit;
5196 _asm
5197 {
5198 bsr eax, [u32]
5199 jnz found
5200 xor eax, eax
5201 jmp done
5202 found:
5203 inc eax
5204 done:
5205 mov [iBit], eax
5206 }
5207# endif
5208 return iBit;
5209}
5210#endif
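
/*
 * Usage sketch for ASMBitLastSetU32 (illustrative; the helper name is
 * hypothetical).  Since the result is 1-based, subtracting one gives the
 * position of the most significant set bit, i.e. floor(log2(u32)).
 */
#if 0
DECLINLINE(unsigned) asmSketchLog2U32(uint32_t u32)
{
    Assert(u32 > 0);
    return ASMBitLastSetU32(u32) - 1;   /* e.g. 0x8000 -> 16 - 1 = 15 */
}
#endif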
5211
5212
5213/**
5214 * Finds the last bit which is set in the given 32-bit integer.
5215 * Bits are numbered from 1 (least significant) to 32.
5216 *
5217 * @returns index [1..32] of the last set bit.
5218 * @returns 0 if all bits are cleared.
5219 * @param i32 Integer to search for set bits.
5220 * @remark Similar to fls() in BSD.
5221 */
5222DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5223{
5224 return ASMBitLastSetU32((uint32_t)i32);
5225}
5226
5227
5228/**
5229 * Finds the last bit which is set in the given 64-bit integer.
5230 *
5231 * Bits are numbered from 1 (least significant) to 64.
5232 *
5233 * @returns index [1..64] of the last set bit.
5234 * @returns 0 if all bits are cleared.
5235 * @param u64 Integer to search for set bits.
5236 * @remark Similar to fls() in BSD.
5237 */
5238#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5239DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5240#else
5241DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5242{
5243# if RT_INLINE_ASM_USES_INTRIN
5244 unsigned long iBit;
5245# if ARCH_BITS == 64
5246 if (_BitScanReverse64(&iBit, u64))
5247 iBit++;
5248 else
5249 iBit = 0;
5250# else
5251 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5252 iBit += 33;
5253 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5254 iBit++;
5255 else
5256 iBit = 0;
5257# endif
5258# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5259 uint64_t iBit;
5260 __asm__ __volatile__("bsrq %1, %0\n\t"
5261 "jnz 1f\n\t"
5262 "xorl %0, %0\n\t"
5263 "jmp 2f\n"
5264 "1:\n\t"
5265 "incl %0\n"
5266 "2:\n\t"
5267 : "=r" (iBit)
5268 : "rm" (u64));
5269# else
5270 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5271 if (iBit)
5272 iBit += 32;
5273 else
5274 iBit = ASMBitLastSetU32((uint32_t)u64);
5275# endif
5276 return (unsigned)iBit;
5277}
5278#endif
5279
5280
5281/**
5282 * Finds the last bit which is set in the given 16-bit integer.
5283 *
5284 * Bits are numbered from 1 (least significant) to 16.
5285 *
5286 * @returns index [1..16] of the last set bit.
5287 * @returns 0 if all bits are cleared.
5288 * @param u16 Integer to search for set bits.
5289 * @remarks For 16-bit bs3kit code.
5290 */
5291#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5292DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5293#else
5294DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5295{
5296 return ASMBitLastSetU32((uint32_t)u16);
5297}
5298#endif
5299
5300
5301/**
5302 * Reverse the byte order of the given 16-bit integer.
5303 *
 5304 * @returns The byte swapped value.
5305 * @param u16 16-bit integer value.
5306 */
5307#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5308DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5309#else
5310DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5311{
5312# if RT_INLINE_ASM_USES_INTRIN
5313 u16 = _byteswap_ushort(u16);
5314# elif RT_INLINE_ASM_GNU_STYLE
5315 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5316# else
5317 _asm
5318 {
5319 mov ax, [u16]
5320 ror ax, 8
5321 mov [u16], ax
5322 }
5323# endif
5324 return u16;
5325}
5326#endif
5327
5328
5329/**
5330 * Reverse the byte order of the given 32-bit integer.
5331 *
 5332 * @returns The byte swapped value.
5333 * @param u32 32-bit integer value.
5334 */
5335#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5336DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5337#else
5338DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5339{
5340# if RT_INLINE_ASM_USES_INTRIN
5341 u32 = _byteswap_ulong(u32);
5342# elif RT_INLINE_ASM_GNU_STYLE
5343 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5344# else
5345 _asm
5346 {
5347 mov eax, [u32]
5348 bswap eax
5349 mov [u32], eax
5350 }
5351# endif
5352 return u32;
5353}
5354#endif
5355
5356
5357/**
5358 * Reverse the byte order of the given 64-bit integer.
5359 *
 5360 * @returns The byte swapped value.
5361 * @param u64 64-bit integer value.
5362 */
5363DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5364{
5365#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5366 u64 = _byteswap_uint64(u64);
5367#else
5368 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5369 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5370#endif
5371 return u64;
5372}
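
/*
 * Usage sketch for the byte swappers (illustrative; the function name is
 * hypothetical, RT_BIG_ENDIAN is the iprt/cdefs.h endian indicator).  They
 * are the building block for explicit endian conversion when producing or
 * parsing on-disk and wire formats.
 */
#if 0
DECLINLINE(uint64_t) asmSketchHostToBigEndianU64(uint64_t u64Host)
{
# ifdef RT_BIG_ENDIAN
    return u64Host;                     /* already big endian */
# else
    return ASMByteSwapU64(u64Host);     /* little endian host: swap the bytes */
# endif
}
#endif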
5373
5374
5375/**
5376 * Rotate 32-bit unsigned value to the left by @a cShift.
5377 *
5378 * @returns Rotated value.
5379 * @param u32 The value to rotate.
5380 * @param cShift How many bits to rotate by.
5381 */
5382#ifdef __WATCOMC__
5383DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5384#else
5385DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5386{
5387# if RT_INLINE_ASM_USES_INTRIN
5388 return _rotl(u32, cShift);
5389# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5390 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5391 return u32;
5392# else
5393 cShift &= 31;
5394 return (u32 << cShift) | (u32 >> (32 - cShift));
5395# endif
5396}
5397#endif
5398
5399
5400/**
5401 * Rotate 32-bit unsigned value to the right by @a cShift.
5402 *
5403 * @returns Rotated value.
5404 * @param u32 The value to rotate.
5405 * @param cShift How many bits to rotate by.
5406 */
5407#ifdef __WATCOMC__
5408DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5409#else
5410DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5411{
5412# if RT_INLINE_ASM_USES_INTRIN
5413 return _rotr(u32, cShift);
5414# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5415 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5416 return u32;
5417# else
5418 cShift &= 31;
5419 return (u32 >> cShift) | (u32 << (32 - cShift));
5420# endif
5421}
5422#endif
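
/*
 * Usage sketch for the 32-bit rotate helpers (illustrative).  Rotation is a
 * common primitive in hashing and checksum code; the identities below hold
 * for any input value.
 */
#if 0
DECLINLINE(void) asmSketchRotate32(void)
{
    uint32_t const u32 = UINT32_C(0x80000001);
    Assert(ASMRotateLeftU32(u32, 1) == UINT32_C(0x00000003));
    Assert(ASMRotateRightU32(ASMRotateLeftU32(u32, 13), 13) == u32);
}
#endif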
5423
5424
5425/**
5426 * Rotate 64-bit unsigned value to the left by @a cShift.
5427 *
5428 * @returns Rotated value.
5429 * @param u64 The value to rotate.
5430 * @param cShift How many bits to rotate by.
5431 */
5432DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5433{
5434#if RT_INLINE_ASM_USES_INTRIN
5435 return _rotl64(u64, cShift);
5436#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5437 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5438 return u64;
5439#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5440 uint32_t uSpill;
 5441 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5442 "jz 1f\n\t"
5443 "xchgl %%eax, %%edx\n\t"
5444 "1:\n\t"
5445 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5446 "jz 2f\n\t"
5447 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5448 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5449 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5450 "2:\n\t" /* } */
5451 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5452 : "0" (u64),
5453 "1" (cShift));
5454 return u64;
5455#else
5456 cShift &= 63;
5457 return (u64 << cShift) | (u64 >> (64 - cShift));
5458#endif
5459}
5460
5461
5462/**
5463 * Rotate 64-bit unsigned value to the right by @a cShift.
5464 *
5465 * @returns Rotated value.
5466 * @param u64 The value to rotate.
5467 * @param cShift How many bits to rotate by.
5468 */
5469DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5470{
5471#if RT_INLINE_ASM_USES_INTRIN
5472 return _rotr64(u64, cShift);
5473#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5474 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5475 return u64;
5476#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5477 uint32_t uSpill;
 5478 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5479 "jz 1f\n\t"
5480 "xchgl %%eax, %%edx\n\t"
5481 "1:\n\t"
5482 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5483 "jz 2f\n\t"
5484 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5485 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5486 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5487 "2:\n\t" /* } */
5488 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5489 : "0" (u64),
5490 "1" (cShift));
5491 return u64;
5492#else
5493 cShift &= 63;
5494 return (u64 >> cShift) | (u64 << (64 - cShift));
5495#endif
5496}
5497
5498/** @} */
5499
5500
5501/** @} */
5502
5503#endif
5504