VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 62534

Last change on this file since 62534 was 62473, checked in by vboxsync, 8 years ago

(C) 2016

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 157.9 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2016 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct this if it is wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
 * @code
 *      static inline uint64_t rdmsr_low(int idx)
 *      {
 *          uint32_t low;
 *          __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *          return low;
 *      }
 *      ...
 *      uint32_t msr1 = rdmsr_low(1);
 *      foo(msr1);
 *      msr1 = rdmsr_low(1);
 *      bar(msr1);
 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some 4.3.x register allocation issues in this version of
137 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
138 * definitely not for 5.x */
139#define RT_INLINE_ASM_GCC_4_3_X_X86 (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
140#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
141# define RT_INLINE_ASM_GCC_4_3_X_X86 0
142#endif
143
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * Defined as a plain 1 or 0.  The condition is evaluated here, once, instead
 * of being stored as a macro body containing 'defined' operators, since such
 * an expansion has undefined behaviour when it is later used in an \#if.
 * A definition supplied before this point is left untouched.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif    (defined(PIC) || defined(__PIC__)) \
       && defined(RT_ARCH_X86) \
       && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
           || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
163
164
/** @def ASMReturnAddress
 * Yields the return address of the current function or method, i.e. the
 * address in the caller that execution continues at after returning.
 *
 * Maps to the _ReturnAddress intrinsic for MSC and to
 * __builtin_return_address(0) for GCC.  For Watcom it expands to a call to a
 * function that is not declared in this file.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#elif defined(__WATCOMC__)
# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
#else
# error "Unsupported compiler."
#endif
182
183
184/**
185 * Compiler memory barrier.
186 *
187 * Ensure that the compiler does not use any cached (register/tmp stack) memory
188 * values or any outstanding writes when returning from this function.
189 *
190 * This function must be used if non-volatile data is modified by a
191 * device or the VMM. Typical cases are port access, MMIO access,
192 * trapping instruction, etc.
193 */
194#if RT_INLINE_ASM_GNU_STYLE
195# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
196#elif RT_INLINE_ASM_USES_INTRIN
197# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
198#elif defined(__WATCOMC__)
199void ASMCompilerBarrier(void);
200#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
201DECLINLINE(void) ASMCompilerBarrier(void)
202{
203 __asm
204 {
205 }
206}
207#endif
208
209
/** @def ASMBreakpoint
 * Debugger breakpoint; a plain alias for RT_BREAKPOINT().
 * @deprecated Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint()     RT_BREAKPOINT()
216
217
218/**
219 * Spinloop hint for platforms that have these, empty function on the other
220 * platforms.
221 *
222 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
223 * spin locks.
224 */
225#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
226DECLASM(void) ASMNopPause(void);
227#else
228DECLINLINE(void) ASMNopPause(void)
229{
230# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
231# if RT_INLINE_ASM_GNU_STYLE
232 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
233# else
234 __asm {
235 _emit 0f3h
236 _emit 090h
237 }
238# endif
239# else
240 /* dummy */
241# endif
242}
243#endif
244
245
246/**
247 * Atomically Exchange an unsigned 8-bit value, ordered.
248 *
249 * @returns Current *pu8 value
250 * @param pu8 Pointer to the 8-bit variable to update.
251 * @param u8 The 8-bit value to assign to *pu8.
252 */
253#if RT_INLINE_ASM_EXTERNAL
254DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
255#else
256DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
257{
258# if RT_INLINE_ASM_GNU_STYLE
259 __asm__ __volatile__("xchgb %0, %1\n\t"
260 : "=m" (*pu8),
261 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
262 : "1" (u8),
263 "m" (*pu8));
264# else
265 __asm
266 {
267# ifdef RT_ARCH_AMD64
268 mov rdx, [pu8]
269 mov al, [u8]
270 xchg [rdx], al
271 mov [u8], al
272# else
273 mov edx, [pu8]
274 mov al, [u8]
275 xchg [edx], al
276 mov [u8], al
277# endif
278 }
279# endif
280 return u8;
281}
282#endif
283
284
285/**
286 * Atomically Exchange a signed 8-bit value, ordered.
287 *
288 * @returns Current *pu8 value
289 * @param pi8 Pointer to the 8-bit variable to update.
290 * @param i8 The 8-bit value to assign to *pi8.
291 */
292DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
293{
294 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
295}
296
297
298/**
299 * Atomically Exchange a bool value, ordered.
300 *
301 * @returns Current *pf value
302 * @param pf Pointer to the 8-bit variable to update.
303 * @param f The 8-bit value to assign to *pi8.
304 */
305DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
306{
307#ifdef _MSC_VER
308 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
309#else
310 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
311#endif
312}
313
314
315/**
316 * Atomically Exchange an unsigned 16-bit value, ordered.
317 *
318 * @returns Current *pu16 value
319 * @param pu16 Pointer to the 16-bit variable to update.
320 * @param u16 The 16-bit value to assign to *pu16.
321 */
322#if RT_INLINE_ASM_EXTERNAL
323DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
324#else
325DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
326{
327# if RT_INLINE_ASM_GNU_STYLE
328 __asm__ __volatile__("xchgw %0, %1\n\t"
329 : "=m" (*pu16),
330 "=r" (u16)
331 : "1" (u16),
332 "m" (*pu16));
333# else
334 __asm
335 {
336# ifdef RT_ARCH_AMD64
337 mov rdx, [pu16]
338 mov ax, [u16]
339 xchg [rdx], ax
340 mov [u16], ax
341# else
342 mov edx, [pu16]
343 mov ax, [u16]
344 xchg [edx], ax
345 mov [u16], ax
346# endif
347 }
348# endif
349 return u16;
350}
351#endif
352
353
354/**
355 * Atomically Exchange a signed 16-bit value, ordered.
356 *
357 * @returns Current *pu16 value
358 * @param pi16 Pointer to the 16-bit variable to update.
359 * @param i16 The 16-bit value to assign to *pi16.
360 */
361DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
362{
363 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
364}
365
366
367/**
368 * Atomically Exchange an unsigned 32-bit value, ordered.
369 *
370 * @returns Current *pu32 value
371 * @param pu32 Pointer to the 32-bit variable to update.
372 * @param u32 The 32-bit value to assign to *pu32.
373 *
374 * @remarks Does not work on 286 and earlier.
375 */
376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
377DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
378#else
379DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
380{
381# if RT_INLINE_ASM_GNU_STYLE
382 __asm__ __volatile__("xchgl %0, %1\n\t"
383 : "=m" (*pu32),
384 "=r" (u32)
385 : "1" (u32),
386 "m" (*pu32));
387
388# elif RT_INLINE_ASM_USES_INTRIN
389 u32 = _InterlockedExchange((long *)pu32, u32);
390
391# else
392 __asm
393 {
394# ifdef RT_ARCH_AMD64
395 mov rdx, [pu32]
396 mov eax, u32
397 xchg [rdx], eax
398 mov [u32], eax
399# else
400 mov edx, [pu32]
401 mov eax, u32
402 xchg [edx], eax
403 mov [u32], eax
404# endif
405 }
406# endif
407 return u32;
408}
409#endif
410
411
412/**
413 * Atomically Exchange a signed 32-bit value, ordered.
414 *
415 * @returns Current *pu32 value
416 * @param pi32 Pointer to the 32-bit variable to update.
417 * @param i32 The 32-bit value to assign to *pi32.
418 */
419DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
420{
421 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
422}
423
424
425/**
426 * Atomically Exchange an unsigned 64-bit value, ordered.
427 *
428 * @returns Current *pu64 value
429 * @param pu64 Pointer to the 64-bit variable to update.
430 * @param u64 The 64-bit value to assign to *pu64.
431 *
432 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
433 */
434#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
435 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
436DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
437#else
438DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
439{
440# if defined(RT_ARCH_AMD64)
441# if RT_INLINE_ASM_USES_INTRIN
442 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
443
444# elif RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("xchgq %0, %1\n\t"
446 : "=m" (*pu64),
447 "=r" (u64)
448 : "1" (u64),
449 "m" (*pu64));
450# else
451 __asm
452 {
453 mov rdx, [pu64]
454 mov rax, [u64]
455 xchg [rdx], rax
456 mov [u64], rax
457 }
458# endif
459# else /* !RT_ARCH_AMD64 */
460# if RT_INLINE_ASM_GNU_STYLE
461# if defined(PIC) || defined(__PIC__)
462 uint32_t u32EBX = (uint32_t)u64;
463 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
464 "xchgl %%ebx, %3\n\t"
465 "1:\n\t"
466 "lock; cmpxchg8b (%5)\n\t"
467 "jnz 1b\n\t"
468 "movl %3, %%ebx\n\t"
469 /*"xchgl %%esi, %5\n\t"*/
470 : "=A" (u64),
471 "=m" (*pu64)
472 : "0" (*pu64),
473 "m" ( u32EBX ),
474 "c" ( (uint32_t)(u64 >> 32) ),
475 "S" (pu64));
476# else /* !PIC */
477 __asm__ __volatile__("1:\n\t"
478 "lock; cmpxchg8b %1\n\t"
479 "jnz 1b\n\t"
480 : "=A" (u64),
481 "=m" (*pu64)
482 : "0" (*pu64),
483 "b" ( (uint32_t)u64 ),
484 "c" ( (uint32_t)(u64 >> 32) ));
485# endif
486# else
487 __asm
488 {
489 mov ebx, dword ptr [u64]
490 mov ecx, dword ptr [u64 + 4]
491 mov edi, pu64
492 mov eax, dword ptr [edi]
493 mov edx, dword ptr [edi + 4]
494 retry:
495 lock cmpxchg8b [edi]
496 jnz retry
497 mov dword ptr [u64], eax
498 mov dword ptr [u64 + 4], edx
499 }
500# endif
501# endif /* !RT_ARCH_AMD64 */
502 return u64;
503}
504#endif
505
506
507/**
508 * Atomically Exchange an signed 64-bit value, ordered.
509 *
510 * @returns Current *pi64 value
511 * @param pi64 Pointer to the 64-bit variable to update.
512 * @param i64 The 64-bit value to assign to *pi64.
513 */
514DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
515{
516 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
517}
518
519
520/**
521 * Atomically Exchange a pointer value, ordered.
522 *
523 * @returns Current *ppv value
524 * @param ppv Pointer to the pointer variable to update.
525 * @param pv The pointer value to assign to *ppv.
526 */
527DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
528{
529#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
530 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
531#elif ARCH_BITS == 64
532 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
533#else
534# error "ARCH_BITS is bogus"
535#endif
536}
537
538
/**
 * Convenience macro that spares the caller the casting otherwise needed with
 * ASMAtomicXchgPtr.
 *
 * The GCC flavour routes the arguments through typed temporaries so that
 * mismatching pointer types are seen by the compiler; other compilers get
 * plain casts.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to store in *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#if defined(__GNUC__)
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type const                          pvTypeChecked  = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, \
                                                                      (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#endif
560
561
562/**
563 * Atomically Exchange a raw-mode context pointer value, ordered.
564 *
565 * @returns Current *ppv value
566 * @param ppvRC Pointer to the pointer variable to update.
567 * @param pvRC The pointer value to assign to *ppv.
568 */
569DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
570{
571 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
572}
573
574
575/**
576 * Atomically Exchange a ring-0 pointer value, ordered.
577 *
578 * @returns Current *ppv value
579 * @param ppvR0 Pointer to the pointer variable to update.
580 * @param pvR0 The pointer value to assign to *ppv.
581 */
582DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
583{
584#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
585 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
586#elif R0_ARCH_BITS == 64
587 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
588#else
589# error "R0_ARCH_BITS is bogus"
590#endif
591}
592
593
594/**
595 * Atomically Exchange a ring-3 pointer value, ordered.
596 *
597 * @returns Current *ppv value
598 * @param ppvR3 Pointer to the pointer variable to update.
599 * @param pvR3 The pointer value to assign to *ppv.
600 */
601DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
602{
603#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
604 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
605#elif R3_ARCH_BITS == 64
606 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
607#else
608# error "R3_ARCH_BITS is bogus"
609#endif
610}
611
612
613/** @def ASMAtomicXchgHandle
614 * Atomically Exchange a typical IPRT handle value, ordered.
615 *
616 * @param ph Pointer to the value to update.
617 * @param hNew The new value to assigned to *pu.
618 * @param phRes Where to store the current *ph value.
619 *
620 * @remarks This doesn't currently work for all handles (like RTFILE).
621 */
622#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
623# define ASMAtomicXchgHandle(ph, hNew, phRes) \
624 do { \
625 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
626 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
627 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
628 } while (0)
629#elif HC_ARCH_BITS == 64
630# define ASMAtomicXchgHandle(ph, hNew, phRes) \
631 do { \
632 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
633 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
634 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
635 } while (0)
636#else
637# error HC_ARCH_BITS
638#endif
639
640
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Selects the 8, 16, 32 or 64-bit exchange from sizeof(*(pu)); any other
 * size triggers an assertion.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
659
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Selects the 8, 16, 32 or 64-bit exchange from sizeof(*(pu)); any other
 * size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
678
679
680
681/**
682 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
683 *
684 * @returns true if xchg was done.
685 * @returns false if xchg wasn't done.
686 *
687 * @param pu8 Pointer to the value to update.
688 * @param u8New The new value to assigned to *pu8.
689 * @param u8Old The old value to *pu8 compare with.
690 *
691 * @remarks x86: Requires a 486 or later.
692 */
693#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
694DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
695#else
696DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
697{
698 uint8_t u8Ret;
699 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
700 "setz %1\n\t"
701 : "=m" (*pu8),
702 "=qm" (u8Ret),
703 "=a" (u8Old)
704 : "q" (u8New),
705 "2" (u8Old),
706 "m" (*pu8));
707 return (bool)u8Ret;
708}
709#endif
710
711
712/**
713 * Atomically Compare and Exchange a signed 8-bit value, ordered.
714 *
715 * @returns true if xchg was done.
716 * @returns false if xchg wasn't done.
717 *
718 * @param pi8 Pointer to the value to update.
719 * @param i8New The new value to assigned to *pi8.
720 * @param i8Old The old value to *pi8 compare with.
721 *
722 * @remarks x86: Requires a 486 or later.
723 */
724DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
725{
726 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
727}
728
729
730/**
731 * Atomically Compare and Exchange a bool value, ordered.
732 *
733 * @returns true if xchg was done.
734 * @returns false if xchg wasn't done.
735 *
736 * @param pf Pointer to the value to update.
737 * @param fNew The new value to assigned to *pf.
738 * @param fOld The old value to *pf compare with.
739 *
740 * @remarks x86: Requires a 486 or later.
741 */
742DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
743{
744 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
745}
746
747
748/**
749 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
750 *
751 * @returns true if xchg was done.
752 * @returns false if xchg wasn't done.
753 *
754 * @param pu32 Pointer to the value to update.
755 * @param u32New The new value to assigned to *pu32.
756 * @param u32Old The old value to *pu32 compare with.
757 *
758 * @remarks x86: Requires a 486 or later.
759 */
760#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
761DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
762#else
763DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
764{
765# if RT_INLINE_ASM_GNU_STYLE
766 uint8_t u8Ret;
767 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
768 "setz %1\n\t"
769 : "=m" (*pu32),
770 "=qm" (u8Ret),
771 "=a" (u32Old)
772 : "r" (u32New),
773 "2" (u32Old),
774 "m" (*pu32));
775 return (bool)u8Ret;
776
777# elif RT_INLINE_ASM_USES_INTRIN
778 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
779
780# else
781 uint32_t u32Ret;
782 __asm
783 {
784# ifdef RT_ARCH_AMD64
785 mov rdx, [pu32]
786# else
787 mov edx, [pu32]
788# endif
789 mov eax, [u32Old]
790 mov ecx, [u32New]
791# ifdef RT_ARCH_AMD64
792 lock cmpxchg [rdx], ecx
793# else
794 lock cmpxchg [edx], ecx
795# endif
796 setz al
797 movzx eax, al
798 mov [u32Ret], eax
799 }
800 return !!u32Ret;
801# endif
802}
803#endif
804
805
806/**
807 * Atomically Compare and Exchange a signed 32-bit value, ordered.
808 *
809 * @returns true if xchg was done.
810 * @returns false if xchg wasn't done.
811 *
812 * @param pi32 Pointer to the value to update.
813 * @param i32New The new value to assigned to *pi32.
814 * @param i32Old The old value to *pi32 compare with.
815 *
816 * @remarks x86: Requires a 486 or later.
817 */
818DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
819{
820 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
821}
822
823
824/**
825 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
826 *
827 * @returns true if xchg was done.
828 * @returns false if xchg wasn't done.
829 *
830 * @param pu64 Pointer to the 64-bit variable to update.
831 * @param u64New The 64-bit value to assign to *pu64.
832 * @param u64Old The value to compare with.
833 *
834 * @remarks x86: Requires a Pentium or later.
835 */
836#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
837 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
838DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
839#else
840DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
841{
842# if RT_INLINE_ASM_USES_INTRIN
843 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
844
845# elif defined(RT_ARCH_AMD64)
846# if RT_INLINE_ASM_GNU_STYLE
847 uint8_t u8Ret;
848 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
849 "setz %1\n\t"
850 : "=m" (*pu64),
851 "=qm" (u8Ret),
852 "=a" (u64Old)
853 : "r" (u64New),
854 "2" (u64Old),
855 "m" (*pu64));
856 return (bool)u8Ret;
857# else
858 bool fRet;
859 __asm
860 {
861 mov rdx, [pu32]
862 mov rax, [u64Old]
863 mov rcx, [u64New]
864 lock cmpxchg [rdx], rcx
865 setz al
866 mov [fRet], al
867 }
868 return fRet;
869# endif
870# else /* !RT_ARCH_AMD64 */
871 uint32_t u32Ret;
872# if RT_INLINE_ASM_GNU_STYLE
873# if defined(PIC) || defined(__PIC__)
874 uint32_t u32EBX = (uint32_t)u64New;
875 uint32_t u32Spill;
876 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
877 "lock; cmpxchg8b (%6)\n\t"
878 "setz %%al\n\t"
879 "movl %4, %%ebx\n\t"
880 "movzbl %%al, %%eax\n\t"
881 : "=a" (u32Ret),
882 "=d" (u32Spill),
883# if RT_GNUC_PREREQ(4, 3)
884 "+m" (*pu64)
885# else
886 "=m" (*pu64)
887# endif
888 : "A" (u64Old),
889 "m" ( u32EBX ),
890 "c" ( (uint32_t)(u64New >> 32) ),
891 "S" (pu64));
892# else /* !PIC */
893 uint32_t u32Spill;
894 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
895 "setz %%al\n\t"
896 "movzbl %%al, %%eax\n\t"
897 : "=a" (u32Ret),
898 "=d" (u32Spill),
899 "+m" (*pu64)
900 : "A" (u64Old),
901 "b" ( (uint32_t)u64New ),
902 "c" ( (uint32_t)(u64New >> 32) ));
903# endif
904 return (bool)u32Ret;
905# else
906 __asm
907 {
908 mov ebx, dword ptr [u64New]
909 mov ecx, dword ptr [u64New + 4]
910 mov edi, [pu64]
911 mov eax, dword ptr [u64Old]
912 mov edx, dword ptr [u64Old + 4]
913 lock cmpxchg8b [edi]
914 setz al
915 movzx eax, al
916 mov dword ptr [u32Ret], eax
917 }
918 return !!u32Ret;
919# endif
920# endif /* !RT_ARCH_AMD64 */
921}
922#endif
923
924
925/**
926 * Atomically Compare and exchange a signed 64-bit value, ordered.
927 *
928 * @returns true if xchg was done.
929 * @returns false if xchg wasn't done.
930 *
931 * @param pi64 Pointer to the 64-bit variable to update.
932 * @param i64 The 64-bit value to assign to *pu64.
933 * @param i64Old The value to compare with.
934 *
935 * @remarks x86: Requires a Pentium or later.
936 */
937DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
938{
939 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
940}
941
942
943/**
944 * Atomically Compare and Exchange a pointer value, ordered.
945 *
946 * @returns true if xchg was done.
947 * @returns false if xchg wasn't done.
948 *
949 * @param ppv Pointer to the value to update.
950 * @param pvNew The new value to assigned to *ppv.
951 * @param pvOld The old value to *ppv compare with.
952 *
953 * @remarks x86: Requires a 486 or later.
954 */
955DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
956{
957#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
958 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
959#elif ARCH_BITS == 64
960 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
961#else
962# error "ARCH_BITS is bogus"
963#endif
964}
965
966
/**
 * Atomically compares a pointer value with an expected one and, when they
 * match, replaces it with a new value, ordered.
 *
 * The GCC flavour first assigns all three arguments to temporaries typed
 * after *ppv, then forwards to ASMAtomicCmpXchgPtrVoid; other compilers get
 * plain casts.
 *
 * @returns true if the exchange was done, false if it was not.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#if defined(__GNUC__)
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, \
                                                 (void *)pvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#endif
995
996
997/** @def ASMAtomicCmpXchgHandle
998 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
999 *
1000 * @param ph Pointer to the value to update.
1001 * @param hNew The new value to assigned to *pu.
1002 * @param hOld The old value to *pu compare with.
1003 * @param fRc Where to store the result.
1004 *
1005 * @remarks This doesn't currently work for all handles (like RTFILE).
1006 * @remarks x86: Requires a 486 or later.
1007 */
1008#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1009# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1010 do { \
1011 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1012 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1013 } while (0)
1014#elif HC_ARCH_BITS == 64
1015# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1016 do { \
1017 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1018 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1019 } while (0)
1020#else
1021# error HC_ARCH_BITS
1022#endif
1023
1024
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Selects the 32 or 64-bit compare-exchange from sizeof(*(pu)); any other
 * size triggers an assertion and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1048
1049
1050/**
1051 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1052 * passes back old value, ordered.
1053 *
1054 * @returns true if xchg was done.
1055 * @returns false if xchg wasn't done.
1056 *
1057 * @param pu32 Pointer to the value to update.
1058 * @param u32New The new value to assigned to *pu32.
1059 * @param u32Old The old value to *pu32 compare with.
1060 * @param pu32Old Pointer store the old value at.
1061 *
1062 * @remarks x86: Requires a 486 or later.
1063 */
1064#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1065DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1066#else
1067DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1068{
1069# if RT_INLINE_ASM_GNU_STYLE
1070 uint8_t u8Ret;
1071 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1072 "setz %1\n\t"
1073 : "=m" (*pu32),
1074 "=qm" (u8Ret),
1075 "=a" (*pu32Old)
1076 : "r" (u32New),
1077 "a" (u32Old),
1078 "m" (*pu32));
1079 return (bool)u8Ret;
1080
1081# elif RT_INLINE_ASM_USES_INTRIN
1082 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1083
1084# else
1085 uint32_t u32Ret;
1086 __asm
1087 {
1088# ifdef RT_ARCH_AMD64
1089 mov rdx, [pu32]
1090# else
1091 mov edx, [pu32]
1092# endif
1093 mov eax, [u32Old]
1094 mov ecx, [u32New]
1095# ifdef RT_ARCH_AMD64
1096 lock cmpxchg [rdx], ecx
1097 mov rdx, [pu32Old]
1098 mov [rdx], eax
1099# else
1100 lock cmpxchg [edx], ecx
1101 mov edx, [pu32Old]
1102 mov [edx], eax
1103# endif
1104 setz al
1105 movzx eax, al
1106 mov [u32Ret], eax
1107 }
1108 return !!u32Ret;
1109# endif
1110}
1111#endif
1112
1113
1114/**
1115 * Atomically Compare and Exchange a signed 32-bit value, additionally
1116 * passes back old value, ordered.
1117 *
1118 * @returns true if xchg was done.
1119 * @returns false if xchg wasn't done.
1120 *
1121 * @param pi32 Pointer to the value to update.
1122 * @param i32New The new value to assigned to *pi32.
1123 * @param i32Old The old value to *pi32 compare with.
1124 * @param pi32Old Pointer store the old value at.
1125 *
1126 * @remarks x86: Requires a 486 or later.
1127 */
1128DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1129{
1130 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1131}
1132
1133
1134/**
1135 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1136 * passing back old value, ordered.
1137 *
1138 * @returns true if xchg was done.
1139 * @returns false if xchg wasn't done.
1140 *
1141 * @param pu64 Pointer to the 64-bit variable to update.
1142 * @param u64New The 64-bit value to assign to *pu64.
1143 * @param u64Old The value to compare with.
1144 * @param pu64Old Pointer store the old value at.
1145 *
1146 * @remarks x86: Requires a Pentium or later.
1147 */
1148#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1149 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1150DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1151#else
1152DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1153{
1154# if RT_INLINE_ASM_USES_INTRIN
1155 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1156
1157# elif defined(RT_ARCH_AMD64)
1158# if RT_INLINE_ASM_GNU_STYLE
1159 uint8_t u8Ret;
1160 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1161 "setz %1\n\t"
1162 : "=m" (*pu64),
1163 "=qm" (u8Ret),
1164 "=a" (*pu64Old)
1165 : "r" (u64New),
1166 "a" (u64Old),
1167 "m" (*pu64));
1168 return (bool)u8Ret;
1169# else
1170 bool fRet;
1171 __asm
1172 {
1173 mov rdx, [pu32]
1174 mov rax, [u64Old]
1175 mov rcx, [u64New]
1176 lock cmpxchg [rdx], rcx
1177 mov rdx, [pu64Old]
1178 mov [rdx], rax
1179 setz al
1180 mov [fRet], al
1181 }
1182 return fRet;
1183# endif
1184# else /* !RT_ARCH_AMD64 */
1185# if RT_INLINE_ASM_GNU_STYLE
1186 uint64_t u64Ret;
1187# if defined(PIC) || defined(__PIC__)
1188 /* NB: this code uses a memory clobber description, because the clean
1189 * solution with an output value for *pu64 makes gcc run out of registers.
1190 * This will cause suboptimal code, and anyone with a better solution is
1191 * welcome to improve this. */
1192 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1193 "lock; cmpxchg8b %3\n\t"
1194 "xchgl %%ebx, %1\n\t"
1195 : "=A" (u64Ret)
1196 : "DS" ((uint32_t)u64New),
1197 "c" ((uint32_t)(u64New >> 32)),
1198 "m" (*pu64),
1199 "0" (u64Old)
1200 : "memory" );
1201# else /* !PIC */
1202 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1203 : "=A" (u64Ret),
1204 "=m" (*pu64)
1205 : "b" ((uint32_t)u64New),
1206 "c" ((uint32_t)(u64New >> 32)),
1207 "m" (*pu64),
1208 "0" (u64Old));
1209# endif
1210 *pu64Old = u64Ret;
1211 return u64Ret == u64Old;
1212# else
1213 uint32_t u32Ret;
1214 __asm
1215 {
1216 mov ebx, dword ptr [u64New]
1217 mov ecx, dword ptr [u64New + 4]
1218 mov edi, [pu64]
1219 mov eax, dword ptr [u64Old]
1220 mov edx, dword ptr [u64Old + 4]
1221 lock cmpxchg8b [edi]
1222 mov ebx, [pu64Old]
1223 mov [ebx], eax
1224 setz al
1225 movzx eax, al
1226 add ebx, 4
1227 mov [ebx], edx
1228 mov dword ptr [u32Ret], eax
1229 }
1230 return !!u32Ret;
1231# endif
1232# endif /* !RT_ARCH_AMD64 */
1233}
1234#endif
1235
1236
1237/**
1238 * Atomically Compare and exchange a signed 64-bit value, additionally
1239 * passing back old value, ordered.
1240 *
1241 * @returns true if xchg was done.
1242 * @returns false if xchg wasn't done.
1243 *
1244 * @param pi64 Pointer to the 64-bit variable to update.
1245 * @param i64 The 64-bit value to assign to *pu64.
1246 * @param i64Old The value to compare with.
1247 * @param pi64Old Pointer store the old value at.
1248 *
1249 * @remarks x86: Requires a Pentium or later.
1250 */
1251DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1252{
1253 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1254}
1255
1256/** @def ASMAtomicCmpXchgExHandle
1257 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1258 *
1259 * @param ph Pointer to the value to update.
1260 * @param hNew The new value to assigned to *pu.
1261 * @param hOld The old value to *pu compare with.
1262 * @param fRc Where to store the result.
1263 * @param phOldVal Pointer to where to store the old value.
1264 *
1265 * @remarks This doesn't currently work for all handles (like RTFILE).
1266 */
1267#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1268# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1269 do { \
1270 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1271 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1272 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1273 } while (0)
1274#elif HC_ARCH_BITS == 64
1275# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1276 do { \
1277 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1278 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1279 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1280 } while (0)
1281#else
1282# error HC_ARCH_BITS
1283#endif
1284
1285
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value whose size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*pu); only 4 and 8 byte values are handled.  Any other
 * size hits an assertion, sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1311
1312
1313/**
1314 * Atomically Compare and Exchange a pointer value, additionally
1315 * passing back old value, ordered.
1316 *
1317 * @returns true if xchg was done.
1318 * @returns false if xchg wasn't done.
1319 *
1320 * @param ppv Pointer to the value to update.
1321 * @param pvNew The new value to assigned to *ppv.
1322 * @param pvOld The old value to *ppv compare with.
1323 * @param ppvOld Pointer store the old value at.
1324 *
1325 * @remarks x86: Requires a 486 or later.
1326 */
1327DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1328{
1329#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1330 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1331#elif ARCH_BITS == 64
1332 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1333#else
1334# error "ARCH_BITS is bogus"
1335#endif
1336}
1337
1338
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 * @param   ppvOld      Where to store the value found in *ppv.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
/* GCC flavour: the typed temporaries make the compiler check all arguments
   against the type of *ppv before they are cast down to void pointers. */
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) *          const ppvOldTypeChecked = (ppvOld); \
        (bool)ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                        (void *)pvNewTypeChecked, \
                                        (void *)pvOldTypeChecked, \
                                        (void **)ppvOldTypeChecked); \
    })
#else
/* Other compilers: plain casts, no type checking of the arguments. */
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#endif
1371
1372
1373/**
1374 * Virtualization unfriendly serializing instruction, always exits.
1375 */
1376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1377DECLASM(void) ASMSerializeInstructionCpuId(void);
1378#else
1379DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1380{
1381# if RT_INLINE_ASM_GNU_STYLE
1382 RTCCUINTREG xAX = 0;
1383# ifdef RT_ARCH_AMD64
1384 __asm__ __volatile__ ("cpuid"
1385 : "=a" (xAX)
1386 : "0" (xAX)
1387 : "rbx", "rcx", "rdx", "memory");
1388# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1389 __asm__ __volatile__ ("push %%ebx\n\t"
1390 "cpuid\n\t"
1391 "pop %%ebx\n\t"
1392 : "=a" (xAX)
1393 : "0" (xAX)
1394 : "ecx", "edx", "memory");
1395# else
1396 __asm__ __volatile__ ("cpuid"
1397 : "=a" (xAX)
1398 : "0" (xAX)
1399 : "ebx", "ecx", "edx", "memory");
1400# endif
1401
1402# elif RT_INLINE_ASM_USES_INTRIN
1403 int aInfo[4];
1404 _ReadWriteBarrier();
1405 __cpuid(aInfo, 0);
1406
1407# else
1408 __asm
1409 {
1410 push ebx
1411 xor eax, eax
1412 cpuid
1413 pop ebx
1414 }
1415# endif
1416}
1417#endif
1418
1419/**
1420 * Virtualization friendly serializing instruction, though more expensive.
1421 */
1422#if RT_INLINE_ASM_EXTERNAL
1423DECLASM(void) ASMSerializeInstructionIRet(void);
1424#else
1425DECLINLINE(void) ASMSerializeInstructionIRet(void)
1426{
1427# if RT_INLINE_ASM_GNU_STYLE
1428# ifdef RT_ARCH_AMD64
1429 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1430 "subq $128, %%rsp\n\t" /*redzone*/
1431 "mov %%ss, %%eax\n\t"
1432 "pushq %%rax\n\t"
1433 "pushq %%r10\n\t"
1434 "pushfq\n\t"
1435 "movl %%cs, %%eax\n\t"
1436 "pushq %%rax\n\t"
1437 "leaq 1f(%%rip), %%rax\n\t"
1438 "pushq %%rax\n\t"
1439 "iretq\n\t"
1440 "1:\n\t"
1441 ::: "rax", "r10", "memory");
1442# else
1443 __asm__ __volatile__ ("pushfl\n\t"
1444 "pushl %%cs\n\t"
1445 "pushl $1f\n\t"
1446 "iretl\n\t"
1447 "1:\n\t"
1448 ::: "memory");
1449# endif
1450
1451# else
1452 __asm
1453 {
1454 pushfd
1455 push cs
1456 push la_ret
1457 iretd
1458 la_ret:
1459 }
1460# endif
1461}
1462#endif
1463
1464/**
1465 * Virtualization friendlier serializing instruction, may still cause exits.
1466 */
1467#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1468DECLASM(void) ASMSerializeInstructionRdTscp(void);
1469#else
1470DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1471{
1472# if RT_INLINE_ASM_GNU_STYLE
1473 /* rdtscp is not supported by ancient linux build VM of course :-( */
1474# ifdef RT_ARCH_AMD64
1475 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1476 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1477# else
1478 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1479 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1480# endif
1481# else
1482# if RT_INLINE_ASM_USES_INTRIN >= 15
1483 uint32_t uIgnore;
1484 _ReadWriteBarrier();
1485 (void)__rdtscp(&uIgnore);
1486 (void)uIgnore;
1487# else
1488 __asm
1489 {
1490 rdtscp
1491 }
1492# endif
1493# endif
1494}
1495#endif
1496
1497
/**
 * Serialize Instruction.
 *
 * Maps to the IRET variant for 16-bit x86 and IN_GUEST builds, and to the
 * CPUID variant everywhere else.
 */
#if !((defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST))
# define ASMSerializeInstruction()  ASMSerializeInstructionCpuId()
#else
# define ASMSerializeInstruction()  ASMSerializeInstructionIRet()
#endif
1506
1507
1508/**
1509 * Memory fence, waits for any pending writes and reads to complete.
1510 */
1511DECLINLINE(void) ASMMemoryFence(void)
1512{
1513 /** @todo use mfence? check if all cpus we care for support it. */
1514#if ARCH_BITS == 16
1515 uint16_t volatile u16;
1516 ASMAtomicXchgU16(&u16, 0);
1517#else
1518 uint32_t volatile u32;
1519 ASMAtomicXchgU32(&u32, 0);
1520#endif
1521}
1522
1523
1524/**
1525 * Write fence, waits for any pending writes to complete.
1526 */
1527DECLINLINE(void) ASMWriteFence(void)
1528{
1529 /** @todo use sfence? check if all cpus we care for support it. */
1530 ASMMemoryFence();
1531}
1532
1533
1534/**
1535 * Read fence, waits for any pending reads to complete.
1536 */
1537DECLINLINE(void) ASMReadFence(void)
1538{
1539 /** @todo use lfence? check if all cpus we care for support it. */
1540 ASMMemoryFence();
1541}
1542
1543
1544/**
1545 * Atomically reads an unsigned 8-bit value, ordered.
1546 *
1547 * @returns Current *pu8 value
1548 * @param pu8 Pointer to the 8-bit variable to read.
1549 */
1550DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1551{
1552 ASMMemoryFence();
1553 return *pu8; /* byte reads are atomic on x86 */
1554}
1555
1556
1557/**
1558 * Atomically reads an unsigned 8-bit value, unordered.
1559 *
1560 * @returns Current *pu8 value
1561 * @param pu8 Pointer to the 8-bit variable to read.
1562 */
1563DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1564{
1565 return *pu8; /* byte reads are atomic on x86 */
1566}
1567
1568
1569/**
1570 * Atomically reads a signed 8-bit value, ordered.
1571 *
1572 * @returns Current *pi8 value
1573 * @param pi8 Pointer to the 8-bit variable to read.
1574 */
1575DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1576{
1577 ASMMemoryFence();
1578 return *pi8; /* byte reads are atomic on x86 */
1579}
1580
1581
1582/**
1583 * Atomically reads a signed 8-bit value, unordered.
1584 *
1585 * @returns Current *pi8 value
1586 * @param pi8 Pointer to the 8-bit variable to read.
1587 */
1588DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1589{
1590 return *pi8; /* byte reads are atomic on x86 */
1591}
1592
1593
1594/**
1595 * Atomically reads an unsigned 16-bit value, ordered.
1596 *
1597 * @returns Current *pu16 value
1598 * @param pu16 Pointer to the 16-bit variable to read.
1599 */
1600DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1601{
1602 ASMMemoryFence();
1603 Assert(!((uintptr_t)pu16 & 1));
1604 return *pu16;
1605}
1606
1607
1608/**
1609 * Atomically reads an unsigned 16-bit value, unordered.
1610 *
1611 * @returns Current *pu16 value
1612 * @param pu16 Pointer to the 16-bit variable to read.
1613 */
1614DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1615{
1616 Assert(!((uintptr_t)pu16 & 1));
1617 return *pu16;
1618}
1619
1620
1621/**
1622 * Atomically reads a signed 16-bit value, ordered.
1623 *
1624 * @returns Current *pi16 value
1625 * @param pi16 Pointer to the 16-bit variable to read.
1626 */
1627DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1628{
1629 ASMMemoryFence();
1630 Assert(!((uintptr_t)pi16 & 1));
1631 return *pi16;
1632}
1633
1634
1635/**
1636 * Atomically reads a signed 16-bit value, unordered.
1637 *
1638 * @returns Current *pi16 value
1639 * @param pi16 Pointer to the 16-bit variable to read.
1640 */
1641DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1642{
1643 Assert(!((uintptr_t)pi16 & 1));
1644 return *pi16;
1645}
1646
1647
1648/**
1649 * Atomically reads an unsigned 32-bit value, ordered.
1650 *
1651 * @returns Current *pu32 value
1652 * @param pu32 Pointer to the 32-bit variable to read.
1653 */
1654DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1655{
1656 ASMMemoryFence();
1657 Assert(!((uintptr_t)pu32 & 3));
1658#if ARCH_BITS == 16
1659 AssertFailed(); /** @todo 16-bit */
1660#endif
1661 return *pu32;
1662}
1663
1664
1665/**
1666 * Atomically reads an unsigned 32-bit value, unordered.
1667 *
1668 * @returns Current *pu32 value
1669 * @param pu32 Pointer to the 32-bit variable to read.
1670 */
1671DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1672{
1673 Assert(!((uintptr_t)pu32 & 3));
1674#if ARCH_BITS == 16
1675 AssertFailed(); /** @todo 16-bit */
1676#endif
1677 return *pu32;
1678}
1679
1680
1681/**
1682 * Atomically reads a signed 32-bit value, ordered.
1683 *
1684 * @returns Current *pi32 value
1685 * @param pi32 Pointer to the 32-bit variable to read.
1686 */
1687DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1688{
1689 ASMMemoryFence();
1690 Assert(!((uintptr_t)pi32 & 3));
1691#if ARCH_BITS == 16
1692 AssertFailed(); /** @todo 16-bit */
1693#endif
1694 return *pi32;
1695}
1696
1697
1698/**
1699 * Atomically reads a signed 32-bit value, unordered.
1700 *
1701 * @returns Current *pi32 value
1702 * @param pi32 Pointer to the 32-bit variable to read.
1703 */
1704DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1705{
1706 Assert(!((uintptr_t)pi32 & 3));
1707#if ARCH_BITS == 16
1708 AssertFailed(); /** @todo 16-bit */
1709#endif
1710 return *pi32;
1711}
1712
1713
1714/**
1715 * Atomically reads an unsigned 64-bit value, ordered.
1716 *
1717 * @returns Current *pu64 value
1718 * @param pu64 Pointer to the 64-bit variable to read.
1719 * The memory pointed to must be writable.
1720 *
1721 * @remarks This may fault if the memory is read-only!
1722 * @remarks x86: Requires a Pentium or later.
1723 */
1724#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1725 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1726DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1727#else
1728DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1729{
1730 uint64_t u64;
1731# ifdef RT_ARCH_AMD64
1732 Assert(!((uintptr_t)pu64 & 7));
1733/*# if RT_INLINE_ASM_GNU_STYLE
1734 __asm__ __volatile__( "mfence\n\t"
1735 "movq %1, %0\n\t"
1736 : "=r" (u64)
1737 : "m" (*pu64));
1738# else
1739 __asm
1740 {
1741 mfence
1742 mov rdx, [pu64]
1743 mov rax, [rdx]
1744 mov [u64], rax
1745 }
1746# endif*/
1747 ASMMemoryFence();
1748 u64 = *pu64;
1749# else /* !RT_ARCH_AMD64 */
1750# if RT_INLINE_ASM_GNU_STYLE
1751# if defined(PIC) || defined(__PIC__)
1752 uint32_t u32EBX = 0;
1753 Assert(!((uintptr_t)pu64 & 7));
1754 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1755 "lock; cmpxchg8b (%5)\n\t"
1756 "movl %3, %%ebx\n\t"
1757 : "=A" (u64),
1758# if RT_GNUC_PREREQ(4, 3)
1759 "+m" (*pu64)
1760# else
1761 "=m" (*pu64)
1762# endif
1763 : "0" (0ULL),
1764 "m" (u32EBX),
1765 "c" (0),
1766 "S" (pu64));
1767# else /* !PIC */
1768 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1769 : "=A" (u64),
1770 "+m" (*pu64)
1771 : "0" (0ULL),
1772 "b" (0),
1773 "c" (0));
1774# endif
1775# else
1776 Assert(!((uintptr_t)pu64 & 7));
1777 __asm
1778 {
1779 xor eax, eax
1780 xor edx, edx
1781 mov edi, pu64
1782 xor ecx, ecx
1783 xor ebx, ebx
1784 lock cmpxchg8b [edi]
1785 mov dword ptr [u64], eax
1786 mov dword ptr [u64 + 4], edx
1787 }
1788# endif
1789# endif /* !RT_ARCH_AMD64 */
1790 return u64;
1791}
1792#endif
1793
1794
1795/**
1796 * Atomically reads an unsigned 64-bit value, unordered.
1797 *
1798 * @returns Current *pu64 value
1799 * @param pu64 Pointer to the 64-bit variable to read.
1800 * The memory pointed to must be writable.
1801 *
1802 * @remarks This may fault if the memory is read-only!
1803 * @remarks x86: Requires a Pentium or later.
1804 */
1805#if !defined(RT_ARCH_AMD64) \
1806 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1807 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1808DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1809#else
1810DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1811{
1812 uint64_t u64;
1813# ifdef RT_ARCH_AMD64
1814 Assert(!((uintptr_t)pu64 & 7));
1815/*# if RT_INLINE_ASM_GNU_STYLE
1816 Assert(!((uintptr_t)pu64 & 7));
1817 __asm__ __volatile__("movq %1, %0\n\t"
1818 : "=r" (u64)
1819 : "m" (*pu64));
1820# else
1821 __asm
1822 {
1823 mov rdx, [pu64]
1824 mov rax, [rdx]
1825 mov [u64], rax
1826 }
1827# endif */
1828 u64 = *pu64;
1829# else /* !RT_ARCH_AMD64 */
1830# if RT_INLINE_ASM_GNU_STYLE
1831# if defined(PIC) || defined(__PIC__)
1832 uint32_t u32EBX = 0;
1833 uint32_t u32Spill;
1834 Assert(!((uintptr_t)pu64 & 7));
1835 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1836 "xor %%ecx,%%ecx\n\t"
1837 "xor %%edx,%%edx\n\t"
1838 "xchgl %%ebx, %3\n\t"
1839 "lock; cmpxchg8b (%4)\n\t"
1840 "movl %3, %%ebx\n\t"
1841 : "=A" (u64),
1842# if RT_GNUC_PREREQ(4, 3)
1843 "+m" (*pu64),
1844# else
1845 "=m" (*pu64),
1846# endif
1847 "=c" (u32Spill)
1848 : "m" (u32EBX),
1849 "S" (pu64));
1850# else /* !PIC */
1851 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1852 : "=A" (u64),
1853 "+m" (*pu64)
1854 : "0" (0ULL),
1855 "b" (0),
1856 "c" (0));
1857# endif
1858# else
1859 Assert(!((uintptr_t)pu64 & 7));
1860 __asm
1861 {
1862 xor eax, eax
1863 xor edx, edx
1864 mov edi, pu64
1865 xor ecx, ecx
1866 xor ebx, ebx
1867 lock cmpxchg8b [edi]
1868 mov dword ptr [u64], eax
1869 mov dword ptr [u64 + 4], edx
1870 }
1871# endif
1872# endif /* !RT_ARCH_AMD64 */
1873 return u64;
1874}
1875#endif
1876
1877
1878/**
1879 * Atomically reads a signed 64-bit value, ordered.
1880 *
1881 * @returns Current *pi64 value
1882 * @param pi64 Pointer to the 64-bit variable to read.
1883 * The memory pointed to must be writable.
1884 *
1885 * @remarks This may fault if the memory is read-only!
1886 * @remarks x86: Requires a Pentium or later.
1887 */
1888DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1889{
1890 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1891}
1892
1893
1894/**
1895 * Atomically reads a signed 64-bit value, unordered.
1896 *
1897 * @returns Current *pi64 value
1898 * @param pi64 Pointer to the 64-bit variable to read.
1899 * The memory pointed to must be writable.
1900 *
1901 * @remarks This will fault if the memory is read-only!
1902 * @remarks x86: Requires a Pentium or later.
1903 */
1904DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1905{
1906 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1907}
1908
1909
1910/**
1911 * Atomically reads a size_t value, ordered.
1912 *
1913 * @returns Current *pcb value
1914 * @param pcb Pointer to the size_t variable to read.
1915 */
1916DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1917{
1918#if ARCH_BITS == 64
1919 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1920#elif ARCH_BITS == 32
1921 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1922#elif ARCH_BITS == 16
1923 AssertCompileSize(size_t, 2);
1924 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1925#else
1926# error "Unsupported ARCH_BITS value"
1927#endif
1928}
1929
1930
1931/**
1932 * Atomically reads a size_t value, unordered.
1933 *
1934 * @returns Current *pcb value
1935 * @param pcb Pointer to the size_t variable to read.
1936 */
1937DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1938{
1939#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1940 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1941#elif ARCH_BITS == 32
1942 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1943#elif ARCH_BITS == 16
1944 AssertCompileSize(size_t, 2);
1945 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1946#else
1947# error "Unsupported ARCH_BITS value"
1948#endif
1949}
1950
1951
1952/**
1953 * Atomically reads a pointer value, ordered.
1954 *
1955 * @returns Current *pv value
1956 * @param ppv Pointer to the pointer variable to read.
1957 *
1958 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1959 * requires less typing (no casts).
1960 */
1961DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1962{
1963#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1964 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1965#elif ARCH_BITS == 64
1966 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1967#else
1968# error "ARCH_BITS is bogus"
1969#endif
1970}
1971
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
/* GCC flavour: the result is cast to the type of *ppv and then assigned to a
   variable of type Type, so a mismatch between the two gets diagnosed. */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv)))ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
/* Other compilers: plain casts only. */
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#endif
1991
1992
1993/**
1994 * Atomically reads a pointer value, unordered.
1995 *
1996 * @returns Current *pv value
1997 * @param ppv Pointer to the pointer variable to read.
1998 *
1999 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2000 * requires less typing (no casts).
2001 */
2002DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
2003{
2004#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2005 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
2006#elif ARCH_BITS == 64
2007 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
2008#else
2009# error "ARCH_BITS is bogus"
2010#endif
2011}
2012
2013
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
/* GCC flavour: the result is cast to the type of *ppv and then assigned to a
   variable of type Type, so a mismatch between the two gets diagnosed. */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv)))ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
/* Other compilers: plain casts only. */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#endif
2033
2034
2035/**
2036 * Atomically reads a boolean value, ordered.
2037 *
2038 * @returns Current *pf value
2039 * @param pf Pointer to the boolean variable to read.
2040 */
2041DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2042{
2043 ASMMemoryFence();
2044 return *pf; /* byte reads are atomic on x86 */
2045}
2046
2047
2048/**
2049 * Atomically reads a boolean value, unordered.
2050 *
2051 * @returns Current *pf value
2052 * @param pf Pointer to the boolean variable to read.
2053 */
2054DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2055{
2056 return *pf; /* byte reads are atomic on x86 */
2057}
2058
2059
2060/**
2061 * Atomically read a typical IPRT handle value, ordered.
2062 *
2063 * @param ph Pointer to the handle variable to read.
2064 * @param phRes Where to store the result.
2065 *
2066 * @remarks This doesn't currently work for all handles (like RTFILE).
2067 */
2068#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2069# define ASMAtomicReadHandle(ph, phRes) \
2070 do { \
2071 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2072 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2073 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2074 } while (0)
2075#elif HC_ARCH_BITS == 64
2076# define ASMAtomicReadHandle(ph, phRes) \
2077 do { \
2078 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2079 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2080 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2081 } while (0)
2082#else
2083# error HC_ARCH_BITS
2084#endif
2085
2086
2087/**
2088 * Atomically read a typical IPRT handle value, unordered.
2089 *
2090 * @param ph Pointer to the handle variable to read.
2091 * @param phRes Where to store the result.
2092 *
2093 * @remarks This doesn't currently work for all handles (like RTFILE).
2094 */
2095#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2096# define ASMAtomicUoReadHandle(ph, phRes) \
2097 do { \
2098 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2099 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2100 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2101 } while (0)
2102#elif HC_ARCH_BITS == 64
2103# define ASMAtomicUoReadHandle(ph, phRes) \
2104 do { \
2105 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2106 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2107 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2108 } while (0)
2109#else
2110# error HC_ARCH_BITS
2111#endif
2112
2113
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu); sizes 1, 2, 4 and 8 are handled, anything else
 * hits an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) \
        { \
            case 1: \
                *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); \
                break; \
            case 2: \
                *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); \
                break; \
            case 4: \
                *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); \
                break; \
            case 8: \
                *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2131
2132
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered read
 * function.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* Name this macro in the message; sizeof yields size_t, so cast it for %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2150
2151
2152/**
2153 * Atomically writes an unsigned 8-bit value, ordered.
2154 *
2155 * @param pu8 Pointer to the 8-bit variable.
2156 * @param u8 The 8-bit value to assign to *pu8.
2157 */
2158DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2159{
2160 ASMAtomicXchgU8(pu8, u8);
2161}
2162
2163
2164/**
2165 * Atomically writes an unsigned 8-bit value, unordered.
2166 *
2167 * @param pu8 Pointer to the 8-bit variable.
2168 * @param u8 The 8-bit value to assign to *pu8.
2169 */
2170DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2171{
2172 *pu8 = u8; /* byte writes are atomic on x86 */
2173}
2174
2175
2176/**
2177 * Atomically writes a signed 8-bit value, ordered.
2178 *
2179 * @param pi8 Pointer to the 8-bit variable to read.
2180 * @param i8 The 8-bit value to assign to *pi8.
2181 */
2182DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2183{
2184 ASMAtomicXchgS8(pi8, i8);
2185}
2186
2187
2188/**
2189 * Atomically writes a signed 8-bit value, unordered.
2190 *
2191 * @param pi8 Pointer to the 8-bit variable to write.
2192 * @param i8 The 8-bit value to assign to *pi8.
2193 */
2194DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2195{
2196 *pi8 = i8; /* byte writes are atomic on x86 */
2197}
2198
2199
2200/**
2201 * Atomically writes an unsigned 16-bit value, ordered.
2202 *
2203 * @param pu16 Pointer to the 16-bit variable to write.
2204 * @param u16 The 16-bit value to assign to *pu16.
2205 */
2206DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2207{
2208 ASMAtomicXchgU16(pu16, u16);
2209}
2210
2211
2212/**
2213 * Atomically writes an unsigned 16-bit value, unordered.
2214 *
2215 * @param pu16 Pointer to the 16-bit variable to write.
2216 * @param u16 The 16-bit value to assign to *pu16.
2217 */
2218DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2219{
2220 Assert(!((uintptr_t)pu16 & 1));
2221 *pu16 = u16;
2222}
2223
2224
2225/**
2226 * Atomically writes a signed 16-bit value, ordered.
2227 *
2228 * @param pi16 Pointer to the 16-bit variable to write.
2229 * @param i16 The 16-bit value to assign to *pi16.
2230 */
2231DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2232{
2233 ASMAtomicXchgS16(pi16, i16);
2234}
2235
2236
2237/**
2238 * Atomically writes a signed 16-bit value, unordered.
2239 *
2240 * @param pi16 Pointer to the 16-bit variable to write.
2241 * @param i16 The 16-bit value to assign to *pi16.
2242 */
2243DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2244{
2245 Assert(!((uintptr_t)pi16 & 1));
2246 *pi16 = i16;
2247}
2248
2249
2250/**
2251 * Atomically writes an unsigned 32-bit value, ordered.
2252 *
2253 * @param pu32 Pointer to the 32-bit variable to write.
2254 * @param u32 The 32-bit value to assign to *pu32.
2255 */
2256DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2257{
2258 ASMAtomicXchgU32(pu32, u32);
2259}
2260
2261
2262/**
2263 * Atomically writes an unsigned 32-bit value, unordered.
2264 *
2265 * @param pu32 Pointer to the 32-bit variable to write.
2266 * @param u32 The 32-bit value to assign to *pu32.
2267 */
2268DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2269{
2270 Assert(!((uintptr_t)pu32 & 3));
2271#if ARCH_BITS >= 32
2272 *pu32 = u32;
2273#else
2274 ASMAtomicXchgU32(pu32, u32);
2275#endif
2276}
2277
2278
2279/**
2280 * Atomically writes a signed 32-bit value, ordered.
2281 *
2282 * @param pi32 Pointer to the 32-bit variable to write.
2283 * @param i32 The 32-bit value to assign to *pi32.
2284 */
2285DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2286{
2287 ASMAtomicXchgS32(pi32, i32);
2288}
2289
2290
2291/**
2292 * Atomically writes a signed 32-bit value, unordered.
2293 *
2294 * @param pi32 Pointer to the 32-bit variable to write.
2295 * @param i32 The 32-bit value to assign to *pi32.
2296 */
2297DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2298{
2299 Assert(!((uintptr_t)pi32 & 3));
2300#if ARCH_BITS >= 32
2301 *pi32 = i32;
2302#else
2303 ASMAtomicXchgS32(pi32, i32);
2304#endif
2305}
2306
2307
2308/**
2309 * Atomically writes an unsigned 64-bit value, ordered.
2310 *
2311 * @param pu64 Pointer to the 64-bit variable to write.
2312 * @param u64 The 64-bit value to assign to *pu64.
2313 */
2314DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2315{
2316 ASMAtomicXchgU64(pu64, u64);
2317}
2318
2319
2320/**
2321 * Atomically writes an unsigned 64-bit value, unordered.
2322 *
2323 * @param pu64 Pointer to the 64-bit variable to write.
2324 * @param u64 The 64-bit value to assign to *pu64.
2325 */
2326DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2327{
2328 Assert(!((uintptr_t)pu64 & 7));
2329#if ARCH_BITS == 64
2330 *pu64 = u64;
2331#else
2332 ASMAtomicXchgU64(pu64, u64);
2333#endif
2334}
2335
2336
2337/**
2338 * Atomically writes a signed 64-bit value, ordered.
2339 *
2340 * @param pi64 Pointer to the 64-bit variable to write.
2341 * @param i64 The 64-bit value to assign to *pi64.
2342 */
2343DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2344{
2345 ASMAtomicXchgS64(pi64, i64);
2346}
2347
2348
2349/**
2350 * Atomically writes a signed 64-bit value, unordered.
2351 *
2352 * @param pi64 Pointer to the 64-bit variable to write.
2353 * @param i64 The 64-bit value to assign to *pi64.
2354 */
2355DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2356{
2357 Assert(!((uintptr_t)pi64 & 7));
2358#if ARCH_BITS == 64
2359 *pi64 = i64;
2360#else
2361 ASMAtomicXchgS64(pi64, i64);
2362#endif
2363}
2364
2365
2366/**
2367 * Atomically writes a boolean value, unordered.
2368 *
2369 * @param pf Pointer to the boolean variable to write.
2370 * @param f The boolean value to assign to *pf.
2371 */
2372DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2373{
2374 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2375}
2376
2377
2378/**
2379 * Atomically writes a boolean value, unordered.
2380 *
2381 * @param pf Pointer to the boolean variable to write.
2382 * @param f The boolean value to assign to *pf.
2383 */
2384DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2385{
2386 *pf = f; /* byte writes are atomic on x86 */
2387}
2388
2389
2390/**
2391 * Atomically writes a pointer value, ordered.
2392 *
2393 * @param ppv Pointer to the pointer variable to write.
2394 * @param pv The pointer value to assign to *ppv.
2395 */
2396DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2397{
2398#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2399 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2400#elif ARCH_BITS == 64
2401 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2402#else
2403# error "ARCH_BITS is bogus"
2404#endif
2405}
2406
2407
/**
 * Atomically writes a pointer value, ordered.
 *
 * Checks at compile time that both arguments are pointer sized, asserts that
 * the destination is aligned on a pointer-size boundary, and then hands the
 * write to ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks In the GCC variant the alignment assertion uses the type-checked
 *          copy of @a ppv, so the argument expression is not evaluated a
 *          second time at run time.  In the generic variant @a ppv is still
 *          evaluated once more when assertions are enabled.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv))            const pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2442
2443
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that the
 * destination is aligned on a pointer-size boundary, and then writes NULL
 * through ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        /* 'volatile' matches the sibling macros and accepts volatile-qualified arguments. */ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2469
2470
/**
 * Atomically writes a pointer value, unordered.
 *
 * This is a statement macro and yields no value.  After the compile time size
 * checks and the alignment assertion it performs a plain store to *ppv.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv))            const pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2505
2506
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * After the compile time size check and the alignment assertion this performs
 * a plain store of NULL to *ppv.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2532
2533
2534/**
2535 * Atomically write a typical IPRT handle value, ordered.
2536 *
2537 * @param ph Pointer to the variable to update.
2538 * @param hNew The value to assign to *ph.
2539 *
2540 * @remarks This doesn't currently work for all handles (like RTFILE).
2541 */
2542#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2543# define ASMAtomicWriteHandle(ph, hNew) \
2544 do { \
2545 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2546 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2547 } while (0)
2548#elif HC_ARCH_BITS == 64
2549# define ASMAtomicWriteHandle(ph, hNew) \
2550 do { \
2551 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2552 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2553 } while (0)
2554#else
2555# error HC_ARCH_BITS
2556#endif
2557
2558
2559/**
2560 * Atomically write a typical IPRT handle value, unordered.
2561 *
2562 * @param ph Pointer to the variable to update.
2563 * @param hNew The value to assign to *ph.
2564 *
2565 * @remarks This doesn't currently work for all handles (like RTFILE).
2566 */
2567#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2568# define ASMAtomicUoWriteHandle(ph, hNew) \
2569 do { \
2570 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2571 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2572 } while (0)
2573#elif HC_ARCH_BITS == 64
2574# define ASMAtomicUoWriteHandle(ph, hNew) \
2575 do { \
2576 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2577 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2578 } while (0)
2579#else
2580# error HC_ARCH_BITS
2581#endif
2582
2583
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered write
 * function.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2601
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered write
 * function.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro in the message; sizeof yields size_t, so cast it for %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2619
2620
2621
2622/**
2623 * Atomically exchanges and adds to a 16-bit value, ordered.
2624 *
2625 * @returns The old value.
2626 * @param pu16 Pointer to the value.
2627 * @param u16 Number to add.
2628 *
2629 * @remarks Currently not implemented, just to make 16-bit code happy.
2630 * @remarks x86: Requires a 486 or later.
2631 */
2632DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2633
2634
2635/**
2636 * Atomically exchanges and adds to a 32-bit value, ordered.
2637 *
2638 * @returns The old value.
2639 * @param pu32 Pointer to the value.
2640 * @param u32 Number to add.
2641 *
2642 * @remarks x86: Requires a 486 or later.
2643 */
2644#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2645DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2646#else
2647DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2648{
2649# if RT_INLINE_ASM_USES_INTRIN
2650 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2651 return u32;
2652
2653# elif RT_INLINE_ASM_GNU_STYLE
2654 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2655 : "=r" (u32),
2656 "=m" (*pu32)
2657 : "0" (u32),
2658 "m" (*pu32)
2659 : "memory");
2660 return u32;
2661# else
2662 __asm
2663 {
2664 mov eax, [u32]
2665# ifdef RT_ARCH_AMD64
2666 mov rdx, [pu32]
2667 lock xadd [rdx], eax
2668# else
2669 mov edx, [pu32]
2670 lock xadd [edx], eax
2671# endif
2672 mov [u32], eax
2673 }
2674 return u32;
2675# endif
2676}
2677#endif
2678
2679
2680/**
2681 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2682 *
2683 * @returns The old value.
2684 * @param pi32 Pointer to the value.
2685 * @param i32 Number to add.
2686 *
2687 * @remarks x86: Requires a 486 or later.
2688 */
2689DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2690{
2691 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2692}
2693
2694
2695/**
2696 * Atomically exchanges and adds to a 64-bit value, ordered.
2697 *
2698 * @returns The old value.
2699 * @param pu64 Pointer to the value.
2700 * @param u64 Number to add.
2701 *
2702 * @remarks x86: Requires a Pentium or later.
2703 */
2704#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2705DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2706#else
2707DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2708{
2709# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2710 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2711 return u64;
2712
2713# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2714 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2715 : "=r" (u64),
2716 "=m" (*pu64)
2717 : "0" (u64),
2718 "m" (*pu64)
2719 : "memory");
2720 return u64;
2721# else
2722 uint64_t u64Old;
2723 for (;;)
2724 {
2725 uint64_t u64New;
2726 u64Old = ASMAtomicUoReadU64(pu64);
2727 u64New = u64Old + u64;
2728 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2729 break;
2730 ASMNopPause();
2731 }
2732 return u64Old;
2733# endif
2734}
2735#endif
2736
2737
2738/**
2739 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2740 *
2741 * @returns The old value.
2742 * @param pi64 Pointer to the value.
2743 * @param i64 Number to add.
2744 *
2745 * @remarks x86: Requires a Pentium or later.
2746 */
2747DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2748{
2749 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2750}
2751
2752
2753/**
2754 * Atomically exchanges and adds to a size_t value, ordered.
2755 *
2756 * @returns The old value.
2757 * @param pcb Pointer to the size_t value.
2758 * @param cb Number to add.
2759 */
2760DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2761{
2762#if ARCH_BITS == 64
2763 AssertCompileSize(size_t, 8);
2764 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2765#elif ARCH_BITS == 32
2766 AssertCompileSize(size_t, 4);
2767 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2768#elif ARCH_BITS == 16
2769 AssertCompileSize(size_t, 2);
2770 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2771#else
2772# error "Unsupported ARCH_BITS value"
2773#endif
2774}
2775
2776
/**
 * Atomically exchanges and adds a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Only 4 and 8 byte variables are handled; any other size triggers an
 * assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t  *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t  *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2793
2794
2795
2796/**
2797 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2798 *
2799 * @returns The old value.
2800 * @param pu16 Pointer to the value.
2801 * @param u16 Number to subtract.
2802 *
2803 * @remarks x86: Requires a 486 or later.
2804 */
2805DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2806{
2807 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2808}
2809
2810
2811/**
2812 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2813 *
2814 * @returns The old value.
2815 * @param pi16 Pointer to the value.
2816 * @param i16 Number to subtract.
2817 *
2818 * @remarks x86: Requires a 486 or later.
2819 */
2820DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2821{
2822 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2823}
2824
2825
2826/**
2827 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2828 *
2829 * @returns The old value.
2830 * @param pu32 Pointer to the value.
2831 * @param u32 Number to subtract.
2832 *
2833 * @remarks x86: Requires a 486 or later.
2834 */
2835DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2836{
2837 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2838}
2839
2840
2841/**
2842 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2843 *
2844 * @returns The old value.
2845 * @param pi32 Pointer to the value.
2846 * @param i32 Number to subtract.
2847 *
2848 * @remarks x86: Requires a 486 or later.
2849 */
2850DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2851{
2852 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2853}
2854
2855
2856/**
2857 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2858 *
2859 * @returns The old value.
2860 * @param pu64 Pointer to the value.
2861 * @param u64 Number to subtract.
2862 *
2863 * @remarks x86: Requires a Pentium or later.
2864 */
2865DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2866{
2867 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2868}
2869
2870
2871/**
2872 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2873 *
2874 * @returns The old value.
2875 * @param pi64 Pointer to the value.
2876 * @param i64 Number to subtract.
2877 *
2878 * @remarks x86: Requires a Pentium or later.
2879 */
2880DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2881{
2882 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2883}
2884
2885
2886/**
2887 * Atomically exchanges and subtracts to a size_t value, ordered.
2888 *
2889 * @returns The old value.
2890 * @param pcb Pointer to the size_t value.
2891 * @param cb Number to subtract.
2892 *
2893 * @remarks x86: Requires a 486 or later.
2894 */
2895DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2896{
2897#if ARCH_BITS == 64
2898 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2899#elif ARCH_BITS == 32
2900 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2901#elif ARCH_BITS == 16
2902 AssertCompileSize(size_t, 2);
2903 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2904#else
2905# error "Unsupported ARCH_BITS value"
2906#endif
2907}
2908
2909
/**
 * Atomically exchanges and subtracts a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Only 4 and 8 byte variables are handled; any other size triggers an
 * assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t  *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t  *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2928
2929
2930
2931/**
2932 * Atomically increment a 16-bit value, ordered.
2933 *
2934 * @returns The new value.
2935 * @param pu16 Pointer to the value to increment.
2936 * @remarks Not implemented. Just to make 16-bit code happy.
2937 *
2938 * @remarks x86: Requires a 486 or later.
2939 */
2940DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2941
2942
2943/**
2944 * Atomically increment a 32-bit value, ordered.
2945 *
2946 * @returns The new value.
2947 * @param pu32 Pointer to the value to increment.
2948 *
2949 * @remarks x86: Requires a 486 or later.
2950 */
2951#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2952DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2953#else
2954DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2955{
2956 uint32_t u32;
2957# if RT_INLINE_ASM_USES_INTRIN
2958 u32 = _InterlockedIncrement((long *)pu32);
2959 return u32;
2960
2961# elif RT_INLINE_ASM_GNU_STYLE
2962 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2963 : "=r" (u32),
2964 "=m" (*pu32)
2965 : "0" (1),
2966 "m" (*pu32)
2967 : "memory");
2968 return u32+1;
2969# else
2970 __asm
2971 {
2972 mov eax, 1
2973# ifdef RT_ARCH_AMD64
2974 mov rdx, [pu32]
2975 lock xadd [rdx], eax
2976# else
2977 mov edx, [pu32]
2978 lock xadd [edx], eax
2979# endif
2980 mov u32, eax
2981 }
2982 return u32+1;
2983# endif
2984}
2985#endif
2986
2987
2988/**
2989 * Atomically increment a signed 32-bit value, ordered.
2990 *
2991 * @returns The new value.
2992 * @param pi32 Pointer to the value to increment.
2993 *
2994 * @remarks x86: Requires a 486 or later.
2995 */
2996DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2997{
2998 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2999}
3000
3001
3002/**
3003 * Atomically increment a 64-bit value, ordered.
3004 *
3005 * @returns The new value.
3006 * @param pu64 Pointer to the value to increment.
3007 *
3008 * @remarks x86: Requires a Pentium or later.
3009 */
3010#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3011DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
3012#else
3013DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
3014{
3015# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3016 uint64_t u64;
3017 u64 = _InterlockedIncrement64((__int64 *)pu64);
3018 return u64;
3019
3020# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3021 uint64_t u64;
3022 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3023 : "=r" (u64),
3024 "=m" (*pu64)
3025 : "0" (1),
3026 "m" (*pu64)
3027 : "memory");
3028 return u64 + 1;
3029# else
3030 return ASMAtomicAddU64(pu64, 1) + 1;
3031# endif
3032}
3033#endif
3034
3035
3036/**
3037 * Atomically increment a signed 64-bit value, ordered.
3038 *
3039 * @returns The new value.
3040 * @param pi64 Pointer to the value to increment.
3041 *
3042 * @remarks x86: Requires a Pentium or later.
3043 */
3044DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3045{
3046 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3047}
3048
3049
3050/**
3051 * Atomically increment a size_t value, ordered.
3052 *
3053 * @returns The new value.
3054 * @param pcb Pointer to the value to increment.
3055 *
3056 * @remarks x86: Requires a 486 or later.
3057 */
3058DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
3059{
3060#if ARCH_BITS == 64
3061 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3062#elif ARCH_BITS == 32
3063 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3064#elif ARCH_BITS == 16
3065 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3066#else
3067# error "Unsupported ARCH_BITS value"
3068#endif
3069}
3070
3071
3072
3073/**
3074 * Atomically decrement an unsigned 32-bit value, ordered.
3075 *
3076 * @returns The new value.
3077 * @param pu16 Pointer to the value to decrement.
3078 * @remarks Not implemented. Just to make 16-bit code happy.
3079 *
3080 * @remarks x86: Requires a 486 or later.
3081 */
3082DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3083
3084
3085/**
3086 * Atomically decrement an unsigned 32-bit value, ordered.
3087 *
3088 * @returns The new value.
3089 * @param pu32 Pointer to the value to decrement.
3090 *
3091 * @remarks x86: Requires a 486 or later.
3092 */
3093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3094DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3095#else
3096DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3097{
3098 uint32_t u32;
3099# if RT_INLINE_ASM_USES_INTRIN
3100 u32 = _InterlockedDecrement((long *)pu32);
3101 return u32;
3102
3103# elif RT_INLINE_ASM_GNU_STYLE
3104 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3105 : "=r" (u32),
3106 "=m" (*pu32)
3107 : "0" (-1),
3108 "m" (*pu32)
3109 : "memory");
3110 return u32-1;
3111# else
3112 __asm
3113 {
3114 mov eax, -1
3115# ifdef RT_ARCH_AMD64
3116 mov rdx, [pu32]
3117 lock xadd [rdx], eax
3118# else
3119 mov edx, [pu32]
3120 lock xadd [edx], eax
3121# endif
3122 mov u32, eax
3123 }
3124 return u32-1;
3125# endif
3126}
3127#endif
3128
3129
3130/**
3131 * Atomically decrement a signed 32-bit value, ordered.
3132 *
3133 * @returns The new value.
3134 * @param pi32 Pointer to the value to decrement.
3135 *
3136 * @remarks x86: Requires a 486 or later.
3137 */
3138DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3139{
3140 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3141}
3142
3143
3144/**
3145 * Atomically decrement an unsigned 64-bit value, ordered.
3146 *
3147 * @returns The new value.
3148 * @param pu64 Pointer to the value to decrement.
3149 *
3150 * @remarks x86: Requires a Pentium or later.
3151 */
3152#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3153DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3154#else
3155DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3156{
3157# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3158 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3159 return u64;
3160
3161# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3162 uint64_t u64;
3163 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3164 : "=r" (u64),
3165 "=m" (*pu64)
3166 : "0" (~(uint64_t)0),
3167 "m" (*pu64)
3168 : "memory");
3169 return u64-1;
3170# else
3171 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3172# endif
3173}
3174#endif
3175
3176
3177/**
3178 * Atomically decrement a signed 64-bit value, ordered.
3179 *
3180 * @returns The new value.
3181 * @param pi64 Pointer to the value to decrement.
3182 *
3183 * @remarks x86: Requires a Pentium or later.
3184 */
3185DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3186{
3187 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3188}
3189
3190
3191/**
3192 * Atomically decrement a size_t value, ordered.
3193 *
3194 * @returns The new value.
3195 * @param pcb Pointer to the value to decrement.
3196 *
3197 * @remarks x86: Requires a 486 or later.
3198 */
3199DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
3200{
3201#if ARCH_BITS == 64
3202 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3203#elif ARCH_BITS == 32
3204 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3205#elif ARCH_BITS == 16
3206 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3207#else
3208# error "Unsupported ARCH_BITS value"
3209#endif
3210}
3211
3212
3213/**
3214 * Atomically Or an unsigned 32-bit value, ordered.
3215 *
3216 * @param pu32 Pointer to the pointer variable to OR u32 with.
3217 * @param u32 The value to OR *pu32 with.
3218 *
3219 * @remarks x86: Requires a 386 or later.
3220 */
3221#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3222DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3223#else
3224DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3225{
3226# if RT_INLINE_ASM_USES_INTRIN
3227 _InterlockedOr((long volatile *)pu32, (long)u32);
3228
3229# elif RT_INLINE_ASM_GNU_STYLE
3230 __asm__ __volatile__("lock; orl %1, %0\n\t"
3231 : "=m" (*pu32)
3232 : "ir" (u32),
3233 "m" (*pu32));
3234# else
3235 __asm
3236 {
3237 mov eax, [u32]
3238# ifdef RT_ARCH_AMD64
3239 mov rdx, [pu32]
3240 lock or [rdx], eax
3241# else
3242 mov edx, [pu32]
3243 lock or [edx], eax
3244# endif
3245 }
3246# endif
3247}
3248#endif
3249
3250
3251/**
3252 * Atomically Or a signed 32-bit value, ordered.
3253 *
3254 * @param pi32 Pointer to the pointer variable to OR u32 with.
3255 * @param i32 The value to OR *pu32 with.
3256 *
3257 * @remarks x86: Requires a 386 or later.
3258 */
3259DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3260{
3261 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3262}
3263
3264
3265/**
3266 * Atomically Or an unsigned 64-bit value, ordered.
3267 *
3268 * @param pu64 Pointer to the pointer variable to OR u64 with.
3269 * @param u64 The value to OR *pu64 with.
3270 *
3271 * @remarks x86: Requires a Pentium or later.
3272 */
3273#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3274DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3275#else
3276DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3277{
3278# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3279 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3280
3281# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3282 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3283 : "=m" (*pu64)
3284 : "r" (u64),
3285 "m" (*pu64));
3286# else
3287 for (;;)
3288 {
3289 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3290 uint64_t u64New = u64Old | u64;
3291 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3292 break;
3293 ASMNopPause();
3294 }
3295# endif
3296}
3297#endif
3298
3299
3300/**
3301 * Atomically Or a signed 64-bit value, ordered.
3302 *
3303 * @param pi64 Pointer to the pointer variable to OR u64 with.
3304 * @param i64 The value to OR *pu64 with.
3305 *
3306 * @remarks x86: Requires a Pentium or later.
3307 */
3308DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3309{
3310 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3311}
3312
3313
3314/**
3315 * Atomically And an unsigned 32-bit value, ordered.
3316 *
3317 * @param pu32 Pointer to the pointer variable to AND u32 with.
3318 * @param u32 The value to AND *pu32 with.
3319 *
3320 * @remarks x86: Requires a 386 or later.
3321 */
3322#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3323DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3324#else
3325DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3326{
3327# if RT_INLINE_ASM_USES_INTRIN
3328 _InterlockedAnd((long volatile *)pu32, u32);
3329
3330# elif RT_INLINE_ASM_GNU_STYLE
3331 __asm__ __volatile__("lock; andl %1, %0\n\t"
3332 : "=m" (*pu32)
3333 : "ir" (u32),
3334 "m" (*pu32));
3335# else
3336 __asm
3337 {
3338 mov eax, [u32]
3339# ifdef RT_ARCH_AMD64
3340 mov rdx, [pu32]
3341 lock and [rdx], eax
3342# else
3343 mov edx, [pu32]
3344 lock and [edx], eax
3345# endif
3346 }
3347# endif
3348}
3349#endif
3350
3351
3352/**
3353 * Atomically And a signed 32-bit value, ordered.
3354 *
3355 * @param pi32 Pointer to the pointer variable to AND i32 with.
3356 * @param i32 The value to AND *pi32 with.
3357 *
3358 * @remarks x86: Requires a 386 or later.
3359 */
3360DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3361{
3362 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3363}
3364
3365
3366/**
3367 * Atomically And an unsigned 64-bit value, ordered.
3368 *
3369 * @param pu64 Pointer to the pointer variable to AND u64 with.
3370 * @param u64 The value to AND *pu64 with.
3371 *
3372 * @remarks x86: Requires a Pentium or later.
3373 */
3374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3375DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3376#else
3377DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3378{
3379# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3380 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3381
3382# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3383 __asm__ __volatile__("lock; andq %1, %0\n\t"
3384 : "=m" (*pu64)
3385 : "r" (u64),
3386 "m" (*pu64));
3387# else
3388 for (;;)
3389 {
3390 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3391 uint64_t u64New = u64Old & u64;
3392 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3393 break;
3394 ASMNopPause();
3395 }
3396# endif
3397}
3398#endif
3399
3400
3401/**
3402 * Atomically And a signed 64-bit value, ordered.
3403 *
3404 * @param pi64 Pointer to the pointer variable to AND i64 with.
3405 * @param i64 The value to AND *pi64 with.
3406 *
3407 * @remarks x86: Requires a Pentium or later.
3408 */
3409DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3410{
3411 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3412}
3413
3414
3415/**
3416 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3417 *
3418 * @param pu32 Pointer to the pointer variable to OR u32 with.
3419 * @param u32 The value to OR *pu32 with.
3420 *
3421 * @remarks x86: Requires a 386 or later.
3422 */
3423#if RT_INLINE_ASM_EXTERNAL
3424DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3425#else
3426DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3427{
3428# if RT_INLINE_ASM_GNU_STYLE
3429 __asm__ __volatile__("orl %1, %0\n\t"
3430 : "=m" (*pu32)
3431 : "ir" (u32),
3432 "m" (*pu32));
3433# else
3434 __asm
3435 {
3436 mov eax, [u32]
3437# ifdef RT_ARCH_AMD64
3438 mov rdx, [pu32]
3439 or [rdx], eax
3440# else
3441 mov edx, [pu32]
3442 or [edx], eax
3443# endif
3444 }
3445# endif
3446}
3447#endif
3448
3449
3450/**
3451 * Atomically OR a signed 32-bit value, unordered.
3452 *
3453 * @param pi32 Pointer to the pointer variable to OR u32 with.
3454 * @param i32 The value to OR *pu32 with.
3455 *
3456 * @remarks x86: Requires a 386 or later.
3457 */
3458DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3459{
3460 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3461}
3462
3463
3464/**
3465 * Atomically OR an unsigned 64-bit value, unordered.
3466 *
3467 * @param pu64 Pointer to the pointer variable to OR u64 with.
3468 * @param u64 The value to OR *pu64 with.
3469 *
3470 * @remarks x86: Requires a Pentium or later.
3471 */
3472#if RT_INLINE_ASM_EXTERNAL
3473DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3474#else
3475DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3476{
3477# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3478 __asm__ __volatile__("orq %1, %q0\n\t"
3479 : "=m" (*pu64)
3480 : "r" (u64),
3481 "m" (*pu64));
3482# else
3483 for (;;)
3484 {
3485 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3486 uint64_t u64New = u64Old | u64;
3487 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3488 break;
3489 ASMNopPause();
3490 }
3491# endif
3492}
3493#endif
3494
3495
3496/**
3497 * Atomically Or a signed 64-bit value, unordered.
3498 *
3499 * @param pi64 Pointer to the pointer variable to OR u64 with.
3500 * @param i64 The value to OR *pu64 with.
3501 *
3502 * @remarks x86: Requires a Pentium or later.
3503 */
3504DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3505{
3506 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3507}
3508
3509
3510/**
3511 * Atomically And an unsigned 32-bit value, unordered.
3512 *
3513 * @param pu32 Pointer to the pointer variable to AND u32 with.
3514 * @param u32 The value to AND *pu32 with.
3515 *
3516 * @remarks x86: Requires a 386 or later.
3517 */
3518#if RT_INLINE_ASM_EXTERNAL
3519DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3520#else
3521DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3522{
3523# if RT_INLINE_ASM_GNU_STYLE
3524 __asm__ __volatile__("andl %1, %0\n\t"
3525 : "=m" (*pu32)
3526 : "ir" (u32),
3527 "m" (*pu32));
3528# else
3529 __asm
3530 {
3531 mov eax, [u32]
3532# ifdef RT_ARCH_AMD64
3533 mov rdx, [pu32]
3534 and [rdx], eax
3535# else
3536 mov edx, [pu32]
3537 and [edx], eax
3538# endif
3539 }
3540# endif
3541}
3542#endif
3543
3544
3545/**
3546 * Atomically And a signed 32-bit value, unordered.
3547 *
3548 * @param pi32 Pointer to the pointer variable to AND i32 with.
3549 * @param i32 The value to AND *pi32 with.
3550 *
3551 * @remarks x86: Requires a 386 or later.
3552 */
3553DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3554{
3555 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3556}
3557
3558
3559/**
3560 * Atomically And an unsigned 64-bit value, unordered.
3561 *
3562 * @param pu64 Pointer to the pointer variable to AND u64 with.
3563 * @param u64 The value to AND *pu64 with.
3564 *
3565 * @remarks x86: Requires a Pentium or later.
3566 */
3567#if RT_INLINE_ASM_EXTERNAL
3568DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3569#else
3570DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3571{
3572# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3573 __asm__ __volatile__("andq %1, %0\n\t"
3574 : "=m" (*pu64)
3575 : "r" (u64),
3576 "m" (*pu64));
3577# else
3578 for (;;)
3579 {
3580 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3581 uint64_t u64New = u64Old & u64;
3582 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3583 break;
3584 ASMNopPause();
3585 }
3586# endif
3587}
3588#endif
3589
3590
3591/**
3592 * Atomically And a signed 64-bit value, unordered.
3593 *
3594 * @param pi64 Pointer to the pointer variable to AND i64 with.
3595 * @param i64 The value to AND *pi64 with.
3596 *
3597 * @remarks x86: Requires a Pentium or later.
3598 */
3599DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3600{
3601 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3602}
3603
3604
3605/**
3606 * Atomically increment an unsigned 32-bit value, unordered.
3607 *
3608 * @returns the new value.
3609 * @param pu32 Pointer to the variable to increment.
3610 *
3611 * @remarks x86: Requires a 486 or later.
3612 */
3613#if RT_INLINE_ASM_EXTERNAL
3614DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3615#else
3616DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3617{
3618 uint32_t u32;
3619# if RT_INLINE_ASM_GNU_STYLE
3620 __asm__ __volatile__("xaddl %0, %1\n\t"
3621 : "=r" (u32),
3622 "=m" (*pu32)
3623 : "0" (1),
3624 "m" (*pu32)
3625 : "memory");
3626 return u32 + 1;
3627# else
3628 __asm
3629 {
3630 mov eax, 1
3631# ifdef RT_ARCH_AMD64
3632 mov rdx, [pu32]
3633 xadd [rdx], eax
3634# else
3635 mov edx, [pu32]
3636 xadd [edx], eax
3637# endif
3638 mov u32, eax
3639 }
3640 return u32 + 1;
3641# endif
3642}
3643#endif
3644
3645
3646/**
3647 * Atomically decrement an unsigned 32-bit value, unordered.
3648 *
3649 * @returns the new value.
3650 * @param pu32 Pointer to the variable to decrement.
3651 *
3652 * @remarks x86: Requires a 486 or later.
3653 */
3654#if RT_INLINE_ASM_EXTERNAL
3655DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3656#else
3657DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3658{
3659 uint32_t u32;
3660# if RT_INLINE_ASM_GNU_STYLE
3661 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3662 : "=r" (u32),
3663 "=m" (*pu32)
3664 : "0" (-1),
3665 "m" (*pu32)
3666 : "memory");
3667 return u32 - 1;
3668# else
3669 __asm
3670 {
3671 mov eax, -1
3672# ifdef RT_ARCH_AMD64
3673 mov rdx, [pu32]
3674 xadd [rdx], eax
3675# else
3676 mov edx, [pu32]
3677 xadd [edx], eax
3678# endif
3679 mov u32, eax
3680 }
3681 return u32 - 1;
3682# endif
3683}
3684#endif
3685
3686
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the page helpers in this header: 0x2000 when
 * RT_ARCH_SPARC64 is defined, 0x1000 otherwise.  It is defined here to avoid
 * dragging in iprt/param.h.  If PAGE_SIZE is defined (and NT_INCLUDED is
 * not), a mismatch between the two is a compile-time error.
 * @internal
 */
#ifdef RT_ARCH_SPARC64
# define RT_ASM_PAGE_SIZE   0x2000
# if !defined(NT_INCLUDED) && defined(PAGE_SIZE)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# if !defined(NT_INCLUDED) && defined(PAGE_SIZE)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3706
3707/**
3708 * Zeros a 4K memory page.
3709 *
3710 * @param pv Pointer to the memory block. This must be page aligned.
3711 */
3712#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3713DECLASM(void) ASMMemZeroPage(volatile void *pv);
3714# else
3715DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3716{
3717# if RT_INLINE_ASM_USES_INTRIN
3718# ifdef RT_ARCH_AMD64
3719 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3720# else
3721 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3722# endif
3723
3724# elif RT_INLINE_ASM_GNU_STYLE
3725 RTCCUINTREG uDummy;
3726# ifdef RT_ARCH_AMD64
3727 __asm__ __volatile__("rep stosq"
3728 : "=D" (pv),
3729 "=c" (uDummy)
3730 : "0" (pv),
3731 "c" (RT_ASM_PAGE_SIZE >> 3),
3732 "a" (0)
3733 : "memory");
3734# else
3735 __asm__ __volatile__("rep stosl"
3736 : "=D" (pv),
3737 "=c" (uDummy)
3738 : "0" (pv),
3739 "c" (RT_ASM_PAGE_SIZE >> 2),
3740 "a" (0)
3741 : "memory");
3742# endif
3743# else
3744 __asm
3745 {
3746# ifdef RT_ARCH_AMD64
3747 xor rax, rax
3748 mov ecx, 0200h
3749 mov rdi, [pv]
3750 rep stosq
3751# else
3752 xor eax, eax
3753 mov ecx, 0400h
3754 mov edi, [pv]
3755 rep stosd
3756# endif
3757 }
3758# endif
3759}
3760# endif
3761
3762
3763/**
3764 * Zeros a memory block with a 32-bit aligned size.
3765 *
3766 * @param pv Pointer to the memory block.
3767 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3768 */
3769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3770DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3771#else
3772DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3773{
3774# if RT_INLINE_ASM_USES_INTRIN
3775# ifdef RT_ARCH_AMD64
3776 if (!(cb & 7))
3777 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3778 else
3779# endif
3780 __stosd((unsigned long *)pv, 0, cb / 4);
3781
3782# elif RT_INLINE_ASM_GNU_STYLE
3783 __asm__ __volatile__("rep stosl"
3784 : "=D" (pv),
3785 "=c" (cb)
3786 : "0" (pv),
3787 "1" (cb >> 2),
3788 "a" (0)
3789 : "memory");
3790# else
3791 __asm
3792 {
3793 xor eax, eax
3794# ifdef RT_ARCH_AMD64
3795 mov rcx, [cb]
3796 shr rcx, 2
3797 mov rdi, [pv]
3798# else
3799 mov ecx, [cb]
3800 shr ecx, 2
3801 mov edi, [pv]
3802# endif
3803 rep stosd
3804 }
3805# endif
3806}
3807#endif
3808
3809
3810/**
3811 * Fills a memory block with a 32-bit aligned size.
3812 *
3813 * @param pv Pointer to the memory block.
3814 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3815 * @param u32 The value to fill with.
3816 */
3817#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3818DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3819#else
3820DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3821{
3822# if RT_INLINE_ASM_USES_INTRIN
3823# ifdef RT_ARCH_AMD64
3824 if (!(cb & 7))
3825 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3826 else
3827# endif
3828 __stosd((unsigned long *)pv, u32, cb / 4);
3829
3830# elif RT_INLINE_ASM_GNU_STYLE
3831 __asm__ __volatile__("rep stosl"
3832 : "=D" (pv),
3833 "=c" (cb)
3834 : "0" (pv),
3835 "1" (cb >> 2),
3836 "a" (u32)
3837 : "memory");
3838# else
3839 __asm
3840 {
3841# ifdef RT_ARCH_AMD64
3842 mov rcx, [cb]
3843 shr rcx, 2
3844 mov rdi, [pv]
3845# else
3846 mov ecx, [cb]
3847 shr ecx, 2
3848 mov edi, [pv]
3849# endif
3850 mov eax, [u32]
3851 rep stosd
3852 }
3853# endif
3854}
3855#endif
3856
3857
3858/**
3859 * Checks if a memory block is all zeros.
3860 *
3861 * @returns Pointer to the first non-zero byte.
3862 * @returns NULL if all zero.
3863 *
3864 * @param pv Pointer to the memory block.
3865 * @param cb Number of bytes in the block.
3866 *
3867 * @todo Fix name, it is a predicate function but it's not returning boolean!
3868 */
3869#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3870DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3871#else
3872DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3873{
3874 uint8_t const *pb = (uint8_t const *)pv;
3875 for (; cb; cb--, pb++)
3876 if (RT_LIKELY(*pb == 0))
3877 { /* likely */ }
3878 else
3879 return (void *)pb;
3880 return NULL;
3881}
3882#endif
3883
3884
3885/**
3886 * Checks if a memory block is all zeros.
3887 *
3888 * @returns true if zero, false if not.
3889 *
3890 * @param pv Pointer to the memory block.
3891 * @param cb Number of bytes in the block.
3892 *
3893 * @sa ASMMemFirstNonZero
3894 */
3895DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3896{
3897 return ASMMemFirstNonZero(pv, cb) == NULL;
3898}
3899
3900
3901/**
3902 * Checks if a memory page is all zeros.
3903 *
3904 * @returns true / false.
3905 *
3906 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3907 * boundary
3908 */
3909DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3910{
3911# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3912 union { RTCCUINTREG r; bool f; } uAX;
3913 RTCCUINTREG xCX, xDI;
3914 Assert(!((uintptr_t)pvPage & 15));
3915 __asm__ __volatile__("repe; "
3916# ifdef RT_ARCH_AMD64
3917 "scasq\n\t"
3918# else
3919 "scasl\n\t"
3920# endif
3921 "setnc %%al\n\t"
3922 : "=&c" (xCX),
3923 "=&D" (xDI),
3924 "=&a" (uAX.r)
3925 : "mr" (pvPage),
3926# ifdef RT_ARCH_AMD64
3927 "0" (RT_ASM_PAGE_SIZE/8),
3928# else
3929 "0" (RT_ASM_PAGE_SIZE/4),
3930# endif
3931 "1" (pvPage),
3932 "2" (0));
3933 return uAX.f;
3934# else
3935 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3936 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3937 Assert(!((uintptr_t)pvPage & 15));
3938 for (;;)
3939 {
3940 if (puPtr[0]) return false;
3941 if (puPtr[4]) return false;
3942
3943 if (puPtr[2]) return false;
3944 if (puPtr[6]) return false;
3945
3946 if (puPtr[1]) return false;
3947 if (puPtr[5]) return false;
3948
3949 if (puPtr[3]) return false;
3950 if (puPtr[7]) return false;
3951
3952 if (!--cLeft)
3953 return true;
3954 puPtr += 8;
3955 }
3956 return true;
3957# endif
3958}
3959
3960
3961/**
3962 * Checks if a memory block is filled with the specified byte, returning the
3963 * first mismatch.
3964 *
3965 * This is sort of an inverted memchr.
3966 *
3967 * @returns Pointer to the byte which doesn't equal u8.
3968 * @returns NULL if all equal to u8.
3969 *
3970 * @param pv Pointer to the memory block.
3971 * @param cb Number of bytes in the block.
3972 * @param u8 The value it's supposed to be filled with.
3973 *
3974 * @remarks No alignment requirements.
3975 */
3976#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3977 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
3978DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3979#else
3980DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3981{
3982 uint8_t const *pb = (uint8_t const *)pv;
3983 for (; cb; cb--, pb++)
3984 if (RT_LIKELY(*pb == u8))
3985 { /* likely */ }
3986 else
3987 return (void *)pb;
3988 return NULL;
3989}
3990#endif
3991
3992
3993/**
3994 * Checks if a memory block is filled with the specified byte.
3995 *
3996 * @returns true if all matching, false if not.
3997 *
3998 * @param pv Pointer to the memory block.
3999 * @param cb Number of bytes in the block.
4000 * @param u8 The value it's supposed to be filled with.
4001 *
4002 * @remarks No alignment requirements.
4003 */
4004DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
4005{
4006 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4007}
4008
4009
4010/**
4011 * Checks if a memory block is filled with the specified 32-bit value.
4012 *
4013 * This is a sort of inverted memchr.
4014 *
4015 * @returns Pointer to the first value which doesn't equal u32.
4016 * @returns NULL if all equal to u32.
4017 *
4018 * @param pv Pointer to the memory block.
4019 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4020 * @param u32 The value it's supposed to be filled with.
4021 */
4022DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
4023{
4024/** @todo rewrite this in inline assembly? */
4025 uint32_t const *pu32 = (uint32_t const *)pv;
4026 for (; cb; cb -= 4, pu32++)
4027 if (RT_LIKELY(*pu32 == u32))
4028 { /* likely */ }
4029 else
4030 return (uint32_t *)pu32;
4031 return NULL;
4032}
4033
4034
4035/**
4036 * Probes a byte pointer for read access.
4037 *
4038 * While the function will not fault if the byte is not read accessible,
4039 * the idea is to do this in a safe place like before acquiring locks
4040 * and such like.
4041 *
4042 * Also, this functions guarantees that an eager compiler is not going
4043 * to optimize the probing away.
4044 *
4045 * @param pvByte Pointer to the byte.
4046 */
4047#if RT_INLINE_ASM_EXTERNAL
4048DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4049#else
4050DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4051{
4052 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4053 uint8_t u8;
4054# if RT_INLINE_ASM_GNU_STYLE
4055 __asm__ __volatile__("movb (%1), %0\n\t"
4056 : "=r" (u8)
4057 : "r" (pvByte));
4058# else
4059 __asm
4060 {
4061# ifdef RT_ARCH_AMD64
4062 mov rax, [pvByte]
4063 mov al, [rax]
4064# else
4065 mov eax, [pvByte]
4066 mov al, [eax]
4067# endif
4068 mov [u8], al
4069 }
4070# endif
4071 return u8;
4072}
4073#endif
4074
4075/**
4076 * Probes a buffer for read access page by page.
4077 *
4078 * While the function will fault if the buffer is not fully read
4079 * accessible, the idea is to do this in a safe place like before
4080 * acquiring locks and such like.
4081 *
4082 * Also, this functions guarantees that an eager compiler is not going
4083 * to optimize the probing away.
4084 *
4085 * @param pvBuf Pointer to the buffer.
4086 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4087 */
4088DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4089{
4090 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4091 /* the first byte */
4092 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4093 ASMProbeReadByte(pu8);
4094
4095 /* the pages in between pages. */
4096 while (cbBuf > RT_ASM_PAGE_SIZE)
4097 {
4098 ASMProbeReadByte(pu8);
4099 cbBuf -= RT_ASM_PAGE_SIZE;
4100 pu8 += RT_ASM_PAGE_SIZE;
4101 }
4102
4103 /* the last byte */
4104 ASMProbeReadByte(pu8 + cbBuf - 1);
4105}
4106
4107
4108
4109/** @defgroup grp_inline_bits Bit Operations
4110 * @{
4111 */
4112
4113
4114/**
4115 * Sets a bit in a bitmap.
4116 *
4117 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4118 * @param iBit The bit to set.
4119 *
4120 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4121 * However, doing so will yield better performance as well as avoiding
4122 * traps accessing the last bits in the bitmap.
4123 */
4124#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4125DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4126#else
4127DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4128{
4129# if RT_INLINE_ASM_USES_INTRIN
4130 _bittestandset((long *)pvBitmap, iBit);
4131
4132# elif RT_INLINE_ASM_GNU_STYLE
4133 __asm__ __volatile__("btsl %1, %0"
4134 : "=m" (*(volatile long *)pvBitmap)
4135 : "Ir" (iBit),
4136 "m" (*(volatile long *)pvBitmap)
4137 : "memory");
4138# else
4139 __asm
4140 {
4141# ifdef RT_ARCH_AMD64
4142 mov rax, [pvBitmap]
4143 mov edx, [iBit]
4144 bts [rax], edx
4145# else
4146 mov eax, [pvBitmap]
4147 mov edx, [iBit]
4148 bts [eax], edx
4149# endif
4150 }
4151# endif
4152}
4153#endif
4154
4155
4156/**
4157 * Atomically sets a bit in a bitmap, ordered.
4158 *
4159 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4160 * the memory access isn't atomic!
4161 * @param iBit The bit to set.
4162 *
4163 * @remarks x86: Requires a 386 or later.
4164 */
4165#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4166DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4167#else
4168DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4169{
4170 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4171# if RT_INLINE_ASM_USES_INTRIN
4172 _interlockedbittestandset((long *)pvBitmap, iBit);
4173# elif RT_INLINE_ASM_GNU_STYLE
4174 __asm__ __volatile__("lock; btsl %1, %0"
4175 : "=m" (*(volatile long *)pvBitmap)
4176 : "Ir" (iBit),
4177 "m" (*(volatile long *)pvBitmap)
4178 : "memory");
4179# else
4180 __asm
4181 {
4182# ifdef RT_ARCH_AMD64
4183 mov rax, [pvBitmap]
4184 mov edx, [iBit]
4185 lock bts [rax], edx
4186# else
4187 mov eax, [pvBitmap]
4188 mov edx, [iBit]
4189 lock bts [eax], edx
4190# endif
4191 }
4192# endif
4193}
4194#endif
4195
4196
4197/**
4198 * Clears a bit in a bitmap.
4199 *
4200 * @param pvBitmap Pointer to the bitmap.
4201 * @param iBit The bit to clear.
4202 *
4203 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4204 * However, doing so will yield better performance as well as avoiding
4205 * traps accessing the last bits in the bitmap.
4206 */
4207#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4208DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4209#else
4210DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4211{
4212# if RT_INLINE_ASM_USES_INTRIN
4213 _bittestandreset((long *)pvBitmap, iBit);
4214
4215# elif RT_INLINE_ASM_GNU_STYLE
4216 __asm__ __volatile__("btrl %1, %0"
4217 : "=m" (*(volatile long *)pvBitmap)
4218 : "Ir" (iBit),
4219 "m" (*(volatile long *)pvBitmap)
4220 : "memory");
4221# else
4222 __asm
4223 {
4224# ifdef RT_ARCH_AMD64
4225 mov rax, [pvBitmap]
4226 mov edx, [iBit]
4227 btr [rax], edx
4228# else
4229 mov eax, [pvBitmap]
4230 mov edx, [iBit]
4231 btr [eax], edx
4232# endif
4233 }
4234# endif
4235}
4236#endif
4237
4238
4239/**
4240 * Atomically clears a bit in a bitmap, ordered.
4241 *
4242 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4243 * the memory access isn't atomic!
4244 * @param iBit The bit to toggle set.
4245 *
4246 * @remarks No memory barrier, take care on smp.
4247 * @remarks x86: Requires a 386 or later.
4248 */
4249#if RT_INLINE_ASM_EXTERNAL
4250DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4251#else
4252DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4253{
4254 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4255# if RT_INLINE_ASM_GNU_STYLE
4256 __asm__ __volatile__("lock; btrl %1, %0"
4257 : "=m" (*(volatile long *)pvBitmap)
4258 : "Ir" (iBit),
4259 "m" (*(volatile long *)pvBitmap)
4260 : "memory");
4261# else
4262 __asm
4263 {
4264# ifdef RT_ARCH_AMD64
4265 mov rax, [pvBitmap]
4266 mov edx, [iBit]
4267 lock btr [rax], edx
4268# else
4269 mov eax, [pvBitmap]
4270 mov edx, [iBit]
4271 lock btr [eax], edx
4272# endif
4273 }
4274# endif
4275}
4276#endif
4277
4278
4279/**
4280 * Toggles a bit in a bitmap.
4281 *
4282 * @param pvBitmap Pointer to the bitmap.
4283 * @param iBit The bit to toggle.
4284 *
4285 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4286 * However, doing so will yield better performance as well as avoiding
4287 * traps accessing the last bits in the bitmap.
4288 */
4289#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4290DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4291#else
4292DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4293{
4294# if RT_INLINE_ASM_USES_INTRIN
4295 _bittestandcomplement((long *)pvBitmap, iBit);
4296# elif RT_INLINE_ASM_GNU_STYLE
4297 __asm__ __volatile__("btcl %1, %0"
4298 : "=m" (*(volatile long *)pvBitmap)
4299 : "Ir" (iBit),
4300 "m" (*(volatile long *)pvBitmap)
4301 : "memory");
4302# else
4303 __asm
4304 {
4305# ifdef RT_ARCH_AMD64
4306 mov rax, [pvBitmap]
4307 mov edx, [iBit]
4308 btc [rax], edx
4309# else
4310 mov eax, [pvBitmap]
4311 mov edx, [iBit]
4312 btc [eax], edx
4313# endif
4314 }
4315# endif
4316}
4317#endif
4318
4319
4320/**
4321 * Atomically toggles a bit in a bitmap, ordered.
4322 *
4323 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4324 * the memory access isn't atomic!
4325 * @param iBit The bit to test and set.
4326 *
4327 * @remarks x86: Requires a 386 or later.
4328 */
4329#if RT_INLINE_ASM_EXTERNAL
4330DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4331#else
4332DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4333{
4334 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4335# if RT_INLINE_ASM_GNU_STYLE
4336 __asm__ __volatile__("lock; btcl %1, %0"
4337 : "=m" (*(volatile long *)pvBitmap)
4338 : "Ir" (iBit),
4339 "m" (*(volatile long *)pvBitmap)
4340 : "memory");
4341# else
4342 __asm
4343 {
4344# ifdef RT_ARCH_AMD64
4345 mov rax, [pvBitmap]
4346 mov edx, [iBit]
4347 lock btc [rax], edx
4348# else
4349 mov eax, [pvBitmap]
4350 mov edx, [iBit]
4351 lock btc [eax], edx
4352# endif
4353 }
4354# endif
4355}
4356#endif
4357
4358
4359/**
4360 * Tests and sets a bit in a bitmap.
4361 *
4362 * @returns true if the bit was set.
4363 * @returns false if the bit was clear.
4364 *
4365 * @param pvBitmap Pointer to the bitmap.
4366 * @param iBit The bit to test and set.
4367 *
4368 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4369 * However, doing so will yield better performance as well as avoiding
4370 * traps accessing the last bits in the bitmap.
4371 */
4372#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4373DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4374#else
4375DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4376{
4377 union { bool f; uint32_t u32; uint8_t u8; } rc;
4378# if RT_INLINE_ASM_USES_INTRIN
4379 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4380
4381# elif RT_INLINE_ASM_GNU_STYLE
4382 __asm__ __volatile__("btsl %2, %1\n\t"
4383 "setc %b0\n\t"
4384 "andl $1, %0\n\t"
4385 : "=q" (rc.u32),
4386 "=m" (*(volatile long *)pvBitmap)
4387 : "Ir" (iBit),
4388 "m" (*(volatile long *)pvBitmap)
4389 : "memory");
4390# else
4391 __asm
4392 {
4393 mov edx, [iBit]
4394# ifdef RT_ARCH_AMD64
4395 mov rax, [pvBitmap]
4396 bts [rax], edx
4397# else
4398 mov eax, [pvBitmap]
4399 bts [eax], edx
4400# endif
4401 setc al
4402 and eax, 1
4403 mov [rc.u32], eax
4404 }
4405# endif
4406 return rc.f;
4407}
4408#endif
4409
4410
4411/**
4412 * Atomically tests and sets a bit in a bitmap, ordered.
4413 *
4414 * @returns true if the bit was set.
4415 * @returns false if the bit was clear.
4416 *
4417 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4418 * the memory access isn't atomic!
4419 * @param iBit The bit to set.
4420 *
4421 * @remarks x86: Requires a 386 or later.
4422 */
4423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4424DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4425#else
4426DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4427{
4428 union { bool f; uint32_t u32; uint8_t u8; } rc;
4429 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4430# if RT_INLINE_ASM_USES_INTRIN
4431 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4432# elif RT_INLINE_ASM_GNU_STYLE
4433 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4434 "setc %b0\n\t"
4435 "andl $1, %0\n\t"
4436 : "=q" (rc.u32),
4437 "=m" (*(volatile long *)pvBitmap)
4438 : "Ir" (iBit),
4439 "m" (*(volatile long *)pvBitmap)
4440 : "memory");
4441# else
4442 __asm
4443 {
4444 mov edx, [iBit]
4445# ifdef RT_ARCH_AMD64
4446 mov rax, [pvBitmap]
4447 lock bts [rax], edx
4448# else
4449 mov eax, [pvBitmap]
4450 lock bts [eax], edx
4451# endif
4452 setc al
4453 and eax, 1
4454 mov [rc.u32], eax
4455 }
4456# endif
4457 return rc.f;
4458}
4459#endif
4460
4461
4462/**
4463 * Tests and clears a bit in a bitmap.
4464 *
4465 * @returns true if the bit was set.
4466 * @returns false if the bit was clear.
4467 *
4468 * @param pvBitmap Pointer to the bitmap.
4469 * @param iBit The bit to test and clear.
4470 *
4471 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4472 * However, doing so will yield better performance as well as avoiding
4473 * traps accessing the last bits in the bitmap.
4474 */
4475#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4476DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4477#else
4478DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4479{
4480 union { bool f; uint32_t u32; uint8_t u8; } rc;
4481# if RT_INLINE_ASM_USES_INTRIN
4482 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4483
4484# elif RT_INLINE_ASM_GNU_STYLE
4485 __asm__ __volatile__("btrl %2, %1\n\t"
4486 "setc %b0\n\t"
4487 "andl $1, %0\n\t"
4488 : "=q" (rc.u32),
4489 "=m" (*(volatile long *)pvBitmap)
4490 : "Ir" (iBit),
4491 "m" (*(volatile long *)pvBitmap)
4492 : "memory");
4493# else
4494 __asm
4495 {
4496 mov edx, [iBit]
4497# ifdef RT_ARCH_AMD64
4498 mov rax, [pvBitmap]
4499 btr [rax], edx
4500# else
4501 mov eax, [pvBitmap]
4502 btr [eax], edx
4503# endif
4504 setc al
4505 and eax, 1
4506 mov [rc.u32], eax
4507 }
4508# endif
4509 return rc.f;
4510}
4511#endif
4512
4513
4514/**
4515 * Atomically tests and clears a bit in a bitmap, ordered.
4516 *
4517 * @returns true if the bit was set.
4518 * @returns false if the bit was clear.
4519 *
4520 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4521 * the memory access isn't atomic!
4522 * @param iBit The bit to test and clear.
4523 *
4524 * @remarks No memory barrier, take care on smp.
4525 * @remarks x86: Requires a 386 or later.
4526 */
4527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4528DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4529#else
4530DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4531{
4532 union { bool f; uint32_t u32; uint8_t u8; } rc;
4533 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4534# if RT_INLINE_ASM_USES_INTRIN
4535 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4536
4537# elif RT_INLINE_ASM_GNU_STYLE
4538 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4539 "setc %b0\n\t"
4540 "andl $1, %0\n\t"
4541 : "=q" (rc.u32),
4542 "=m" (*(volatile long *)pvBitmap)
4543 : "Ir" (iBit),
4544 "m" (*(volatile long *)pvBitmap)
4545 : "memory");
4546# else
4547 __asm
4548 {
4549 mov edx, [iBit]
4550# ifdef RT_ARCH_AMD64
4551 mov rax, [pvBitmap]
4552 lock btr [rax], edx
4553# else
4554 mov eax, [pvBitmap]
4555 lock btr [eax], edx
4556# endif
4557 setc al
4558 and eax, 1
4559 mov [rc.u32], eax
4560 }
4561# endif
4562 return rc.f;
4563}
4564#endif
4565
4566
4567/**
4568 * Tests and toggles a bit in a bitmap.
4569 *
4570 * @returns true if the bit was set.
4571 * @returns false if the bit was clear.
4572 *
4573 * @param pvBitmap Pointer to the bitmap.
4574 * @param iBit The bit to test and toggle.
4575 *
4576 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4577 * However, doing so will yield better performance as well as avoiding
4578 * traps accessing the last bits in the bitmap.
4579 */
4580#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4581DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4582#else
4583DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4584{
4585 union { bool f; uint32_t u32; uint8_t u8; } rc;
4586# if RT_INLINE_ASM_USES_INTRIN
4587 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4588
4589# elif RT_INLINE_ASM_GNU_STYLE
4590 __asm__ __volatile__("btcl %2, %1\n\t"
4591 "setc %b0\n\t"
4592 "andl $1, %0\n\t"
4593 : "=q" (rc.u32),
4594 "=m" (*(volatile long *)pvBitmap)
4595 : "Ir" (iBit),
4596 "m" (*(volatile long *)pvBitmap)
4597 : "memory");
4598# else
4599 __asm
4600 {
4601 mov edx, [iBit]
4602# ifdef RT_ARCH_AMD64
4603 mov rax, [pvBitmap]
4604 btc [rax], edx
4605# else
4606 mov eax, [pvBitmap]
4607 btc [eax], edx
4608# endif
4609 setc al
4610 and eax, 1
4611 mov [rc.u32], eax
4612 }
4613# endif
4614 return rc.f;
4615}
4616#endif
4617
4618
4619/**
4620 * Atomically tests and toggles a bit in a bitmap, ordered.
4621 *
4622 * @returns true if the bit was set.
4623 * @returns false if the bit was clear.
4624 *
4625 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4626 * the memory access isn't atomic!
4627 * @param iBit The bit to test and toggle.
4628 *
4629 * @remarks x86: Requires a 386 or later.
4630 */
4631#if RT_INLINE_ASM_EXTERNAL
4632DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4633#else
4634DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4635{
4636 union { bool f; uint32_t u32; uint8_t u8; } rc;
4637 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4638# if RT_INLINE_ASM_GNU_STYLE
4639 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4640 "setc %b0\n\t"
4641 "andl $1, %0\n\t"
4642 : "=q" (rc.u32),
4643 "=m" (*(volatile long *)pvBitmap)
4644 : "Ir" (iBit),
4645 "m" (*(volatile long *)pvBitmap)
4646 : "memory");
4647# else
4648 __asm
4649 {
4650 mov edx, [iBit]
4651# ifdef RT_ARCH_AMD64
4652 mov rax, [pvBitmap]
4653 lock btc [rax], edx
4654# else
4655 mov eax, [pvBitmap]
4656 lock btc [eax], edx
4657# endif
4658 setc al
4659 and eax, 1
4660 mov [rc.u32], eax
4661 }
4662# endif
4663 return rc.f;
4664}
4665#endif
4666
4667
4668/**
4669 * Tests if a bit in a bitmap is set.
4670 *
4671 * @returns true if the bit is set.
4672 * @returns false if the bit is clear.
4673 *
4674 * @param pvBitmap Pointer to the bitmap.
4675 * @param iBit The bit to test.
4676 *
4677 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4678 * However, doing so will yield better performance as well as avoiding
4679 * traps accessing the last bits in the bitmap.
4680 */
4681#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4682DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4683#else
4684DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4685{
4686 union { bool f; uint32_t u32; uint8_t u8; } rc;
4687# if RT_INLINE_ASM_USES_INTRIN
4688 rc.u32 = _bittest((long *)pvBitmap, iBit);
4689# elif RT_INLINE_ASM_GNU_STYLE
4690
4691 __asm__ __volatile__("btl %2, %1\n\t"
4692 "setc %b0\n\t"
4693 "andl $1, %0\n\t"
4694 : "=q" (rc.u32)
4695 : "m" (*(const volatile long *)pvBitmap),
4696 "Ir" (iBit)
4697 : "memory");
4698# else
4699 __asm
4700 {
4701 mov edx, [iBit]
4702# ifdef RT_ARCH_AMD64
4703 mov rax, [pvBitmap]
4704 bt [rax], edx
4705# else
4706 mov eax, [pvBitmap]
4707 bt [eax], edx
4708# endif
4709 setc al
4710 and eax, 1
4711 mov [rc.u32], eax
4712 }
4713# endif
4714 return rc.f;
4715}
4716#endif
4717
4718
4719/**
4720 * Clears a bit range within a bitmap.
4721 *
4722 * @param pvBitmap Pointer to the bitmap.
4723 * @param iBitStart The First bit to clear.
4724 * @param iBitEnd The first bit not to clear.
4725 */
4726DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4727{
4728 if (iBitStart < iBitEnd)
4729 {
4730 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4731 int32_t iStart = iBitStart & ~31;
4732 int32_t iEnd = iBitEnd & ~31;
4733 if (iStart == iEnd)
4734 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4735 else
4736 {
4737 /* bits in first dword. */
4738 if (iBitStart & 31)
4739 {
4740 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4741 pu32++;
4742 iBitStart = iStart + 32;
4743 }
4744
4745 /* whole dword. */
4746 if (iBitStart != iEnd)
4747 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4748
4749 /* bits in last dword. */
4750 if (iBitEnd & 31)
4751 {
4752 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4753 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4754 }
4755 }
4756 }
4757}
4758
4759
4760/**
4761 * Sets a bit range within a bitmap.
4762 *
4763 * @param pvBitmap Pointer to the bitmap.
4764 * @param iBitStart The First bit to set.
4765 * @param iBitEnd The first bit not to set.
4766 */
4767DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4768{
4769 if (iBitStart < iBitEnd)
4770 {
4771 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4772 int32_t iStart = iBitStart & ~31;
4773 int32_t iEnd = iBitEnd & ~31;
4774 if (iStart == iEnd)
4775 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4776 else
4777 {
4778 /* bits in first dword. */
4779 if (iBitStart & 31)
4780 {
4781 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4782 pu32++;
4783 iBitStart = iStart + 32;
4784 }
4785
4786 /* whole dword. */
4787 if (iBitStart != iEnd)
4788 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4789
4790 /* bits in last dword. */
4791 if (iBitEnd & 31)
4792 {
4793 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4794 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4795 }
4796 }
4797 }
4798}
4799
4800
4801/**
4802 * Finds the first clear bit in a bitmap.
4803 *
4804 * @returns Index of the first zero bit.
4805 * @returns -1 if no clear bit was found.
4806 * @param pvBitmap Pointer to the bitmap.
4807 * @param cBits The number of bits in the bitmap. Multiple of 32.
4808 */
4809#if RT_INLINE_ASM_EXTERNAL
4810DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4811#else
4812DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4813{
4814 if (cBits)
4815 {
4816 int32_t iBit;
4817# if RT_INLINE_ASM_GNU_STYLE
4818 RTCCUINTREG uEAX, uECX, uEDI;
4819 cBits = RT_ALIGN_32(cBits, 32);
4820 __asm__ __volatile__("repe; scasl\n\t"
4821 "je 1f\n\t"
4822# ifdef RT_ARCH_AMD64
4823 "lea -4(%%rdi), %%rdi\n\t"
4824 "xorl (%%rdi), %%eax\n\t"
4825 "subq %5, %%rdi\n\t"
4826# else
4827 "lea -4(%%edi), %%edi\n\t"
4828 "xorl (%%edi), %%eax\n\t"
4829 "subl %5, %%edi\n\t"
4830# endif
4831 "shll $3, %%edi\n\t"
4832 "bsfl %%eax, %%edx\n\t"
4833 "addl %%edi, %%edx\n\t"
4834 "1:\t\n"
4835 : "=d" (iBit),
4836 "=&c" (uECX),
4837 "=&D" (uEDI),
4838 "=&a" (uEAX)
4839 : "0" (0xffffffff),
4840 "mr" (pvBitmap),
4841 "1" (cBits >> 5),
4842 "2" (pvBitmap),
4843 "3" (0xffffffff));
4844# else
4845 cBits = RT_ALIGN_32(cBits, 32);
4846 __asm
4847 {
4848# ifdef RT_ARCH_AMD64
4849 mov rdi, [pvBitmap]
4850 mov rbx, rdi
4851# else
4852 mov edi, [pvBitmap]
4853 mov ebx, edi
4854# endif
4855 mov edx, 0ffffffffh
4856 mov eax, edx
4857 mov ecx, [cBits]
4858 shr ecx, 5
4859 repe scasd
4860 je done
4861
4862# ifdef RT_ARCH_AMD64
4863 lea rdi, [rdi - 4]
4864 xor eax, [rdi]
4865 sub rdi, rbx
4866# else
4867 lea edi, [edi - 4]
4868 xor eax, [edi]
4869 sub edi, ebx
4870# endif
4871 shl edi, 3
4872 bsf edx, eax
4873 add edx, edi
4874 done:
4875 mov [iBit], edx
4876 }
4877# endif
4878 return iBit;
4879 }
4880 return -1;
4881}
4882#endif
4883
4884
4885/**
4886 * Finds the next clear bit in a bitmap.
4887 *
4888 * @returns Index of the first zero bit.
4889 * @returns -1 if no clear bit was found.
4890 * @param pvBitmap Pointer to the bitmap.
4891 * @param cBits The number of bits in the bitmap. Multiple of 32.
4892 * @param iBitPrev The bit returned from the last search.
4893 * The search will start at iBitPrev + 1.
4894 */
4895#if RT_INLINE_ASM_EXTERNAL
4896DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4897#else
4898DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4899{
4900 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4901 int iBit = ++iBitPrev & 31;
4902 if (iBit)
4903 {
4904 /*
4905 * Inspect the 32-bit word containing the unaligned bit.
4906 */
4907 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4908
4909# if RT_INLINE_ASM_USES_INTRIN
4910 unsigned long ulBit = 0;
4911 if (_BitScanForward(&ulBit, u32))
4912 return ulBit + iBitPrev;
4913# else
4914# if RT_INLINE_ASM_GNU_STYLE
4915 __asm__ __volatile__("bsf %1, %0\n\t"
4916 "jnz 1f\n\t"
4917 "movl $-1, %0\n\t"
4918 "1:\n\t"
4919 : "=r" (iBit)
4920 : "r" (u32));
4921# else
4922 __asm
4923 {
4924 mov edx, [u32]
4925 bsf eax, edx
4926 jnz done
4927 mov eax, 0ffffffffh
4928 done:
4929 mov [iBit], eax
4930 }
4931# endif
4932 if (iBit >= 0)
4933 return iBit + iBitPrev;
4934# endif
4935
4936 /*
4937 * Skip ahead and see if there is anything left to search.
4938 */
4939 iBitPrev |= 31;
4940 iBitPrev++;
4941 if (cBits <= (uint32_t)iBitPrev)
4942 return -1;
4943 }
4944
4945 /*
4946 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4947 */
4948 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4949 if (iBit >= 0)
4950 iBit += iBitPrev;
4951 return iBit;
4952}
4953#endif
4954
4955
4956/**
4957 * Finds the first set bit in a bitmap.
4958 *
4959 * @returns Index of the first set bit.
4960 * @returns -1 if no clear bit was found.
4961 * @param pvBitmap Pointer to the bitmap.
4962 * @param cBits The number of bits in the bitmap. Multiple of 32.
4963 */
4964#if RT_INLINE_ASM_EXTERNAL
4965DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4966#else
4967DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4968{
4969 if (cBits)
4970 {
4971 int32_t iBit;
4972# if RT_INLINE_ASM_GNU_STYLE
4973 RTCCUINTREG uEAX, uECX, uEDI;
4974 cBits = RT_ALIGN_32(cBits, 32);
4975 __asm__ __volatile__("repe; scasl\n\t"
4976 "je 1f\n\t"
4977# ifdef RT_ARCH_AMD64
4978 "lea -4(%%rdi), %%rdi\n\t"
4979 "movl (%%rdi), %%eax\n\t"
4980 "subq %5, %%rdi\n\t"
4981# else
4982 "lea -4(%%edi), %%edi\n\t"
4983 "movl (%%edi), %%eax\n\t"
4984 "subl %5, %%edi\n\t"
4985# endif
4986 "shll $3, %%edi\n\t"
4987 "bsfl %%eax, %%edx\n\t"
4988 "addl %%edi, %%edx\n\t"
4989 "1:\t\n"
4990 : "=d" (iBit),
4991 "=&c" (uECX),
4992 "=&D" (uEDI),
4993 "=&a" (uEAX)
4994 : "0" (0xffffffff),
4995 "mr" (pvBitmap),
4996 "1" (cBits >> 5),
4997 "2" (pvBitmap),
4998 "3" (0));
4999# else
5000 cBits = RT_ALIGN_32(cBits, 32);
5001 __asm
5002 {
5003# ifdef RT_ARCH_AMD64
5004 mov rdi, [pvBitmap]
5005 mov rbx, rdi
5006# else
5007 mov edi, [pvBitmap]
5008 mov ebx, edi
5009# endif
5010 mov edx, 0ffffffffh
5011 xor eax, eax
5012 mov ecx, [cBits]
5013 shr ecx, 5
5014 repe scasd
5015 je done
5016# ifdef RT_ARCH_AMD64
5017 lea rdi, [rdi - 4]
5018 mov eax, [rdi]
5019 sub rdi, rbx
5020# else
5021 lea edi, [edi - 4]
5022 mov eax, [edi]
5023 sub edi, ebx
5024# endif
5025 shl edi, 3
5026 bsf edx, eax
5027 add edx, edi
5028 done:
5029 mov [iBit], edx
5030 }
5031# endif
5032 return iBit;
5033 }
5034 return -1;
5035}
5036#endif
5037
5038
5039/**
5040 * Finds the next set bit in a bitmap.
5041 *
5042 * @returns Index of the next set bit.
5043 * @returns -1 if no set bit was found.
5044 * @param pvBitmap Pointer to the bitmap.
5045 * @param cBits The number of bits in the bitmap. Multiple of 32.
5046 * @param iBitPrev The bit returned from the last search.
5047 * The search will start at iBitPrev + 1.
5048 */
5049#if RT_INLINE_ASM_EXTERNAL
5050DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5051#else
5052DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5053{
5054 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5055 int iBit = ++iBitPrev & 31;
5056 if (iBit)
5057 {
5058 /*
5059 * Inspect the 32-bit word containing the unaligned bit.
5060 */
5061 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5062
5063# if RT_INLINE_ASM_USES_INTRIN
5064 unsigned long ulBit = 0;
5065 if (_BitScanForward(&ulBit, u32))
5066 return ulBit + iBitPrev;
5067# else
5068# if RT_INLINE_ASM_GNU_STYLE
5069 __asm__ __volatile__("bsf %1, %0\n\t"
5070 "jnz 1f\n\t"
5071 "movl $-1, %0\n\t"
5072 "1:\n\t"
5073 : "=r" (iBit)
5074 : "r" (u32));
5075# else
5076 __asm
5077 {
5078 mov edx, [u32]
5079 bsf eax, edx
5080 jnz done
5081 mov eax, 0ffffffffh
5082 done:
5083 mov [iBit], eax
5084 }
5085# endif
5086 if (iBit >= 0)
5087 return iBit + iBitPrev;
5088# endif
5089
5090 /*
5091 * Skip ahead and see if there is anything left to search.
5092 */
5093 iBitPrev |= 31;
5094 iBitPrev++;
5095 if (cBits <= (uint32_t)iBitPrev)
5096 return -1;
5097 }
5098
5099 /*
5100 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5101 */
5102 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5103 if (iBit >= 0)
5104 iBit += iBitPrev;
5105 return iBit;
5106}
5107#endif
5108
5109
5110/**
5111 * Finds the first bit which is set in the given 32-bit integer.
5112 * Bits are numbered from 1 (least significant) to 32.
5113 *
5114 * @returns index [1..32] of the first set bit.
5115 * @returns 0 if all bits are cleared.
5116 * @param u32 Integer to search for set bits.
5117 * @remarks Similar to ffs() in BSD.
5118 */
5119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5120DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5121#else
5122DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5123{
5124# if RT_INLINE_ASM_USES_INTRIN
5125 unsigned long iBit;
5126 if (_BitScanForward(&iBit, u32))
5127 iBit++;
5128 else
5129 iBit = 0;
5130# elif RT_INLINE_ASM_GNU_STYLE
5131 uint32_t iBit;
5132 __asm__ __volatile__("bsf %1, %0\n\t"
5133 "jnz 1f\n\t"
5134 "xorl %0, %0\n\t"
5135 "jmp 2f\n"
5136 "1:\n\t"
5137 "incl %0\n"
5138 "2:\n\t"
5139 : "=r" (iBit)
5140 : "rm" (u32));
5141# else
5142 uint32_t iBit;
5143 _asm
5144 {
5145 bsf eax, [u32]
5146 jnz found
5147 xor eax, eax
5148 jmp done
5149 found:
5150 inc eax
5151 done:
5152 mov [iBit], eax
5153 }
5154# endif
5155 return iBit;
5156}
5157#endif
5158
5159
5160/**
5161 * Finds the first bit which is set in the given 32-bit integer.
5162 * Bits are numbered from 1 (least significant) to 32.
5163 *
5164 * @returns index [1..32] of the first set bit.
5165 * @returns 0 if all bits are cleared.
5166 * @param i32 Integer to search for set bits.
5167 * @remark Similar to ffs() in BSD.
5168 */
5169DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5170{
5171 return ASMBitFirstSetU32((uint32_t)i32);
5172}
5173
5174
5175/**
5176 * Finds the first bit which is set in the given 64-bit integer.
5177 *
5178 * Bits are numbered from 1 (least significant) to 64.
5179 *
5180 * @returns index [1..64] of the first set bit.
5181 * @returns 0 if all bits are cleared.
5182 * @param u64 Integer to search for set bits.
5183 * @remarks Similar to ffs() in BSD.
5184 */
5185#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5186DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5187#else
5188DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5189{
5190# if RT_INLINE_ASM_USES_INTRIN
5191 unsigned long iBit;
5192# if ARCH_BITS == 64
5193 if (_BitScanForward64(&iBit, u64))
5194 iBit++;
5195 else
5196 iBit = 0;
5197# else
5198 if (_BitScanForward(&iBit, (uint32_t)u64))
5199 iBit++;
5200 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5201 iBit += 33;
5202 else
5203 iBit = 0;
5204# endif
5205# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5206 uint64_t iBit;
5207 __asm__ __volatile__("bsfq %1, %0\n\t"
5208 "jnz 1f\n\t"
5209 "xorl %0, %0\n\t"
5210 "jmp 2f\n"
5211 "1:\n\t"
5212 "incl %0\n"
5213 "2:\n\t"
5214 : "=r" (iBit)
5215 : "rm" (u64));
5216# else
5217 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5218 if (!iBit)
5219 {
5220 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5221 if (iBit)
5222 iBit += 32;
5223 }
5224# endif
5225 return (unsigned)iBit;
5226}
5227#endif
5228
5229
5230/**
5231 * Finds the first bit which is set in the given 16-bit integer.
5232 *
5233 * Bits are numbered from 1 (least significant) to 16.
5234 *
5235 * @returns index [1..16] of the first set bit.
5236 * @returns 0 if all bits are cleared.
5237 * @param u16 Integer to search for set bits.
5238 * @remarks For 16-bit bs3kit code.
5239 */
5240#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5241DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5242#else
5243DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5244{
5245 return ASMBitFirstSetU32((uint32_t)u16);
5246}
5247#endif
5248
5249
5250/**
5251 * Finds the last bit which is set in the given 32-bit integer.
5252 * Bits are numbered from 1 (least significant) to 32.
5253 *
5254 * @returns index [1..32] of the last set bit.
5255 * @returns 0 if all bits are cleared.
5256 * @param u32 Integer to search for set bits.
5257 * @remark Similar to fls() in BSD.
5258 */
5259#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5260DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5261#else
5262DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5263{
5264# if RT_INLINE_ASM_USES_INTRIN
5265 unsigned long iBit;
5266 if (_BitScanReverse(&iBit, u32))
5267 iBit++;
5268 else
5269 iBit = 0;
5270# elif RT_INLINE_ASM_GNU_STYLE
5271 uint32_t iBit;
5272 __asm__ __volatile__("bsrl %1, %0\n\t"
5273 "jnz 1f\n\t"
5274 "xorl %0, %0\n\t"
5275 "jmp 2f\n"
5276 "1:\n\t"
5277 "incl %0\n"
5278 "2:\n\t"
5279 : "=r" (iBit)
5280 : "rm" (u32));
5281# else
5282 uint32_t iBit;
5283 _asm
5284 {
5285 bsr eax, [u32]
5286 jnz found
5287 xor eax, eax
5288 jmp done
5289 found:
5290 inc eax
5291 done:
5292 mov [iBit], eax
5293 }
5294# endif
5295 return iBit;
5296}
5297#endif
5298
5299
5300/**
5301 * Finds the last bit which is set in the given 32-bit integer.
5302 * Bits are numbered from 1 (least significant) to 32.
5303 *
5304 * @returns index [1..32] of the last set bit.
5305 * @returns 0 if all bits are cleared.
5306 * @param i32 Integer to search for set bits.
5307 * @remark Similar to fls() in BSD.
5308 */
5309DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5310{
5311 return ASMBitLastSetU32((uint32_t)i32);
5312}
5313
5314
5315/**
5316 * Finds the last bit which is set in the given 64-bit integer.
5317 *
5318 * Bits are numbered from 1 (least significant) to 64.
5319 *
5320 * @returns index [1..64] of the last set bit.
5321 * @returns 0 if all bits are cleared.
5322 * @param u64 Integer to search for set bits.
5323 * @remark Similar to fls() in BSD.
5324 */
5325#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5326DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5327#else
5328DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5329{
5330# if RT_INLINE_ASM_USES_INTRIN
5331 unsigned long iBit;
5332# if ARCH_BITS == 64
5333 if (_BitScanReverse64(&iBit, u64))
5334 iBit++;
5335 else
5336 iBit = 0;
5337# else
5338 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5339 iBit += 33;
5340 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5341 iBit++;
5342 else
5343 iBit = 0;
5344# endif
5345# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5346 uint64_t iBit;
5347 __asm__ __volatile__("bsrq %1, %0\n\t"
5348 "jnz 1f\n\t"
5349 "xorl %0, %0\n\t"
5350 "jmp 2f\n"
5351 "1:\n\t"
5352 "incl %0\n"
5353 "2:\n\t"
5354 : "=r" (iBit)
5355 : "rm" (u64));
5356# else
5357 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5358 if (iBit)
5359 iBit += 32;
5360 else
5361 iBit = ASMBitLastSetU32((uint32_t)u64);
5362#endif
5363 return (unsigned)iBit;
5364}
5365#endif
5366
5367
5368/**
5369 * Finds the last bit which is set in the given 16-bit integer.
5370 *
5371 * Bits are numbered from 1 (least significant) to 16.
5372 *
5373 * @returns index [1..16] of the last set bit.
5374 * @returns 0 if all bits are cleared.
5375 * @param u16 Integer to search for set bits.
5376 * @remarks For 16-bit bs3kit code.
5377 */
5378#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5379DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5380#else
5381DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5382{
5383 return ASMBitLastSetU32((uint32_t)u16);
5384}
5385#endif
5386
5387
5388/**
5389 * Reverse the byte order of the given 16-bit integer.
5390 *
5391 * @returns Revert
5392 * @param u16 16-bit integer value.
5393 */
5394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5395DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5396#else
5397DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5398{
5399# if RT_INLINE_ASM_USES_INTRIN
5400 u16 = _byteswap_ushort(u16);
5401# elif RT_INLINE_ASM_GNU_STYLE
5402 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5403# else
5404 _asm
5405 {
5406 mov ax, [u16]
5407 ror ax, 8
5408 mov [u16], ax
5409 }
5410# endif
5411 return u16;
5412}
5413#endif
5414
5415
5416/**
5417 * Reverse the byte order of the given 32-bit integer.
5418 *
5419 * @returns Revert
5420 * @param u32 32-bit integer value.
5421 */
5422#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5423DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5424#else
5425DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5426{
5427# if RT_INLINE_ASM_USES_INTRIN
5428 u32 = _byteswap_ulong(u32);
5429# elif RT_INLINE_ASM_GNU_STYLE
5430 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5431# else
5432 _asm
5433 {
5434 mov eax, [u32]
5435 bswap eax
5436 mov [u32], eax
5437 }
5438# endif
5439 return u32;
5440}
5441#endif
5442
5443
5444/**
5445 * Reverse the byte order of the given 64-bit integer.
5446 *
5447 * @returns Revert
5448 * @param u64 64-bit integer value.
5449 */
5450DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5451{
5452#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5453 u64 = _byteswap_uint64(u64);
5454#else
5455 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5456 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5457#endif
5458 return u64;
5459}
5460
5461
5462/**
5463 * Rotate 32-bit unsigned value to the left by @a cShift.
5464 *
5465 * @returns Rotated value.
5466 * @param u32 The value to rotate.
5467 * @param cShift How many bits to rotate by.
5468 */
5469#ifdef __WATCOMC__
5470DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5471#else
5472DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5473{
5474# if RT_INLINE_ASM_USES_INTRIN
5475 return _rotl(u32, cShift);
5476# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5477 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5478 return u32;
5479# else
5480 cShift &= 31;
5481 return (u32 << cShift) | (u32 >> (32 - cShift));
5482# endif
5483}
5484#endif
5485
5486
5487/**
5488 * Rotate 32-bit unsigned value to the right by @a cShift.
5489 *
5490 * @returns Rotated value.
5491 * @param u32 The value to rotate.
5492 * @param cShift How many bits to rotate by.
5493 */
5494#ifdef __WATCOMC__
5495DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5496#else
5497DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5498{
5499# if RT_INLINE_ASM_USES_INTRIN
5500 return _rotr(u32, cShift);
5501# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5502 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5503 return u32;
5504# else
5505 cShift &= 31;
5506 return (u32 >> cShift) | (u32 << (32 - cShift));
5507# endif
5508}
5509#endif
5510
5511
5512/**
5513 * Rotate 64-bit unsigned value to the left by @a cShift.
5514 *
5515 * @returns Rotated value.
5516 * @param u64 The value to rotate.
5517 * @param cShift How many bits to rotate by.
5518 */
5519DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5520{
5521#if RT_INLINE_ASM_USES_INTRIN
5522 return _rotl64(u64, cShift);
5523#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5524 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5525 return u64;
5526#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5527 uint32_t uSpill;
5528 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5529 "jz 1f\n\t"
5530 "xchgl %%eax, %%edx\n\t"
5531 "1:\n\t"
5532 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5533 "jz 2f\n\t"
5534 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5535 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5536 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5537 "2:\n\t" /* } */
5538 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5539 : "0" (u64),
5540 "1" (cShift));
5541 return u64;
5542#else
5543 cShift &= 63;
5544 return (u64 << cShift) | (u64 >> (64 - cShift));
5545#endif
5546}
5547
5548
5549/**
5550 * Rotate 64-bit unsigned value to the right by @a cShift.
5551 *
5552 * @returns Rotated value.
5553 * @param u64 The value to rotate.
5554 * @param cShift How many bits to rotate by.
5555 */
5556DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5557{
5558#if RT_INLINE_ASM_USES_INTRIN
5559 return _rotr64(u64, cShift);
5560#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5561 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5562 return u64;
5563#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5564 uint32_t uSpill;
5565 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5566 "jz 1f\n\t"
5567 "xchgl %%eax, %%edx\n\t"
5568 "1:\n\t"
5569 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5570 "jz 2f\n\t"
5571 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5572 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5573 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5574 "2:\n\t" /* } */
5575 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5576 : "0" (u64),
5577 "1" (cShift));
5578 return u64;
5579#else
5580 cShift &= 63;
5581 return (u64 >> cShift) | (u64 << (64 - cShift));
5582#endif
5583}
5584
5585/** @} */
5586
5587
5588/** @} */
5589
5590#endif
5591
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette