VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 59747

Last change on this file since 59747 was 59747, checked in by vboxsync, 9 years ago

iprt/asm.h: Cleaned up the ASMMemIsAll8/U32 mess and implemented the former in assembly. (Found inverted usage due to bad naming in copyUtf8Block, but it is fortunately an unused method.) Replaces the complicated ASMBitFirstSet based scanning in RTSgBufIsZero with a simple call to the new ASMMemIsZero function.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using _MSC_VER >= 1400 intrinsics.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
 98 * @remarks The difference between ordered and unordered atomic operations is that
 99 * the former complete all outstanding reads and writes before continuing,
 100 * while the latter make no promises about the order. Even ordered
 101 * operations don't, it seems, make any 100% promise with regard to whether
 102 * the operation will complete before any subsequent memory access.
 103 * (Please correct if this is wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
 114 * static inline uint32_t rdmsr_low(int idx)
 115 * {
 116 * uint32_t low;
 117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
 118 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
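/*
 * A minimal usage sketch of the ordered vs. unordered distinction; the
 * g_u32ExampleData / g_u32ExampleFlag names below are illustrative only and
 * not part of IPRT. An ordered exchange is used to publish a value, while an
 * unordered read suffices where no ordering guarantee is needed (e.g. for
 * statistics sampling).
 *
 * @code
 *      static uint32_t volatile g_u32ExampleData;
 *      static uint32_t volatile g_u32ExampleFlag;
 *
 *      void examplePublish(uint32_t u32Value)
 *      {
 *          g_u32ExampleData = u32Value;
 *          ASMAtomicXchgU32(&g_u32ExampleFlag, 1);       // ordered: the data store completes first
 *      }
 *
 *      uint32_t exampleSampleFlag(void)
 *      {
 *          return ASMAtomicUoReadU32(&g_u32ExampleFlag); // unordered: no barrier needed here
 *      }
 * @endcode
 */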
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some 4.3.x register allocation issues in this version of
137 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
138#ifdef __GNUC__
139# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
140#endif
141#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
142# define RT_INLINE_ASM_GCC_4_3_X_X86 0
143#endif
144
145/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
146 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
147 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
148 * mode, x86.
149 *
150 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
151 * when in PIC mode on x86.
152 */
153#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
155# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
156# else
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
158 ( (defined(PIC) || defined(__PIC__)) \
159 && defined(RT_ARCH_X86) \
160 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
161 || defined(RT_OS_DARWIN)) )
162# endif
163#endif
164
165
166/** @def ASMReturnAddress
167 * Gets the return address of the current (or calling if you like) function or method.
168 */
169#ifdef _MSC_VER
170# ifdef __cplusplus
171extern "C"
172# endif
173void * _ReturnAddress(void);
174# pragma intrinsic(_ReturnAddress)
175# define ASMReturnAddress() _ReturnAddress()
176#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
177# define ASMReturnAddress() __builtin_return_address(0)
178#elif defined(__WATCOMC__)
179# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
180#else
181# error "Unsupported compiler."
182#endif
183
184
185/**
186 * Compiler memory barrier.
187 *
188 * Ensure that the compiler does not use any cached (register/tmp stack) memory
189 * values or any outstanding writes when returning from this function.
190 *
191 * This function must be used if non-volatile data is modified by a
192 * device or the VMM. Typical cases are port access, MMIO access,
193 * trapping instruction, etc.
194 */
195#if RT_INLINE_ASM_GNU_STYLE
196# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
197#elif RT_INLINE_ASM_USES_INTRIN
198# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
199#elif defined(__WATCOMC__)
200void ASMCompilerBarrier(void);
201#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
202DECLINLINE(void) ASMCompilerBarrier(void)
203{
204 __asm
205 {
206 }
207}
208#endif
209
210
211/** @def ASMBreakpoint
212 * Debugger Breakpoint.
213 * @deprecated Use RT_BREAKPOINT instead.
214 * @internal
215 */
216#define ASMBreakpoint() RT_BREAKPOINT()
217
218
219/**
220 * Spinloop hint for platforms that have these, empty function on the other
221 * platforms.
222 *
 223 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
 224 * spin locks.
225 */
226#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
227DECLASM(void) ASMNopPause(void);
228#else
229DECLINLINE(void) ASMNopPause(void)
230{
231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
232# if RT_INLINE_ASM_GNU_STYLE
233 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
234# else
235 __asm {
236 _emit 0f3h
237 _emit 090h
238 }
239# endif
240# else
241 /* dummy */
242# endif
243}
244#endif
245
246
247/**
248 * Atomically Exchange an unsigned 8-bit value, ordered.
249 *
250 * @returns Current *pu8 value
251 * @param pu8 Pointer to the 8-bit variable to update.
252 * @param u8 The 8-bit value to assign to *pu8.
253 */
254#if RT_INLINE_ASM_EXTERNAL
255DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
256#else
257DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
258{
259# if RT_INLINE_ASM_GNU_STYLE
260 __asm__ __volatile__("xchgb %0, %1\n\t"
261 : "=m" (*pu8),
262 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
263 : "1" (u8),
264 "m" (*pu8));
265# else
266 __asm
267 {
268# ifdef RT_ARCH_AMD64
269 mov rdx, [pu8]
270 mov al, [u8]
271 xchg [rdx], al
272 mov [u8], al
273# else
274 mov edx, [pu8]
275 mov al, [u8]
276 xchg [edx], al
277 mov [u8], al
278# endif
279 }
280# endif
281 return u8;
282}
283#endif
284
285
286/**
287 * Atomically Exchange a signed 8-bit value, ordered.
288 *
289 * @returns Current *pu8 value
290 * @param pi8 Pointer to the 8-bit variable to update.
291 * @param i8 The 8-bit value to assign to *pi8.
292 */
293DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
294{
295 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
296}
297
298
299/**
300 * Atomically Exchange a bool value, ordered.
301 *
302 * @returns Current *pf value
 303 * @param pf Pointer to the boolean variable to update.
 304 * @param f The boolean value to assign to *pf.
305 */
306DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
307{
308#ifdef _MSC_VER
309 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
310#else
311 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
312#endif
313}
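/*
 * A minimal sketch of the common "run once" pattern built on
 * ASMAtomicXchgBool; g_fExampleDone and exampleRunOnce are illustrative names
 * only.
 *
 * @code
 *      static bool volatile g_fExampleDone = false;
 *
 *      void exampleRunOnce(void)
 *      {
 *          if (!ASMAtomicXchgBool(&g_fExampleDone, true))
 *          {
 *              // Only the first caller gets the old value (false) back,
 *              // so only it performs the one-time initialization.
 *          }
 *      }
 * @endcode
 */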
314
315
316/**
317 * Atomically Exchange an unsigned 16-bit value, ordered.
318 *
319 * @returns Current *pu16 value
320 * @param pu16 Pointer to the 16-bit variable to update.
321 * @param u16 The 16-bit value to assign to *pu16.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
325#else
326DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
327{
328# if RT_INLINE_ASM_GNU_STYLE
329 __asm__ __volatile__("xchgw %0, %1\n\t"
330 : "=m" (*pu16),
331 "=r" (u16)
332 : "1" (u16),
333 "m" (*pu16));
334# else
335 __asm
336 {
337# ifdef RT_ARCH_AMD64
338 mov rdx, [pu16]
339 mov ax, [u16]
340 xchg [rdx], ax
341 mov [u16], ax
342# else
343 mov edx, [pu16]
344 mov ax, [u16]
345 xchg [edx], ax
346 mov [u16], ax
347# endif
348 }
349# endif
350 return u16;
351}
352#endif
353
354
355/**
356 * Atomically Exchange a signed 16-bit value, ordered.
357 *
 358 * @returns Current *pi16 value
359 * @param pi16 Pointer to the 16-bit variable to update.
360 * @param i16 The 16-bit value to assign to *pi16.
361 */
362DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
363{
364 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
365}
366
367
368/**
369 * Atomically Exchange an unsigned 32-bit value, ordered.
370 *
371 * @returns Current *pu32 value
372 * @param pu32 Pointer to the 32-bit variable to update.
373 * @param u32 The 32-bit value to assign to *pu32.
374 *
375 * @remarks Does not work on 286 and earlier.
376 */
377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
378DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
379#else
380DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
381{
382# if RT_INLINE_ASM_GNU_STYLE
383 __asm__ __volatile__("xchgl %0, %1\n\t"
384 : "=m" (*pu32),
385 "=r" (u32)
386 : "1" (u32),
387 "m" (*pu32));
388
389# elif RT_INLINE_ASM_USES_INTRIN
390 u32 = _InterlockedExchange((long *)pu32, u32);
391
392# else
393 __asm
394 {
395# ifdef RT_ARCH_AMD64
396 mov rdx, [pu32]
397 mov eax, u32
398 xchg [rdx], eax
399 mov [u32], eax
400# else
401 mov edx, [pu32]
402 mov eax, u32
403 xchg [edx], eax
404 mov [u32], eax
405# endif
406 }
407# endif
408 return u32;
409}
410#endif
411
412
413/**
414 * Atomically Exchange a signed 32-bit value, ordered.
415 *
 416 * @returns Current *pi32 value
417 * @param pi32 Pointer to the 32-bit variable to update.
418 * @param i32 The 32-bit value to assign to *pi32.
419 */
420DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
421{
422 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
423}
424
425
426/**
427 * Atomically Exchange an unsigned 64-bit value, ordered.
428 *
429 * @returns Current *pu64 value
430 * @param pu64 Pointer to the 64-bit variable to update.
431 * @param u64 The 64-bit value to assign to *pu64.
432 *
433 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
434 */
435#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
436 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
437DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
438#else
439DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
440{
441# if defined(RT_ARCH_AMD64)
442# if RT_INLINE_ASM_USES_INTRIN
443 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
444
445# elif RT_INLINE_ASM_GNU_STYLE
446 __asm__ __volatile__("xchgq %0, %1\n\t"
447 : "=m" (*pu64),
448 "=r" (u64)
449 : "1" (u64),
450 "m" (*pu64));
451# else
452 __asm
453 {
454 mov rdx, [pu64]
455 mov rax, [u64]
456 xchg [rdx], rax
457 mov [u64], rax
458 }
459# endif
460# else /* !RT_ARCH_AMD64 */
461# if RT_INLINE_ASM_GNU_STYLE
462# if defined(PIC) || defined(__PIC__)
463 uint32_t u32EBX = (uint32_t)u64;
464 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
465 "xchgl %%ebx, %3\n\t"
466 "1:\n\t"
467 "lock; cmpxchg8b (%5)\n\t"
468 "jnz 1b\n\t"
469 "movl %3, %%ebx\n\t"
470 /*"xchgl %%esi, %5\n\t"*/
471 : "=A" (u64),
472 "=m" (*pu64)
473 : "0" (*pu64),
474 "m" ( u32EBX ),
475 "c" ( (uint32_t)(u64 >> 32) ),
476 "S" (pu64));
477# else /* !PIC */
478 __asm__ __volatile__("1:\n\t"
479 "lock; cmpxchg8b %1\n\t"
480 "jnz 1b\n\t"
481 : "=A" (u64),
482 "=m" (*pu64)
483 : "0" (*pu64),
484 "b" ( (uint32_t)u64 ),
485 "c" ( (uint32_t)(u64 >> 32) ));
486# endif
487# else
488 __asm
489 {
490 mov ebx, dword ptr [u64]
491 mov ecx, dword ptr [u64 + 4]
492 mov edi, pu64
493 mov eax, dword ptr [edi]
494 mov edx, dword ptr [edi + 4]
495 retry:
496 lock cmpxchg8b [edi]
497 jnz retry
498 mov dword ptr [u64], eax
499 mov dword ptr [u64 + 4], edx
500 }
501# endif
502# endif /* !RT_ARCH_AMD64 */
503 return u64;
504}
505#endif
506
507
508/**
 509 * Atomically Exchange a signed 64-bit value, ordered.
510 *
511 * @returns Current *pi64 value
512 * @param pi64 Pointer to the 64-bit variable to update.
513 * @param i64 The 64-bit value to assign to *pi64.
514 */
515DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
516{
517 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
518}
519
520
521/**
522 * Atomically Exchange a pointer value, ordered.
523 *
524 * @returns Current *ppv value
525 * @param ppv Pointer to the pointer variable to update.
526 * @param pv The pointer value to assign to *ppv.
527 */
528DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
529{
530#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
531 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
532#elif ARCH_BITS == 64
533 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
534#else
535# error "ARCH_BITS is bogus"
536#endif
537}
538
539
540/**
541 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
542 *
543 * @returns Current *pv value
544 * @param ppv Pointer to the pointer variable to update.
545 * @param pv The pointer value to assign to *ppv.
546 * @param Type The type of *ppv, sans volatile.
547 */
548#ifdef __GNUC__
549# define ASMAtomicXchgPtrT(ppv, pv, Type) \
550 __extension__ \
551 ({\
552 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
553 Type const pvTypeChecked = (pv); \
554 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
555 pvTypeCheckedRet; \
556 })
557#else
558# define ASMAtomicXchgPtrT(ppv, pv, Type) \
559 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
560#endif
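/*
 * A small sketch of how ASMAtomicXchgPtrT avoids the casts a plain
 * ASMAtomicXchgPtr call would need; EXAMPLESTATE and g_pExampleState are
 * hypothetical names used purely for illustration.
 *
 * @code
 *      typedef struct EXAMPLESTATE EXAMPLESTATE;
 *      static EXAMPLESTATE * volatile g_pExampleState;
 *
 *      EXAMPLESTATE *exampleInstallState(EXAMPLESTATE *pNewState)
 *      {
 *          // Returns the previously installed state so the caller can free it.
 *          return ASMAtomicXchgPtrT(&g_pExampleState, pNewState, EXAMPLESTATE *);
 *      }
 * @endcode
 */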
561
562
563/**
564 * Atomically Exchange a raw-mode context pointer value, ordered.
565 *
566 * @returns Current *ppv value
567 * @param ppvRC Pointer to the pointer variable to update.
 568 * @param pvRC The pointer value to assign to *ppvRC.
569 */
570DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
571{
572 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
573}
574
575
576/**
577 * Atomically Exchange a ring-0 pointer value, ordered.
578 *
579 * @returns Current *ppv value
580 * @param ppvR0 Pointer to the pointer variable to update.
 581 * @param pvR0 The pointer value to assign to *ppvR0.
582 */
583DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
584{
585#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
586 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
587#elif R0_ARCH_BITS == 64
588 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
589#else
590# error "R0_ARCH_BITS is bogus"
591#endif
592}
593
594
595/**
596 * Atomically Exchange a ring-3 pointer value, ordered.
597 *
598 * @returns Current *ppv value
599 * @param ppvR3 Pointer to the pointer variable to update.
 600 * @param pvR3 The pointer value to assign to *ppvR3.
601 */
602DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
603{
604#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
605 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
606#elif R3_ARCH_BITS == 64
607 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
608#else
609# error "R3_ARCH_BITS is bogus"
610#endif
611}
612
613
614/** @def ASMAtomicXchgHandle
615 * Atomically Exchange a typical IPRT handle value, ordered.
616 *
617 * @param ph Pointer to the value to update.
 618 * @param hNew The new value to assign to *ph.
619 * @param phRes Where to store the current *ph value.
620 *
621 * @remarks This doesn't currently work for all handles (like RTFILE).
622 */
623#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
624# define ASMAtomicXchgHandle(ph, hNew, phRes) \
625 do { \
626 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
627 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
628 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
629 } while (0)
630#elif HC_ARCH_BITS == 64
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
635 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
636 } while (0)
637#else
638# error HC_ARCH_BITS
639#endif
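/*
 * A hedged usage sketch for ASMAtomicXchgHandle, assuming an RTSEMEVENT
 * handle (a pointer-sized IPRT handle from iprt/semaphore.h) and an
 * illustrative g_hExampleEvt global:
 *
 * @code
 *      static RTSEMEVENT volatile g_hExampleEvt = NIL_RTSEMEVENT;
 *
 *      void exampleDestroyEvent(void)
 *      {
 *          RTSEMEVENT hEvt;
 *          ASMAtomicXchgHandle(&g_hExampleEvt, NIL_RTSEMEVENT, &hEvt);
 *          if (hEvt != NIL_RTSEMEVENT)
 *              RTSemEventDestroy(hEvt);    // only one racing caller gets the old handle
 *      }
 * @endcode
 */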
640
641
642/**
643 * Atomically Exchange a value which size might differ
644 * between platforms or compilers, ordered.
645 *
646 * @param pu Pointer to the variable to update.
647 * @param uNew The value to assign to *pu.
 648 * @todo This is busted as it's missing the result argument.
649 */
650#define ASMAtomicXchgSize(pu, uNew) \
651 do { \
652 switch (sizeof(*(pu))) { \
653 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
654 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
655 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
656 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
657 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
658 } \
659 } while (0)
660
661/**
662 * Atomically Exchange a value which size might differ
663 * between platforms or compilers, ordered.
664 *
665 * @param pu Pointer to the variable to update.
666 * @param uNew The value to assign to *pu.
667 * @param puRes Where to store the current *pu value.
668 */
669#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
670 do { \
671 switch (sizeof(*(pu))) { \
672 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
673 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
674 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
675 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
676 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
677 } \
678 } while (0)
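/*
 * A minimal sketch of the size-generic exchange; size_t is a typical example
 * of a type whose width differs between 32-bit and 64-bit builds, and
 * g_cbExample is an illustrative name only.
 *
 * @code
 *      static size_t volatile g_cbExample;
 *
 *      size_t exampleSetSize(size_t cbNew)
 *      {
 *          size_t cbOld;
 *          ASMAtomicXchgSizeCorrect(&g_cbExample, cbNew, &cbOld);
 *          return cbOld;
 *      }
 * @endcode
 */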
679
680
681
682/**
683 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
684 *
685 * @returns true if xchg was done.
686 * @returns false if xchg wasn't done.
687 *
688 * @param pu8 Pointer to the value to update.
 689 * @param u8New The new value to assign to *pu8.
 690 * @param u8Old The old value to compare *pu8 with.
691 *
692 * @remarks x86: Requires a 486 or later.
693 */
694#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
695DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
696#else
697DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
698{
699 uint8_t u8Ret;
700 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
701 "setz %1\n\t"
702 : "=m" (*pu8),
703 "=qm" (u8Ret),
704 "=a" (u8Old)
705 : "q" (u8New),
706 "2" (u8Old),
707 "m" (*pu8));
708 return (bool)u8Ret;
709}
710#endif
711
712
713/**
714 * Atomically Compare and Exchange a signed 8-bit value, ordered.
715 *
716 * @returns true if xchg was done.
717 * @returns false if xchg wasn't done.
718 *
719 * @param pi8 Pointer to the value to update.
 720 * @param i8New The new value to assign to *pi8.
 721 * @param i8Old The old value to compare *pi8 with.
722 *
723 * @remarks x86: Requires a 486 or later.
724 */
725DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
726{
727 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
728}
729
730
731/**
732 * Atomically Compare and Exchange a bool value, ordered.
733 *
734 * @returns true if xchg was done.
735 * @returns false if xchg wasn't done.
736 *
737 * @param pf Pointer to the value to update.
 738 * @param fNew The new value to assign to *pf.
 739 * @param fOld The old value to compare *pf with.
740 *
741 * @remarks x86: Requires a 486 or later.
742 */
743DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
744{
745 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
746}
747
748
749/**
750 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
751 *
752 * @returns true if xchg was done.
753 * @returns false if xchg wasn't done.
754 *
755 * @param pu32 Pointer to the value to update.
 756 * @param u32New The new value to assign to *pu32.
 757 * @param u32Old The old value to compare *pu32 with.
758 *
759 * @remarks x86: Requires a 486 or later.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
763#else
764DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
765{
766# if RT_INLINE_ASM_GNU_STYLE
767 uint8_t u8Ret;
768 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
769 "setz %1\n\t"
770 : "=m" (*pu32),
771 "=qm" (u8Ret),
772 "=a" (u32Old)
773 : "r" (u32New),
774 "2" (u32Old),
775 "m" (*pu32));
776 return (bool)u8Ret;
777
778# elif RT_INLINE_ASM_USES_INTRIN
779 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
780
781# else
782 uint32_t u32Ret;
783 __asm
784 {
785# ifdef RT_ARCH_AMD64
786 mov rdx, [pu32]
787# else
788 mov edx, [pu32]
789# endif
790 mov eax, [u32Old]
791 mov ecx, [u32New]
792# ifdef RT_ARCH_AMD64
793 lock cmpxchg [rdx], ecx
794# else
795 lock cmpxchg [edx], ecx
796# endif
797 setz al
798 movzx eax, al
799 mov [u32Ret], eax
800 }
801 return !!u32Ret;
802# endif
803}
804#endif
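/*
 * A minimal compare-and-exchange retry loop sketch (an "atomic max"); note
 * the argument order: the new value comes before the value to compare with.
 * The helper name is illustrative only.
 *
 * @code
 *      void exampleAtomicMaxU32(uint32_t volatile *pu32, uint32_t u32New)
 *      {
 *          for (;;)
 *          {
 *              uint32_t u32Cur = ASMAtomicReadU32(pu32);
 *              if (u32Cur >= u32New)
 *                  return;                                 // already large enough
 *              if (ASMAtomicCmpXchgU32(pu32, u32New, u32Cur))
 *                  return;                                 // we won the race
 *              // somebody else changed *pu32; re-read and retry
 *          }
 *      }
 * @endcode
 */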
805
806
807/**
808 * Atomically Compare and Exchange a signed 32-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pi32 Pointer to the value to update.
 814 * @param i32New The new value to assign to *pi32.
 815 * @param i32Old The old value to compare *pi32 with.
816 *
817 * @remarks x86: Requires a 486 or later.
818 */
819DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
820{
821 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
822}
823
824
825/**
826 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
827 *
828 * @returns true if xchg was done.
829 * @returns false if xchg wasn't done.
830 *
831 * @param pu64 Pointer to the 64-bit variable to update.
832 * @param u64New The 64-bit value to assign to *pu64.
833 * @param u64Old The value to compare with.
834 *
835 * @remarks x86: Requires a Pentium or later.
836 */
837#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
838 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
839DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
840#else
841DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
842{
843# if RT_INLINE_ASM_USES_INTRIN
844 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
845
846# elif defined(RT_ARCH_AMD64)
847# if RT_INLINE_ASM_GNU_STYLE
848 uint8_t u8Ret;
849 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
850 "setz %1\n\t"
851 : "=m" (*pu64),
852 "=qm" (u8Ret),
853 "=a" (u64Old)
854 : "r" (u64New),
855 "2" (u64Old),
856 "m" (*pu64));
857 return (bool)u8Ret;
858# else
859 bool fRet;
860 __asm
861 {
 862 mov rdx, [pu64]
863 mov rax, [u64Old]
864 mov rcx, [u64New]
865 lock cmpxchg [rdx], rcx
866 setz al
867 mov [fRet], al
868 }
869 return fRet;
870# endif
871# else /* !RT_ARCH_AMD64 */
872 uint32_t u32Ret;
873# if RT_INLINE_ASM_GNU_STYLE
874# if defined(PIC) || defined(__PIC__)
875 uint32_t u32EBX = (uint32_t)u64New;
876 uint32_t u32Spill;
877 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
878 "lock; cmpxchg8b (%6)\n\t"
879 "setz %%al\n\t"
880 "movl %4, %%ebx\n\t"
881 "movzbl %%al, %%eax\n\t"
882 : "=a" (u32Ret),
883 "=d" (u32Spill),
884# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
885 "+m" (*pu64)
886# else
887 "=m" (*pu64)
888# endif
889 : "A" (u64Old),
890 "m" ( u32EBX ),
891 "c" ( (uint32_t)(u64New >> 32) ),
892 "S" (pu64));
893# else /* !PIC */
894 uint32_t u32Spill;
895 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
896 "setz %%al\n\t"
897 "movzbl %%al, %%eax\n\t"
898 : "=a" (u32Ret),
899 "=d" (u32Spill),
900 "+m" (*pu64)
901 : "A" (u64Old),
902 "b" ( (uint32_t)u64New ),
903 "c" ( (uint32_t)(u64New >> 32) ));
904# endif
905 return (bool)u32Ret;
906# else
907 __asm
908 {
909 mov ebx, dword ptr [u64New]
910 mov ecx, dword ptr [u64New + 4]
911 mov edi, [pu64]
912 mov eax, dword ptr [u64Old]
913 mov edx, dword ptr [u64Old + 4]
914 lock cmpxchg8b [edi]
915 setz al
916 movzx eax, al
917 mov dword ptr [u32Ret], eax
918 }
919 return !!u32Ret;
920# endif
921# endif /* !RT_ARCH_AMD64 */
922}
923#endif
924
925
926/**
927 * Atomically Compare and exchange a signed 64-bit value, ordered.
928 *
929 * @returns true if xchg was done.
930 * @returns false if xchg wasn't done.
931 *
932 * @param pi64 Pointer to the 64-bit variable to update.
 933 * @param i64 The 64-bit value to assign to *pi64.
934 * @param i64Old The value to compare with.
935 *
936 * @remarks x86: Requires a Pentium or later.
937 */
938DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
939{
940 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
941}
942
943
944/**
945 * Atomically Compare and Exchange a pointer value, ordered.
946 *
947 * @returns true if xchg was done.
948 * @returns false if xchg wasn't done.
949 *
950 * @param ppv Pointer to the value to update.
 951 * @param pvNew The new value to assign to *ppv.
 952 * @param pvOld The old value to compare *ppv with.
953 *
954 * @remarks x86: Requires a 486 or later.
955 */
956DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
957{
958#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
959 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
960#elif ARCH_BITS == 64
961 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
962#else
963# error "ARCH_BITS is bogus"
964#endif
965}
966
967
968/**
969 * Atomically Compare and Exchange a pointer value, ordered.
970 *
971 * @returns true if xchg was done.
972 * @returns false if xchg wasn't done.
973 *
974 * @param ppv Pointer to the value to update.
 975 * @param pvNew The new value to assign to *ppv.
 976 * @param pvOld The old value to compare *ppv with.
977 *
978 * @remarks This is relatively type safe on GCC platforms.
979 * @remarks x86: Requires a 486 or later.
980 */
981#ifdef __GNUC__
982# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
983 __extension__ \
984 ({\
985 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
986 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
987 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
988 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
989 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
990 fMacroRet; \
991 })
992#else
993# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
994 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
995#endif
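/*
 * A sketch of the classic lock-free LIFO push built on ASMAtomicCmpXchgPtr
 * together with ASMAtomicReadPtrT (defined further down); EXAMPLENODE and
 * g_pExampleHead are hypothetical names used for illustration.
 *
 * @code
 *      typedef struct EXAMPLENODE
 *      {
 *          struct EXAMPLENODE *pNext;
 *      } EXAMPLENODE;
 *      static EXAMPLENODE * volatile g_pExampleHead;
 *
 *      void examplePush(EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOldHead;
 *          do
 *          {
 *              pOldHead = ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *              pNode->pNext = pOldHead;
 *          } while (!ASMAtomicCmpXchgPtr(&g_pExampleHead, pNode, pOldHead));
 *      }
 * @endcode
 */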
996
997
998/** @def ASMAtomicCmpXchgHandle
999 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1000 *
1001 * @param ph Pointer to the value to update.
 1002 * @param hNew The new value to assign to *ph.
 1003 * @param hOld The old value to compare *ph with.
1004 * @param fRc Where to store the result.
1005 *
1006 * @remarks This doesn't currently work for all handles (like RTFILE).
1007 * @remarks x86: Requires a 486 or later.
1008 */
1009#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1010# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1011 do { \
1012 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1013 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1014 } while (0)
1015#elif HC_ARCH_BITS == 64
1016# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1017 do { \
1018 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1019 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1020 } while (0)
1021#else
1022# error HC_ARCH_BITS
1023#endif
1024
1025
1026/** @def ASMAtomicCmpXchgSize
1027 * Atomically Compare and Exchange a value which size might differ
1028 * between platforms or compilers, ordered.
1029 *
1030 * @param pu Pointer to the value to update.
 1031 * @param uNew The new value to assign to *pu.
 1032 * @param uOld The old value to compare *pu with.
1033 * @param fRc Where to store the result.
1034 *
1035 * @remarks x86: Requires a 486 or later.
1036 */
1037#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1038 do { \
1039 switch (sizeof(*(pu))) { \
1040 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1041 break; \
1042 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1043 break; \
1044 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1045 (fRc) = false; \
1046 break; \
1047 } \
1048 } while (0)
1049
1050
1051/**
1052 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1053 * passes back old value, ordered.
1054 *
1055 * @returns true if xchg was done.
1056 * @returns false if xchg wasn't done.
1057 *
1058 * @param pu32 Pointer to the value to update.
 1059 * @param u32New The new value to assign to *pu32.
 1060 * @param u32Old The old value to compare *pu32 with.
 1061 * @param pu32Old Pointer to store the old value at.
1062 *
1063 * @remarks x86: Requires a 486 or later.
1064 */
1065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1066DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1067#else
1068DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1069{
1070# if RT_INLINE_ASM_GNU_STYLE
1071 uint8_t u8Ret;
1072 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1073 "setz %1\n\t"
1074 : "=m" (*pu32),
1075 "=qm" (u8Ret),
1076 "=a" (*pu32Old)
1077 : "r" (u32New),
1078 "a" (u32Old),
1079 "m" (*pu32));
1080 return (bool)u8Ret;
1081
1082# elif RT_INLINE_ASM_USES_INTRIN
1083 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1084
1085# else
1086 uint32_t u32Ret;
1087 __asm
1088 {
1089# ifdef RT_ARCH_AMD64
1090 mov rdx, [pu32]
1091# else
1092 mov edx, [pu32]
1093# endif
1094 mov eax, [u32Old]
1095 mov ecx, [u32New]
1096# ifdef RT_ARCH_AMD64
1097 lock cmpxchg [rdx], ecx
1098 mov rdx, [pu32Old]
1099 mov [rdx], eax
1100# else
1101 lock cmpxchg [edx], ecx
1102 mov edx, [pu32Old]
1103 mov [edx], eax
1104# endif
1105 setz al
1106 movzx eax, al
1107 mov [u32Ret], eax
1108 }
1109 return !!u32Ret;
1110# endif
1111}
1112#endif
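/*
 * A sketch of why the Ex variant is handy in retry loops: the value that made
 * the compare fail is passed back, so no separate re-read is needed before
 * the next attempt. The flag-setting helper below is illustrative only.
 *
 * @code
 *      void exampleAtomicSetFlagsU32(uint32_t volatile *pu32, uint32_t fFlags)
 *      {
 *          uint32_t u32Cur = ASMAtomicReadU32(pu32);
 *          for (;;)
 *          {
 *              uint32_t u32Old;
 *              if (ASMAtomicCmpXchgExU32(pu32, u32Cur | fFlags, u32Cur, &u32Old))
 *                  break;
 *              u32Cur = u32Old;    // reuse the returned value instead of re-reading
 *          }
 *      }
 * @endcode
 */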
1113
1114
1115/**
1116 * Atomically Compare and Exchange a signed 32-bit value, additionally
1117 * passes back old value, ordered.
1118 *
1119 * @returns true if xchg was done.
1120 * @returns false if xchg wasn't done.
1121 *
1122 * @param pi32 Pointer to the value to update.
 1123 * @param i32New The new value to assign to *pi32.
 1124 * @param i32Old The old value to compare *pi32 with.
 1125 * @param pi32Old Pointer to store the old value at.
1126 *
1127 * @remarks x86: Requires a 486 or later.
1128 */
1129DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1130{
1131 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1132}
1133
1134
1135/**
1136 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1137 * passing back old value, ordered.
1138 *
1139 * @returns true if xchg was done.
1140 * @returns false if xchg wasn't done.
1141 *
1142 * @param pu64 Pointer to the 64-bit variable to update.
1143 * @param u64New The 64-bit value to assign to *pu64.
1144 * @param u64Old The value to compare with.
 1145 * @param pu64Old Pointer to store the old value at.
1146 *
1147 * @remarks x86: Requires a Pentium or later.
1148 */
1149#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1150 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1151DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1152#else
1153DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1154{
1155# if RT_INLINE_ASM_USES_INTRIN
1156 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1157
1158# elif defined(RT_ARCH_AMD64)
1159# if RT_INLINE_ASM_GNU_STYLE
1160 uint8_t u8Ret;
1161 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1162 "setz %1\n\t"
1163 : "=m" (*pu64),
1164 "=qm" (u8Ret),
1165 "=a" (*pu64Old)
1166 : "r" (u64New),
1167 "a" (u64Old),
1168 "m" (*pu64));
1169 return (bool)u8Ret;
1170# else
1171 bool fRet;
1172 __asm
1173 {
 1174 mov rdx, [pu64]
1175 mov rax, [u64Old]
1176 mov rcx, [u64New]
1177 lock cmpxchg [rdx], rcx
1178 mov rdx, [pu64Old]
1179 mov [rdx], rax
1180 setz al
1181 mov [fRet], al
1182 }
1183 return fRet;
1184# endif
1185# else /* !RT_ARCH_AMD64 */
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint64_t u64Ret;
1188# if defined(PIC) || defined(__PIC__)
1189 /* NB: this code uses a memory clobber description, because the clean
1190 * solution with an output value for *pu64 makes gcc run out of registers.
1191 * This will cause suboptimal code, and anyone with a better solution is
1192 * welcome to improve this. */
1193 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1194 "lock; cmpxchg8b %3\n\t"
1195 "xchgl %%ebx, %1\n\t"
1196 : "=A" (u64Ret)
1197 : "DS" ((uint32_t)u64New),
1198 "c" ((uint32_t)(u64New >> 32)),
1199 "m" (*pu64),
1200 "0" (u64Old)
1201 : "memory" );
1202# else /* !PIC */
1203 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1204 : "=A" (u64Ret),
1205 "=m" (*pu64)
1206 : "b" ((uint32_t)u64New),
1207 "c" ((uint32_t)(u64New >> 32)),
1208 "m" (*pu64),
1209 "0" (u64Old));
1210# endif
1211 *pu64Old = u64Ret;
1212 return u64Ret == u64Old;
1213# else
1214 uint32_t u32Ret;
1215 __asm
1216 {
1217 mov ebx, dword ptr [u64New]
1218 mov ecx, dword ptr [u64New + 4]
1219 mov edi, [pu64]
1220 mov eax, dword ptr [u64Old]
1221 mov edx, dword ptr [u64Old + 4]
1222 lock cmpxchg8b [edi]
1223 mov ebx, [pu64Old]
1224 mov [ebx], eax
1225 setz al
1226 movzx eax, al
1227 add ebx, 4
1228 mov [ebx], edx
1229 mov dword ptr [u32Ret], eax
1230 }
1231 return !!u32Ret;
1232# endif
1233# endif /* !RT_ARCH_AMD64 */
1234}
1235#endif
1236
1237
1238/**
1239 * Atomically Compare and exchange a signed 64-bit value, additionally
1240 * passing back old value, ordered.
1241 *
1242 * @returns true if xchg was done.
1243 * @returns false if xchg wasn't done.
1244 *
1245 * @param pi64 Pointer to the 64-bit variable to update.
 1246 * @param i64 The 64-bit value to assign to *pi64.
 1247 * @param i64Old The value to compare with.
 1248 * @param pi64Old Pointer to store the old value at.
1249 *
1250 * @remarks x86: Requires a Pentium or later.
1251 */
1252DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1253{
1254 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1255}
1256
1257/** @def ASMAtomicCmpXchgExHandle
1258 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1259 *
1260 * @param ph Pointer to the value to update.
 1261 * @param hNew The new value to assign to *ph.
 1262 * @param hOld The old value to compare *ph with.
1263 * @param fRc Where to store the result.
1264 * @param phOldVal Pointer to where to store the old value.
1265 *
1266 * @remarks This doesn't currently work for all handles (like RTFILE).
1267 */
1268#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1269# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1270 do { \
 1271 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
 1272 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
 1273 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1274 } while (0)
1275#elif HC_ARCH_BITS == 64
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1279 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
 1280 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1281 } while (0)
1282#else
1283# error HC_ARCH_BITS
1284#endif
1285
1286
1287/** @def ASMAtomicCmpXchgExSize
1288 * Atomically Compare and Exchange a value which size might differ
1289 * between platforms or compilers. Additionally passes back old value.
1290 *
1291 * @param pu Pointer to the value to update.
 1292 * @param uNew The new value to assign to *pu.
 1293 * @param uOld The old value to compare *pu with.
1294 * @param fRc Where to store the result.
1295 * @param puOldVal Pointer to where to store the old value.
1296 *
1297 * @remarks x86: Requires a 486 or later.
1298 */
1299#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1300 do { \
1301 switch (sizeof(*(pu))) { \
 1302 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
 1303 break; \
 1304 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
 1305 break; \
 1306 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
 1307 (fRc) = false; \
 1308 *(puOldVal) = 0; \
1309 break; \
1310 } \
1311 } while (0)
1312
1313
1314/**
1315 * Atomically Compare and Exchange a pointer value, additionally
1316 * passing back old value, ordered.
1317 *
1318 * @returns true if xchg was done.
1319 * @returns false if xchg wasn't done.
1320 *
1321 * @param ppv Pointer to the value to update.
 1322 * @param pvNew The new value to assign to *ppv.
 1323 * @param pvOld The old value to compare *ppv with.
 1324 * @param ppvOld Pointer to store the old value at.
1325 *
1326 * @remarks x86: Requires a 486 or later.
1327 */
1328DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1329{
1330#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1331 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1332#elif ARCH_BITS == 64
1333 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1334#else
1335# error "ARCH_BITS is bogus"
1336#endif
1337}
1338
1339
1340/**
1341 * Atomically Compare and Exchange a pointer value, additionally
1342 * passing back old value, ordered.
1343 *
1344 * @returns true if xchg was done.
1345 * @returns false if xchg wasn't done.
1346 *
1347 * @param ppv Pointer to the value to update.
 1348 * @param pvNew The new value to assign to *ppv.
 1349 * @param pvOld The old value to compare *ppv with.
 1350 * @param ppvOld Pointer to store the old value at.
1351 *
1352 * @remarks This is relatively type safe on GCC platforms.
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355#ifdef __GNUC__
1356# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1357 __extension__ \
1358 ({\
1359 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1360 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1361 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1362 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1363 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1364 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1365 (void **)ppvOldTypeChecked); \
1366 fMacroRet; \
1367 })
1368#else
1369# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1370 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1371#endif
1372
1373
1374/**
1375 * Virtualization unfriendly serializing instruction, always exits.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMSerializeInstructionCpuId(void);
1379#else
1380DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1381{
1382# if RT_INLINE_ASM_GNU_STYLE
1383 RTCCUINTREG xAX = 0;
1384# ifdef RT_ARCH_AMD64
1385 __asm__ __volatile__ ("cpuid"
1386 : "=a" (xAX)
1387 : "0" (xAX)
1388 : "rbx", "rcx", "rdx", "memory");
1389# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1390 __asm__ __volatile__ ("push %%ebx\n\t"
1391 "cpuid\n\t"
1392 "pop %%ebx\n\t"
1393 : "=a" (xAX)
1394 : "0" (xAX)
1395 : "ecx", "edx", "memory");
1396# else
1397 __asm__ __volatile__ ("cpuid"
1398 : "=a" (xAX)
1399 : "0" (xAX)
1400 : "ebx", "ecx", "edx", "memory");
1401# endif
1402
1403# elif RT_INLINE_ASM_USES_INTRIN
1404 int aInfo[4];
1405 _ReadWriteBarrier();
1406 __cpuid(aInfo, 0);
1407
1408# else
1409 __asm
1410 {
1411 push ebx
1412 xor eax, eax
1413 cpuid
1414 pop ebx
1415 }
1416# endif
1417}
1418#endif
1419
1420/**
1421 * Virtualization friendly serializing instruction, though more expensive.
1422 */
1423#if RT_INLINE_ASM_EXTERNAL || defined(_MSC_VER) /** @todo fix 32-bit inline MSC asm */
1424DECLASM(void) ASMSerializeInstructionIRet(void);
1425#else
1426DECLINLINE(void) ASMSerializeInstructionIRet(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429# ifdef RT_ARCH_AMD64
1430 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1431 "subq $128, %%rsp\n\t" /*redzone*/
1432 "mov %%ss, %%eax\n\t"
1433 "pushq %%rax\n\t"
1434 "pushq %%r10\n\t"
1435 "pushfq\n\t"
1436 "movl %%cs, %%eax\n\t"
1437 "pushq %%rax\n\t"
1438 "leaq 1f(%%rip), %%rax\n\t"
1439 "pushq %%rax\n\t"
1440 "iretq\n\t"
1441 "1:\n\t"
1442 ::: "rax", "r10", "memory");
1443# else
1444 __asm__ __volatile__ ("pushfl\n\t"
1445 "pushl %%cs\n\t"
1446 "pushl $1f\n\t"
1447 "iretl\n\t"
1448 "1:\n\t"
1449 ::: "memory");
1450# endif
1451
1452# else
1453 __asm
1454 {
1455 pushfd
1456 push cs
1457 push la_ret
 1458 iretd
1459 la_ret:
1460 }
1461# endif
1462}
1463#endif
1464
1465/**
1466 * Virtualization friendlier serializing instruction, may still cause exits.
1467 */
1468#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1469DECLASM(void) ASMSerializeInstructionRdTscp(void);
1470#else
1471DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1472{
1473# if RT_INLINE_ASM_GNU_STYLE
1474 /* rdtscp is not supported by ancient linux build VM of course :-( */
1475# ifdef RT_ARCH_AMD64
1476 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1477 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1478# else
1479 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1480 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1481# endif
1482# else
1483# if RT_INLINE_ASM_USES_INTRIN >= 15
1484 uint32_t uIgnore;
1485 _ReadWriteBarrier();
1486 (void)__rdtscp(&uIgnore);
1487 (void)uIgnore;
1488# else
1489 __asm
1490 {
1491 rdtscp
1492 }
1493# endif
1494# endif
1495}
1496#endif
1497
1498
1499/**
1500 * Serialize Instruction.
1501 */
1502#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1503# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1504#else
1505# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1506#endif
1507
1508
1509/**
1510 * Memory fence, waits for any pending writes and reads to complete.
1511 */
1512DECLINLINE(void) ASMMemoryFence(void)
1513{
1514 /** @todo use mfence? check if all cpus we care for support it. */
1515 uint32_t volatile u32;
1516 ASMAtomicXchgU32(&u32, 0);
1517}
1518
1519
1520/**
1521 * Write fence, waits for any pending writes to complete.
1522 */
1523DECLINLINE(void) ASMWriteFence(void)
1524{
1525 /** @todo use sfence? check if all cpus we care for support it. */
1526 ASMMemoryFence();
1527}
1528
1529
1530/**
1531 * Read fence, waits for any pending reads to complete.
1532 */
1533DECLINLINE(void) ASMReadFence(void)
1534{
1535 /** @todo use lfence? check if all cpus we care for support it. */
1536 ASMMemoryFence();
1537}
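/*
 * A minimal producer/consumer sketch for the write/read fences; the globals
 * are illustrative only, and the plain volatile stores/loads are assumed to
 * be naturally atomic (aligned, at most 32 bits wide).
 *
 * @code
 *      static uint32_t volatile g_u32ExamplePayload;
 *      static bool     volatile g_fExampleReady;
 *
 *      void exampleProduce(uint32_t u32)
 *      {
 *          g_u32ExamplePayload = u32;
 *          ASMWriteFence();                // payload must be visible before the flag
 *          g_fExampleReady = true;
 *      }
 *
 *      bool exampleTryConsume(uint32_t *pu32)
 *      {
 *          if (!g_fExampleReady)
 *              return false;
 *          ASMReadFence();                 // don't let the payload read pass the flag read
 *          *pu32 = g_u32ExamplePayload;
 *          return true;
 *      }
 * @endcode
 */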
1538
1539
1540/**
1541 * Atomically reads an unsigned 8-bit value, ordered.
1542 *
1543 * @returns Current *pu8 value
1544 * @param pu8 Pointer to the 8-bit variable to read.
1545 */
1546DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1547{
1548 ASMMemoryFence();
1549 return *pu8; /* byte reads are atomic on x86 */
1550}
1551
1552
1553/**
1554 * Atomically reads an unsigned 8-bit value, unordered.
1555 *
1556 * @returns Current *pu8 value
1557 * @param pu8 Pointer to the 8-bit variable to read.
1558 */
1559DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1560{
1561 return *pu8; /* byte reads are atomic on x86 */
1562}
1563
1564
1565/**
1566 * Atomically reads a signed 8-bit value, ordered.
1567 *
1568 * @returns Current *pi8 value
1569 * @param pi8 Pointer to the 8-bit variable to read.
1570 */
1571DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1572{
1573 ASMMemoryFence();
1574 return *pi8; /* byte reads are atomic on x86 */
1575}
1576
1577
1578/**
1579 * Atomically reads a signed 8-bit value, unordered.
1580 *
1581 * @returns Current *pi8 value
1582 * @param pi8 Pointer to the 8-bit variable to read.
1583 */
1584DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1585{
1586 return *pi8; /* byte reads are atomic on x86 */
1587}
1588
1589
1590/**
1591 * Atomically reads an unsigned 16-bit value, ordered.
1592 *
1593 * @returns Current *pu16 value
1594 * @param pu16 Pointer to the 16-bit variable to read.
1595 */
1596DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1597{
1598 ASMMemoryFence();
1599 Assert(!((uintptr_t)pu16 & 1));
1600 return *pu16;
1601}
1602
1603
1604/**
1605 * Atomically reads an unsigned 16-bit value, unordered.
1606 *
1607 * @returns Current *pu16 value
1608 * @param pu16 Pointer to the 16-bit variable to read.
1609 */
1610DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1611{
1612 Assert(!((uintptr_t)pu16 & 1));
1613 return *pu16;
1614}
1615
1616
1617/**
1618 * Atomically reads a signed 16-bit value, ordered.
1619 *
1620 * @returns Current *pi16 value
1621 * @param pi16 Pointer to the 16-bit variable to read.
1622 */
1623DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1624{
1625 ASMMemoryFence();
1626 Assert(!((uintptr_t)pi16 & 1));
1627 return *pi16;
1628}
1629
1630
1631/**
1632 * Atomically reads a signed 16-bit value, unordered.
1633 *
1634 * @returns Current *pi16 value
1635 * @param pi16 Pointer to the 16-bit variable to read.
1636 */
1637DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1638{
1639 Assert(!((uintptr_t)pi16 & 1));
1640 return *pi16;
1641}
1642
1643
1644/**
1645 * Atomically reads an unsigned 32-bit value, ordered.
1646 *
1647 * @returns Current *pu32 value
1648 * @param pu32 Pointer to the 32-bit variable to read.
1649 */
1650DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1651{
1652 ASMMemoryFence();
1653 Assert(!((uintptr_t)pu32 & 3));
1654 return *pu32;
1655}
1656
1657
1658/**
1659 * Atomically reads an unsigned 32-bit value, unordered.
1660 *
1661 * @returns Current *pu32 value
1662 * @param pu32 Pointer to the 32-bit variable to read.
1663 */
1664DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1665{
1666 Assert(!((uintptr_t)pu32 & 3));
1667 return *pu32;
1668}
1669
1670
1671/**
1672 * Atomically reads a signed 32-bit value, ordered.
1673 *
1674 * @returns Current *pi32 value
1675 * @param pi32 Pointer to the 32-bit variable to read.
1676 */
1677DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1678{
1679 ASMMemoryFence();
1680 Assert(!((uintptr_t)pi32 & 3));
1681 return *pi32;
1682}
1683
1684
1685/**
1686 * Atomically reads a signed 32-bit value, unordered.
1687 *
1688 * @returns Current *pi32 value
1689 * @param pi32 Pointer to the 32-bit variable to read.
1690 */
1691DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1692{
1693 Assert(!((uintptr_t)pi32 & 3));
1694 return *pi32;
1695}
1696
1697
1698/**
1699 * Atomically reads an unsigned 64-bit value, ordered.
1700 *
1701 * @returns Current *pu64 value
1702 * @param pu64 Pointer to the 64-bit variable to read.
1703 * The memory pointed to must be writable.
1704 *
1705 * @remarks This may fault if the memory is read-only!
1706 * @remarks x86: Requires a Pentium or later.
1707 */
1708#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1709 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1710DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1711#else
1712DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1713{
1714 uint64_t u64;
1715# ifdef RT_ARCH_AMD64
1716 Assert(!((uintptr_t)pu64 & 7));
1717/*# if RT_INLINE_ASM_GNU_STYLE
1718 __asm__ __volatile__( "mfence\n\t"
1719 "movq %1, %0\n\t"
1720 : "=r" (u64)
1721 : "m" (*pu64));
1722# else
1723 __asm
1724 {
1725 mfence
1726 mov rdx, [pu64]
1727 mov rax, [rdx]
1728 mov [u64], rax
1729 }
1730# endif*/
1731 ASMMemoryFence();
1732 u64 = *pu64;
1733# else /* !RT_ARCH_AMD64 */
1734# if RT_INLINE_ASM_GNU_STYLE
1735# if defined(PIC) || defined(__PIC__)
1736 uint32_t u32EBX = 0;
1737 Assert(!((uintptr_t)pu64 & 7));
1738 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1739 "lock; cmpxchg8b (%5)\n\t"
1740 "movl %3, %%ebx\n\t"
1741 : "=A" (u64),
1742# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1743 "+m" (*pu64)
1744# else
1745 "=m" (*pu64)
1746# endif
1747 : "0" (0ULL),
1748 "m" (u32EBX),
1749 "c" (0),
1750 "S" (pu64));
1751# else /* !PIC */
1752 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1753 : "=A" (u64),
1754 "+m" (*pu64)
1755 : "0" (0ULL),
1756 "b" (0),
1757 "c" (0));
1758# endif
1759# else
1760 Assert(!((uintptr_t)pu64 & 7));
1761 __asm
1762 {
1763 xor eax, eax
1764 xor edx, edx
1765 mov edi, pu64
1766 xor ecx, ecx
1767 xor ebx, ebx
1768 lock cmpxchg8b [edi]
1769 mov dword ptr [u64], eax
1770 mov dword ptr [u64 + 4], edx
1771 }
1772# endif
1773# endif /* !RT_ARCH_AMD64 */
1774 return u64;
1775}
1776#endif
1777
1778
1779/**
1780 * Atomically reads an unsigned 64-bit value, unordered.
1781 *
1782 * @returns Current *pu64 value
1783 * @param pu64 Pointer to the 64-bit variable to read.
1784 * The memory pointed to must be writable.
1785 *
1786 * @remarks This may fault if the memory is read-only!
1787 * @remarks x86: Requires a Pentium or later.
1788 */
1789#if !defined(RT_ARCH_AMD64) \
1790 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1791 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1792DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1793#else
1794DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1795{
1796 uint64_t u64;
1797# ifdef RT_ARCH_AMD64
1798 Assert(!((uintptr_t)pu64 & 7));
1799/*# if RT_INLINE_ASM_GNU_STYLE
1800 Assert(!((uintptr_t)pu64 & 7));
1801 __asm__ __volatile__("movq %1, %0\n\t"
1802 : "=r" (u64)
1803 : "m" (*pu64));
1804# else
1805 __asm
1806 {
1807 mov rdx, [pu64]
1808 mov rax, [rdx]
1809 mov [u64], rax
1810 }
1811# endif */
1812 u64 = *pu64;
1813# else /* !RT_ARCH_AMD64 */
1814# if RT_INLINE_ASM_GNU_STYLE
1815# if defined(PIC) || defined(__PIC__)
1816 uint32_t u32EBX = 0;
1817 uint32_t u32Spill;
1818 Assert(!((uintptr_t)pu64 & 7));
1819 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1820 "xor %%ecx,%%ecx\n\t"
1821 "xor %%edx,%%edx\n\t"
1822 "xchgl %%ebx, %3\n\t"
1823 "lock; cmpxchg8b (%4)\n\t"
1824 "movl %3, %%ebx\n\t"
1825 : "=A" (u64),
1826# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1827 "+m" (*pu64),
1828# else
1829 "=m" (*pu64),
1830# endif
1831 "=c" (u32Spill)
1832 : "m" (u32EBX),
1833 "S" (pu64));
1834# else /* !PIC */
1835 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1836 : "=A" (u64),
1837 "+m" (*pu64)
1838 : "0" (0ULL),
1839 "b" (0),
1840 "c" (0));
1841# endif
1842# else
1843 Assert(!((uintptr_t)pu64 & 7));
1844 __asm
1845 {
1846 xor eax, eax
1847 xor edx, edx
1848 mov edi, pu64
1849 xor ecx, ecx
1850 xor ebx, ebx
1851 lock cmpxchg8b [edi]
1852 mov dword ptr [u64], eax
1853 mov dword ptr [u64 + 4], edx
1854 }
1855# endif
1856# endif /* !RT_ARCH_AMD64 */
1857 return u64;
1858}
1859#endif
1860
1861
1862/**
1863 * Atomically reads a signed 64-bit value, ordered.
1864 *
1865 * @returns Current *pi64 value
1866 * @param pi64 Pointer to the 64-bit variable to read.
1867 * The memory pointed to must be writable.
1868 *
1869 * @remarks This may fault if the memory is read-only!
1870 * @remarks x86: Requires a Pentium or later.
1871 */
1872DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1873{
1874 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1875}
1876
1877
1878/**
1879 * Atomically reads a signed 64-bit value, unordered.
1880 *
1881 * @returns Current *pi64 value
1882 * @param pi64 Pointer to the 64-bit variable to read.
1883 * The memory pointed to must be writable.
1884 *
1885 * @remarks This will fault if the memory is read-only!
1886 * @remarks x86: Requires a Pentium or later.
1887 */
1888DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1889{
1890 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1891}
1892
1893
1894/**
1895 * Atomically reads a size_t value, ordered.
1896 *
1897 * @returns Current *pcb value
1898 * @param pcb Pointer to the size_t variable to read.
1899 */
1900DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1901{
1902#if ARCH_BITS == 64
1903 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1904#elif ARCH_BITS == 32
1905 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1906#elif ARCH_BITS == 16
1907 AssertCompileSize(size_t, 2);
1908 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1909#else
1910# error "Unsupported ARCH_BITS value"
1911#endif
1912}
1913
1914
1915/**
1916 * Atomically reads a size_t value, unordered.
1917 *
1918 * @returns Current *pcb value
1919 * @param pcb Pointer to the size_t variable to read.
1920 */
1921DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1922{
1923#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1924 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1925#elif ARCH_BITS == 32
1926 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1927#elif ARCH_BITS == 16
1928 AssertCompileSize(size_t, 2);
1929 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1930#else
1931# error "Unsupported ARCH_BITS value"
1932#endif
1933}
1934
1935
1936/**
1937 * Atomically reads a pointer value, ordered.
1938 *
1939 * @returns Current *pv value
1940 * @param ppv Pointer to the pointer variable to read.
1941 *
1942 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1943 * requires less typing (no casts).
1944 */
1945DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1946{
1947#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1948 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1949#elif ARCH_BITS == 64
1950 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1951#else
1952# error "ARCH_BITS is bogus"
1953#endif
1954}
1955
1956/**
1957 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1958 *
1959 * @returns Current *pv value
1960 * @param ppv Pointer to the pointer variable to read.
1961 * @param Type The type of *ppv, sans volatile.
1962 */
1963#ifdef __GNUC__
1964# define ASMAtomicReadPtrT(ppv, Type) \
1965 __extension__ \
1966 ({\
1967 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1968 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1969 pvTypeChecked; \
1970 })
1971#else
1972# define ASMAtomicReadPtrT(ppv, Type) \
1973 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1974#endif
1975
1976
1977/**
1978 * Atomically reads a pointer value, unordered.
1979 *
1980 * @returns Current *pv value
1981 * @param ppv Pointer to the pointer variable to read.
1982 *
1983 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1984 * requires less typing (no casts).
1985 */
1986DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1987{
1988#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1989 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1990#elif ARCH_BITS == 64
1991 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1992#else
1993# error "ARCH_BITS is bogus"
1994#endif
1995}
1996
1997
1998/**
1999 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2000 *
2001 * @returns Current *pv value
2002 * @param ppv Pointer to the pointer variable to read.
2003 * @param Type The type of *ppv, sans volatile.
2004 */
2005#ifdef __GNUC__
2006# define ASMAtomicUoReadPtrT(ppv, Type) \
2007 __extension__ \
2008 ({\
2009 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2010 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2011 pvTypeChecked; \
2012 })
2013#else
2014# define ASMAtomicUoReadPtrT(ppv, Type) \
2015 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2016#endif
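

/*
 * Usage sketch for ASMAtomicReadPtrT / ASMAtomicUoReadPtrT; EXAMPLENODE and
 * g_pExampleHead are hypothetical names.  Passing the type keeps the call
 * site free of casts and lets GCC builds type check the pointer:
 *
 *     typedef struct EXAMPLENODE *PEXAMPLENODE;
 *     static PEXAMPLENODE volatile g_pExampleHead;
 *
 *     PEXAMPLENODE pHead  = ASMAtomicReadPtrT(&g_pExampleHead, PEXAMPLENODE);
 *     PEXAMPLENODE pRough = ASMAtomicUoReadPtrT(&g_pExampleHead, PEXAMPLENODE);
 */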
2017
2018
2019/**
2020 * Atomically reads a boolean value, ordered.
2021 *
2022 * @returns Current *pf value
2023 * @param pf Pointer to the boolean variable to read.
2024 */
2025DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2026{
2027 ASMMemoryFence();
2028 return *pf; /* byte reads are atomic on x86 */
2029}
2030
2031
2032/**
2033 * Atomically reads a boolean value, unordered.
2034 *
2035 * @returns Current *pf value
2036 * @param pf Pointer to the boolean variable to read.
2037 */
2038DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2039{
2040 return *pf; /* byte reads are atomic on x86 */
2041}
2042
2043
2044/**
2045 * Atomically read a typical IPRT handle value, ordered.
2046 *
2047 * @param ph Pointer to the handle variable to read.
2048 * @param phRes Where to store the result.
2049 *
2050 * @remarks This doesn't currently work for all handles (like RTFILE).
2051 */
2052#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2053# define ASMAtomicReadHandle(ph, phRes) \
2054 do { \
2055 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2056 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2057 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2058 } while (0)
2059#elif HC_ARCH_BITS == 64
2060# define ASMAtomicReadHandle(ph, phRes) \
2061 do { \
2062 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2063 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2064 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2065 } while (0)
2066#else
2067# error HC_ARCH_BITS
2068#endif
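

/*
 * Usage sketch for ASMAtomicReadHandle; g_hExampleEvent is a hypothetical
 * variable.  The macro works for pointer sized handle types (hence the RTFILE
 * caveat above), e.g. RTSEMEVENT:
 *
 *     static RTSEMEVENT volatile g_hExampleEvent;
 *
 *     RTSEMEVENT hEvent;
 *     ASMAtomicReadHandle(&g_hExampleEvent, &hEvent);
 *     if (hEvent != NIL_RTSEMEVENT)
 *         ... // safe to use the handle snapshot
 */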
2069
2070
2071/**
2072 * Atomically read a typical IPRT handle value, unordered.
2073 *
2074 * @param ph Pointer to the handle variable to read.
2075 * @param phRes Where to store the result.
2076 *
2077 * @remarks This doesn't currently work for all handles (like RTFILE).
2078 */
2079#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2080# define ASMAtomicUoReadHandle(ph, phRes) \
2081 do { \
2082 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2083 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2084 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2085 } while (0)
2086#elif HC_ARCH_BITS == 64
2087# define ASMAtomicUoReadHandle(ph, phRes) \
2088 do { \
2089 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2090 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2091 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2092 } while (0)
2093#else
2094# error HC_ARCH_BITS
2095#endif
2096
2097
2098/**
2099 * Atomically read a value whose size might differ
2100 * between platforms or compilers, ordered.
2101 *
2102 * @param pu Pointer to the variable to read.
2103 * @param puRes Where to store the result.
2104 */
2105#define ASMAtomicReadSize(pu, puRes) \
2106 do { \
2107 switch (sizeof(*(pu))) { \
2108 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2109 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2110 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2111 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2112 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2113 } \
2114 } while (0)
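

/*
 * Usage sketch for ASMAtomicReadSize; EXAMPLESTATE and g_ExampleState are
 * hypothetical names.  The macro is handy when the variable is a typedef
 * whose width differs between platforms, for instance one based on uintptr_t:
 *
 *     typedef uintptr_t EXAMPLESTATE;
 *     static volatile EXAMPLESTATE g_ExampleState;
 *
 *     EXAMPLESTATE uState;
 *     ASMAtomicReadSize(&g_ExampleState, &uState);
 */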
2115
2116
2117/**
2118 * Atomically read a value whose size might differ
2119 * between platforms or compilers, unordered.
2120 *
2121 * @param pu Pointer to the variable to read.
2122 * @param puRes Where to store the result.
2123 */
2124#define ASMAtomicUoReadSize(pu, puRes) \
2125 do { \
2126 switch (sizeof(*(pu))) { \
2127 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2128 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2129 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2130 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2131 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2132 } \
2133 } while (0)
2134
2135
2136/**
2137 * Atomically writes an unsigned 8-bit value, ordered.
2138 *
2139 * @param pu8 Pointer to the 8-bit variable.
2140 * @param u8 The 8-bit value to assign to *pu8.
2141 */
2142DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2143{
2144 ASMAtomicXchgU8(pu8, u8);
2145}
2146
2147
2148/**
2149 * Atomically writes an unsigned 8-bit value, unordered.
2150 *
2151 * @param pu8 Pointer to the 8-bit variable.
2152 * @param u8 The 8-bit value to assign to *pu8.
2153 */
2154DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2155{
2156 *pu8 = u8; /* byte writes are atomic on x86 */
2157}
2158
2159
2160/**
2161 * Atomically writes a signed 8-bit value, ordered.
2162 *
2163 * @param pi8 Pointer to the 8-bit variable to write.
2164 * @param i8 The 8-bit value to assign to *pi8.
2165 */
2166DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2167{
2168 ASMAtomicXchgS8(pi8, i8);
2169}
2170
2171
2172/**
2173 * Atomically writes a signed 8-bit value, unordered.
2174 *
2175 * @param pi8 Pointer to the 8-bit variable to write.
2176 * @param i8 The 8-bit value to assign to *pi8.
2177 */
2178DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2179{
2180 *pi8 = i8; /* byte writes are atomic on x86 */
2181}
2182
2183
2184/**
2185 * Atomically writes an unsigned 16-bit value, ordered.
2186 *
2187 * @param pu16 Pointer to the 16-bit variable to write.
2188 * @param u16 The 16-bit value to assign to *pu16.
2189 */
2190DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2191{
2192 ASMAtomicXchgU16(pu16, u16);
2193}
2194
2195
2196/**
2197 * Atomically writes an unsigned 16-bit value, unordered.
2198 *
2199 * @param pu16 Pointer to the 16-bit variable to write.
2200 * @param u16 The 16-bit value to assign to *pu16.
2201 */
2202DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2203{
2204 Assert(!((uintptr_t)pu16 & 1));
2205 *pu16 = u16;
2206}
2207
2208
2209/**
2210 * Atomically writes a signed 16-bit value, ordered.
2211 *
2212 * @param pi16 Pointer to the 16-bit variable to write.
2213 * @param i16 The 16-bit value to assign to *pi16.
2214 */
2215DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2216{
2217 ASMAtomicXchgS16(pi16, i16);
2218}
2219
2220
2221/**
2222 * Atomically writes a signed 16-bit value, unordered.
2223 *
2224 * @param pi16 Pointer to the 16-bit variable to write.
2225 * @param i16 The 16-bit value to assign to *pi16.
2226 */
2227DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2228{
2229 Assert(!((uintptr_t)pi16 & 1));
2230 *pi16 = i16;
2231}
2232
2233
2234/**
2235 * Atomically writes an unsigned 32-bit value, ordered.
2236 *
2237 * @param pu32 Pointer to the 32-bit variable to write.
2238 * @param u32 The 32-bit value to assign to *pu32.
2239 */
2240DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2241{
2242 ASMAtomicXchgU32(pu32, u32);
2243}
2244
2245
2246/**
2247 * Atomically writes an unsigned 32-bit value, unordered.
2248 *
2249 * @param pu32 Pointer to the 32-bit variable to write.
2250 * @param u32 The 32-bit value to assign to *pu32.
2251 */
2252DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2253{
2254 Assert(!((uintptr_t)pu32 & 3));
2255 *pu32 = u32;
2256}
2257
2258
2259/**
2260 * Atomically writes a signed 32-bit value, ordered.
2261 *
2262 * @param pi32 Pointer to the 32-bit variable to write.
2263 * @param i32 The 32-bit value to assign to *pi32.
2264 */
2265DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2266{
2267 ASMAtomicXchgS32(pi32, i32);
2268}
2269
2270
2271/**
2272 * Atomically writes a signed 32-bit value, unordered.
2273 *
2274 * @param pi32 Pointer to the 32-bit variable to write.
2275 * @param i32 The 32-bit value to assign to *pi32.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2278{
2279 Assert(!((uintptr_t)pi32 & 3));
2280 *pi32 = i32;
2281}
2282
2283
2284/**
2285 * Atomically writes an unsigned 64-bit value, ordered.
2286 *
2287 * @param pu64 Pointer to the 64-bit variable to write.
2288 * @param u64 The 64-bit value to assign to *pu64.
2289 */
2290DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2291{
2292 ASMAtomicXchgU64(pu64, u64);
2293}
2294
2295
2296/**
2297 * Atomically writes an unsigned 64-bit value, unordered.
2298 *
2299 * @param pu64 Pointer to the 64-bit variable to write.
2300 * @param u64 The 64-bit value to assign to *pu64.
2301 */
2302DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2303{
2304 Assert(!((uintptr_t)pu64 & 7));
2305#if ARCH_BITS == 64
2306 *pu64 = u64;
2307#else
2308 ASMAtomicXchgU64(pu64, u64);
2309#endif
2310}
2311
2312
2313/**
2314 * Atomically writes a signed 64-bit value, ordered.
2315 *
2316 * @param pi64 Pointer to the 64-bit variable to write.
2317 * @param i64 The 64-bit value to assign to *pi64.
2318 */
2319DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2320{
2321 ASMAtomicXchgS64(pi64, i64);
2322}
2323
2324
2325/**
2326 * Atomically writes a signed 64-bit value, unordered.
2327 *
2328 * @param pi64 Pointer to the 64-bit variable to write.
2329 * @param i64 The 64-bit value to assign to *pi64.
2330 */
2331DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2332{
2333 Assert(!((uintptr_t)pi64 & 7));
2334#if ARCH_BITS == 64
2335 *pi64 = i64;
2336#else
2337 ASMAtomicXchgS64(pi64, i64);
2338#endif
2339}
2340
2341
2342/**
2343 * Atomically writes a boolean value, ordered.
2344 *
2345 * @param pf Pointer to the boolean variable to write.
2346 * @param f The boolean value to assign to *pf.
2347 */
2348DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2349{
2350 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2351}
2352
2353
2354/**
2355 * Atomically writes a boolean value, unordered.
2356 *
2357 * @param pf Pointer to the boolean variable to write.
2358 * @param f The boolean value to assign to *pf.
2359 */
2360DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2361{
2362 *pf = f; /* byte writes are atomic on x86 */
2363}
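

/*
 * Usage sketch for the boolean read/write pair; g_fExampleShutdown is a
 * hypothetical flag.  A worker loop polls a termination flag that another
 * thread raises:
 *
 *     static volatile bool g_fExampleShutdown = false;
 *
 *     // requester:
 *     ASMAtomicWriteBool(&g_fExampleShutdown, true);
 *
 *     // worker:
 *     while (!ASMAtomicReadBool(&g_fExampleShutdown))
 *         ... // do a unit of work
 */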
2364
2365
2366/**
2367 * Atomically writes a pointer value, ordered.
2368 *
2369 * @param ppv Pointer to the pointer variable to write.
2370 * @param pv The pointer value to assign to *ppv.
2371 */
2372DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2373{
2374#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2375 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2376#elif ARCH_BITS == 64
2377 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2378#else
2379# error "ARCH_BITS is bogus"
2380#endif
2381}
2382
2383
2384/**
2385 * Atomically writes a pointer value, ordered.
2386 *
2387 * @param ppv Pointer to the pointer variable to write.
2388 * @param pv The pointer value to assign to *ppv. If NULL use
2389 * ASMAtomicWriteNullPtr or you'll land in trouble.
2390 *
2391 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2392 * NULL.
2393 */
2394#ifdef __GNUC__
2395# define ASMAtomicWritePtr(ppv, pv) \
2396 do \
2397 { \
2398 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2399 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2400 \
2401 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2402 AssertCompile(sizeof(pv) == sizeof(void *)); \
2403 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2404 \
2405 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2406 } while (0)
2407#else
2408# define ASMAtomicWritePtr(ppv, pv) \
2409 do \
2410 { \
2411 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2412 AssertCompile(sizeof(pv) == sizeof(void *)); \
2413 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2414 \
2415 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2416 } while (0)
2417#endif
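

/*
 * Usage sketch for ASMAtomicWritePtr; the EXAMPLECFG names are hypothetical
 * and RTMemAllocZ comes from iprt/mem.h.  The ordered write ensures the
 * structure contents are globally visible before the pointer is, so readers
 * never see a half initialized object:
 *
 *     typedef struct EXAMPLECFG { uint32_t cItems; } EXAMPLECFG;
 *     typedef EXAMPLECFG *PEXAMPLECFG;
 *     static PEXAMPLECFG volatile g_pExampleCfg;
 *
 *     // producer:
 *     PEXAMPLECFG pCfg = (PEXAMPLECFG)RTMemAllocZ(sizeof(*pCfg));
 *     pCfg->cItems = 42;
 *     ASMAtomicWritePtr(&g_pExampleCfg, pCfg);
 *
 *     // consumer:
 *     PEXAMPLECFG pCur = ASMAtomicReadPtrT(&g_pExampleCfg, PEXAMPLECFG);
 */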
2418
2419
2420/**
2421 * Atomically sets a pointer to NULL, ordered.
2422 *
2423 * @param ppv Pointer to the pointer variable that should be set to NULL.
2424 *
2425 * @remarks This is relatively type safe on GCC platforms.
2426 */
2427#ifdef __GNUC__
2428# define ASMAtomicWriteNullPtr(ppv) \
2429 do \
2430 { \
2431 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2432 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2433 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2434 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2435 } while (0)
2436#else
2437# define ASMAtomicWriteNullPtr(ppv) \
2438 do \
2439 { \
2440 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2441 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2442 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2443 } while (0)
2444#endif
2445
2446
2447/**
2448 * Atomically writes a pointer value, unordered.
2449 *
2451 * @param ppv Pointer to the pointer variable.
2452 * @param pv The pointer value to assign to *ppv. If NULL use
2453 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2454 *
2455 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2456 * NULL.
2457 */
2458#ifdef __GNUC__
2459# define ASMAtomicUoWritePtr(ppv, pv) \
2460 do \
2461 { \
2462 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2463 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2464 \
2465 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2466 AssertCompile(sizeof(pv) == sizeof(void *)); \
2467 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2468 \
2469 *(ppvTypeChecked) = pvTypeChecked; \
2470 } while (0)
2471#else
2472# define ASMAtomicUoWritePtr(ppv, pv) \
2473 do \
2474 { \
2475 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2476 AssertCompile(sizeof(pv) == sizeof(void *)); \
2477 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2478 *(ppv) = pv; \
2479 } while (0)
2480#endif
2481
2482
2483/**
2484 * Atomically sets a pointer to NULL, unordered.
2485 *
2486 * @param ppv Pointer to the pointer variable that should be set to NULL.
2487 *
2488 * @remarks This is relatively type safe on GCC platforms.
2489 */
2490#ifdef __GNUC__
2491# define ASMAtomicUoWriteNullPtr(ppv) \
2492 do \
2493 { \
2494 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2495 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2496 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2497 *(ppvTypeChecked) = NULL; \
2498 } while (0)
2499#else
2500# define ASMAtomicUoWriteNullPtr(ppv) \
2501 do \
2502 { \
2503 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2504 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2505 *(ppv) = NULL; \
2506 } while (0)
2507#endif
2508
2509
2510/**
2511 * Atomically write a typical IPRT handle value, ordered.
2512 *
2513 * @param ph Pointer to the variable to update.
2514 * @param hNew The value to assign to *ph.
2515 *
2516 * @remarks This doesn't currently work for all handles (like RTFILE).
2517 */
2518#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2519# define ASMAtomicWriteHandle(ph, hNew) \
2520 do { \
2521 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2522 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2523 } while (0)
2524#elif HC_ARCH_BITS == 64
2525# define ASMAtomicWriteHandle(ph, hNew) \
2526 do { \
2527 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2528 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2529 } while (0)
2530#else
2531# error HC_ARCH_BITS
2532#endif
2533
2534
2535/**
2536 * Atomically write a typical IPRT handle value, unordered.
2537 *
2538 * @param ph Pointer to the variable to update.
2539 * @param hNew The value to assign to *ph.
2540 *
2541 * @remarks This doesn't currently work for all handles (like RTFILE).
2542 */
2543#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2544# define ASMAtomicUoWriteHandle(ph, hNew) \
2545 do { \
2546 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2547 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2548 } while (0)
2549#elif HC_ARCH_BITS == 64
2550# define ASMAtomicUoWriteHandle(ph, hNew) \
2551 do { \
2552 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2553 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2554 } while (0)
2555#else
2556# error HC_ARCH_BITS
2557#endif
2558
2559
2560/**
2561 * Atomically write a value whose size might differ
2562 * between platforms or compilers, ordered.
2563 *
2564 * @param pu Pointer to the variable to update.
2565 * @param uNew The value to assign to *pu.
2566 */
2567#define ASMAtomicWriteSize(pu, uNew) \
2568 do { \
2569 switch (sizeof(*(pu))) { \
2570 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2571 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2572 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2573 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2574 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2575 } \
2576 } while (0)
2577
2578/**
2579 * Atomically write a value whose size might differ
2580 * between platforms or compilers, unordered.
2581 *
2582 * @param pu Pointer to the variable to update.
2583 * @param uNew The value to assign to *pu.
2584 */
2585#define ASMAtomicUoWriteSize(pu, uNew) \
2586 do { \
2587 switch (sizeof(*(pu))) { \
2588 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2589 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2590 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2591 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2592 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2593 } \
2594 } while (0)
2595
2596
2597
2598/**
2599 * Atomically exchanges and adds to a 16-bit value, ordered.
2600 *
2601 * @returns The old value.
2602 * @param pu16 Pointer to the value.
2603 * @param u16 Number to add.
2604 *
2605 * @remarks Currently not implemented, just to make 16-bit code happy.
2606 * @remarks x86: Requires a 486 or later.
2607 */
2608DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2609
2610
2611/**
2612 * Atomically exchanges and adds to a 32-bit value, ordered.
2613 *
2614 * @returns The old value.
2615 * @param pu32 Pointer to the value.
2616 * @param u32 Number to add.
2617 *
2618 * @remarks x86: Requires a 486 or later.
2619 */
2620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2621DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2622#else
2623DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2624{
2625# if RT_INLINE_ASM_USES_INTRIN
2626 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2627 return u32;
2628
2629# elif RT_INLINE_ASM_GNU_STYLE
2630 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2631 : "=r" (u32),
2632 "=m" (*pu32)
2633 : "0" (u32),
2634 "m" (*pu32)
2635 : "memory");
2636 return u32;
2637# else
2638 __asm
2639 {
2640 mov eax, [u32]
2641# ifdef RT_ARCH_AMD64
2642 mov rdx, [pu32]
2643 lock xadd [rdx], eax
2644# else
2645 mov edx, [pu32]
2646 lock xadd [edx], eax
2647# endif
2648 mov [u32], eax
2649 }
2650 return u32;
2651# endif
2652}
2653#endif
2654
2655
2656/**
2657 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2658 *
2659 * @returns The old value.
2660 * @param pi32 Pointer to the value.
2661 * @param i32 Number to add.
2662 *
2663 * @remarks x86: Requires a 486 or later.
2664 */
2665DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2666{
2667 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2668}
2669
2670
2671/**
2672 * Atomically exchanges and adds to a 64-bit value, ordered.
2673 *
2674 * @returns The old value.
2675 * @param pu64 Pointer to the value.
2676 * @param u64 Number to add.
2677 *
2678 * @remarks x86: Requires a Pentium or later.
2679 */
2680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2681DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2682#else
2683DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2684{
2685# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2686 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2687 return u64;
2688
2689# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2690 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2691 : "=r" (u64),
2692 "=m" (*pu64)
2693 : "0" (u64),
2694 "m" (*pu64)
2695 : "memory");
2696 return u64;
2697# else
2698 uint64_t u64Old;
2699 for (;;)
2700 {
2701 uint64_t u64New;
2702 u64Old = ASMAtomicUoReadU64(pu64);
2703 u64New = u64Old + u64;
2704 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2705 break;
2706 ASMNopPause();
2707 }
2708 return u64Old;
2709# endif
2710}
2711#endif
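

/*
 * Usage sketch for ASMAtomicAddU64; g_cbExampleTotal is a hypothetical
 * counter.  The return value is the value *before* the addition, which makes
 * it easy to carve a private range out of a shared counter:
 *
 *     static volatile uint64_t g_cbExampleTotal;
 *
 *     uint64_t const cbChunk  = 4096;
 *     uint64_t const offStart = ASMAtomicAddU64(&g_cbExampleTotal, cbChunk);
 *     // this caller now owns the range [offStart, offStart + cbChunk).
 */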
2712
2713
2714/**
2715 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2716 *
2717 * @returns The old value.
2718 * @param pi64 Pointer to the value.
2719 * @param i64 Number to add.
2720 *
2721 * @remarks x86: Requires a Pentium or later.
2722 */
2723DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2724{
2725 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2726}
2727
2728
2729/**
2730 * Atomically exchanges and adds to a size_t value, ordered.
2731 *
2732 * @returns The old value.
2733 * @param pcb Pointer to the size_t value.
2734 * @param cb Number to add.
2735 */
2736DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2737{
2738#if ARCH_BITS == 64
2739 AssertCompileSize(size_t, 8);
2740 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2741#elif ARCH_BITS == 32
2742 AssertCompileSize(size_t, 4);
2743 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2744#elif ARCH_BITS == 16
2745 AssertCompileSize(size_t, 2);
2746 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2747#else
2748# error "Unsupported ARCH_BITS value"
2749#endif
2750}
2751
2752
2753/**
2754 * Atomically exchanges and adds a value whose size might differ between
2755 * platforms or compilers, ordered.
2756 *
2757 * @param pu Pointer to the variable to update.
2758 * @param uNew The value to add to *pu.
2759 * @param puOld Where to store the old value.
2760 */
2761#define ASMAtomicAddSize(pu, uNew, puOld) \
2762 do { \
2763 switch (sizeof(*(pu))) { \
2764 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2765 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2766 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2767 } \
2768 } while (0)
2769
2770
2771
2772/**
2773 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2774 *
2775 * @returns The old value.
2776 * @param pu16 Pointer to the value.
2777 * @param u16 Number to subtract.
2778 *
2779 * @remarks x86: Requires a 486 or later.
2780 */
2781DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2782{
2783 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2784}
2785
2786
2787/**
2788 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2789 *
2790 * @returns The old value.
2791 * @param pi16 Pointer to the value.
2792 * @param i16 Number to subtract.
2793 *
2794 * @remarks x86: Requires a 486 or later.
2795 */
2796DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2797{
2798 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2799}
2800
2801
2802/**
2803 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2804 *
2805 * @returns The old value.
2806 * @param pu32 Pointer to the value.
2807 * @param u32 Number to subtract.
2808 *
2809 * @remarks x86: Requires a 486 or later.
2810 */
2811DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2812{
2813 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2814}
2815
2816
2817/**
2818 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2819 *
2820 * @returns The old value.
2821 * @param pi32 Pointer to the value.
2822 * @param i32 Number to subtract.
2823 *
2824 * @remarks x86: Requires a 486 or later.
2825 */
2826DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2827{
2828 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2829}
2830
2831
2832/**
2833 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2834 *
2835 * @returns The old value.
2836 * @param pu64 Pointer to the value.
2837 * @param u64 Number to subtract.
2838 *
2839 * @remarks x86: Requires a Pentium or later.
2840 */
2841DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2842{
2843 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2844}
2845
2846
2847/**
2848 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2849 *
2850 * @returns The old value.
2851 * @param pi64 Pointer to the value.
2852 * @param i64 Number to subtract.
2853 *
2854 * @remarks x86: Requires a Pentium or later.
2855 */
2856DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2857{
2858 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2859}
2860
2861
2862/**
2863 * Atomically exchanges and subtracts from a size_t value, ordered.
2864 *
2865 * @returns The old value.
2866 * @param pcb Pointer to the size_t value.
2867 * @param cb Number to subtract.
2868 *
2869 * @remarks x86: Requires a 486 or later.
2870 */
2871DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2872{
2873#if ARCH_BITS == 64
2874 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2875#elif ARCH_BITS == 32
2876 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2877#elif ARCH_BITS == 16
2878 AssertCompileSize(size_t, 2);
2879 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2880#else
2881# error "Unsupported ARCH_BITS value"
2882#endif
2883}
2884
2885
2886/**
2887 * Atomically exchanges and subtracts a value whose size might differ between
2888 * platforms or compilers, ordered.
2889 *
2890 * @param pu Pointer to the variable to update.
2891 * @param uNew The value to subtract from *pu.
2892 * @param puOld Where to store the old value.
2893 *
2894 * @remarks x86: Requires a 486 or later.
2895 */
2896#define ASMAtomicSubSize(pu, uNew, puOld) \
2897 do { \
2898 switch (sizeof(*(pu))) { \
2899 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2900 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2901 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2902 } \
2903 } while (0)
2904
2905
2906
2907/**
2908 * Atomically increment a 16-bit value, ordered.
2909 *
2910 * @returns The new value.
2911 * @param pu16 Pointer to the value to increment.
2912 * @remarks Not implemented. Just to make 16-bit code happy.
2913 *
2914 * @remarks x86: Requires a 486 or later.
2915 */
2916DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2917
2918
2919/**
2920 * Atomically increment a 32-bit value, ordered.
2921 *
2922 * @returns The new value.
2923 * @param pu32 Pointer to the value to increment.
2924 *
2925 * @remarks x86: Requires a 486 or later.
2926 */
2927#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2928DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2929#else
2930DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2931{
2932 uint32_t u32;
2933# if RT_INLINE_ASM_USES_INTRIN
2934 u32 = _InterlockedIncrement((long *)pu32);
2935 return u32;
2936
2937# elif RT_INLINE_ASM_GNU_STYLE
2938 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2939 : "=r" (u32),
2940 "=m" (*pu32)
2941 : "0" (1),
2942 "m" (*pu32)
2943 : "memory");
2944 return u32+1;
2945# else
2946 __asm
2947 {
2948 mov eax, 1
2949# ifdef RT_ARCH_AMD64
2950 mov rdx, [pu32]
2951 lock xadd [rdx], eax
2952# else
2953 mov edx, [pu32]
2954 lock xadd [edx], eax
2955# endif
2956 mov u32, eax
2957 }
2958 return u32+1;
2959# endif
2960}
2961#endif
2962
2963
2964/**
2965 * Atomically increment a signed 32-bit value, ordered.
2966 *
2967 * @returns The new value.
2968 * @param pi32 Pointer to the value to increment.
2969 *
2970 * @remarks x86: Requires a 486 or later.
2971 */
2972DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2973{
2974 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2975}
2976
2977
2978/**
2979 * Atomically increment a 64-bit value, ordered.
2980 *
2981 * @returns The new value.
2982 * @param pu64 Pointer to the value to increment.
2983 *
2984 * @remarks x86: Requires a Pentium or later.
2985 */
2986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2987DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2988#else
2989DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2990{
2991# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2992 uint64_t u64;
2993 u64 = _InterlockedIncrement64((__int64 *)pu64);
2994 return u64;
2995
2996# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2997 uint64_t u64;
2998 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2999 : "=r" (u64),
3000 "=m" (*pu64)
3001 : "0" (1),
3002 "m" (*pu64)
3003 : "memory");
3004 return u64 + 1;
3005# else
3006 return ASMAtomicAddU64(pu64, 1) + 1;
3007# endif
3008}
3009#endif
3010
3011
3012/**
3013 * Atomically increment a signed 64-bit value, ordered.
3014 *
3015 * @returns The new value.
3016 * @param pi64 Pointer to the value to increment.
3017 *
3018 * @remarks x86: Requires a Pentium or later.
3019 */
3020DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3021{
3022 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3023}
3024
3025
3026/**
3027 * Atomically increment a size_t value, ordered.
3028 *
3029 * @returns The new value.
3030 * @param pcb Pointer to the value to increment.
3031 *
3032 * @remarks x86: Requires a 486 or later.
3033 */
3034DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
3035{
3036#if ARCH_BITS == 64
3037 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3038#elif ARCH_BITS == 32
3039 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3040#elif ARCH_BITS == 16
3041 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3042#else
3043# error "Unsupported ARCH_BITS value"
3044#endif
3045}
3046
3047
3048
3049/**
3050 * Atomically decrement an unsigned 16-bit value, ordered.
3051 *
3052 * @returns The new value.
3053 * @param pu16 Pointer to the value to decrement.
3054 * @remarks Not implemented. Just to make 16-bit code happy.
3055 *
3056 * @remarks x86: Requires a 486 or later.
3057 */
3058DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3059
3060
3061/**
3062 * Atomically decrement an unsigned 32-bit value, ordered.
3063 *
3064 * @returns The new value.
3065 * @param pu32 Pointer to the value to decrement.
3066 *
3067 * @remarks x86: Requires a 486 or later.
3068 */
3069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3070DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3071#else
3072DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3073{
3074 uint32_t u32;
3075# if RT_INLINE_ASM_USES_INTRIN
3076 u32 = _InterlockedDecrement((long *)pu32);
3077 return u32;
3078
3079# elif RT_INLINE_ASM_GNU_STYLE
3080 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3081 : "=r" (u32),
3082 "=m" (*pu32)
3083 : "0" (-1),
3084 "m" (*pu32)
3085 : "memory");
3086 return u32-1;
3087# else
3088 __asm
3089 {
3090 mov eax, -1
3091# ifdef RT_ARCH_AMD64
3092 mov rdx, [pu32]
3093 lock xadd [rdx], eax
3094# else
3095 mov edx, [pu32]
3096 lock xadd [edx], eax
3097# endif
3098 mov u32, eax
3099 }
3100 return u32-1;
3101# endif
3102}
3103#endif
3104
3105
3106/**
3107 * Atomically decrement a signed 32-bit value, ordered.
3108 *
3109 * @returns The new value.
3110 * @param pi32 Pointer to the value to decrement.
3111 *
3112 * @remarks x86: Requires a 486 or later.
3113 */
3114DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3115{
3116 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3117}
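

/*
 * Usage sketch for ASMAtomicIncU32 / ASMAtomicDecU32; EXAMPLEOBJ and pObj are
 * hypothetical.  The classic reference counting pattern, destroying the
 * object when the count returned by the decrement reaches zero:
 *
 *     typedef struct EXAMPLEOBJ { volatile uint32_t cRefs; } EXAMPLEOBJ;
 *     EXAMPLEOBJ *pObj = ...; // shared object
 *
 *     // retain:
 *     ASMAtomicIncU32(&pObj->cRefs);
 *
 *     // release:
 *     if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *         ... // last reference, free pObj
 */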
3118
3119
3120/**
3121 * Atomically decrement an unsigned 64-bit value, ordered.
3122 *
3123 * @returns The new value.
3124 * @param pu64 Pointer to the value to decrement.
3125 *
3126 * @remarks x86: Requires a Pentium or later.
3127 */
3128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3129DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3130#else
3131DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3132{
3133# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3134 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3135 return u64;
3136
3137# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3138 uint64_t u64;
3139 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3140 : "=r" (u64),
3141 "=m" (*pu64)
3142 : "0" (~(uint64_t)0),
3143 "m" (*pu64)
3144 : "memory");
3145 return u64-1;
3146# else
3147 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3148# endif
3149}
3150#endif
3151
3152
3153/**
3154 * Atomically decrement a signed 64-bit value, ordered.
3155 *
3156 * @returns The new value.
3157 * @param pi64 Pointer to the value to decrement.
3158 *
3159 * @remarks x86: Requires a Pentium or later.
3160 */
3161DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3162{
3163 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3164}
3165
3166
3167/**
3168 * Atomically decrement a size_t value, ordered.
3169 *
3170 * @returns The new value.
3171 * @param pcb Pointer to the value to decrement.
3172 *
3173 * @remarks x86: Requires a 486 or later.
3174 */
3175DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
3176{
3177#if ARCH_BITS == 64
3178 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3179#elif ARCH_BITS == 32
3180 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3181#elif ARCH_BITS == 16
3182 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3183#else
3184# error "Unsupported ARCH_BITS value"
3185#endif
3186}
3187
3188
3189/**
3190 * Atomically Or an unsigned 32-bit value, ordered.
3191 *
3192 * @param pu32 Pointer to the variable to OR u32 with.
3193 * @param u32 The value to OR *pu32 with.
3194 *
3195 * @remarks x86: Requires a 386 or later.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3198DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3199#else
3200DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3201{
3202# if RT_INLINE_ASM_USES_INTRIN
3203 _InterlockedOr((long volatile *)pu32, (long)u32);
3204
3205# elif RT_INLINE_ASM_GNU_STYLE
3206 __asm__ __volatile__("lock; orl %1, %0\n\t"
3207 : "=m" (*pu32)
3208 : "ir" (u32),
3209 "m" (*pu32));
3210# else
3211 __asm
3212 {
3213 mov eax, [u32]
3214# ifdef RT_ARCH_AMD64
3215 mov rdx, [pu32]
3216 lock or [rdx], eax
3217# else
3218 mov edx, [pu32]
3219 lock or [edx], eax
3220# endif
3221 }
3222# endif
3223}
3224#endif
3225
3226
3227/**
3228 * Atomically Or a signed 32-bit value, ordered.
3229 *
3230 * @param pi32 Pointer to the variable to OR i32 with.
3231 * @param i32 The value to OR *pi32 with.
3232 *
3233 * @remarks x86: Requires a 386 or later.
3234 */
3235DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3236{
3237 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3238}
3239
3240
3241/**
3242 * Atomically Or an unsigned 64-bit value, ordered.
3243 *
3244 * @param pu64 Pointer to the variable to OR u64 with.
3245 * @param u64 The value to OR *pu64 with.
3246 *
3247 * @remarks x86: Requires a Pentium or later.
3248 */
3249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3250DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3251#else
3252DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3253{
3254# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3255 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3256
3257# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3258 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3259 : "=m" (*pu64)
3260 : "r" (u64),
3261 "m" (*pu64));
3262# else
3263 for (;;)
3264 {
3265 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3266 uint64_t u64New = u64Old | u64;
3267 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3268 break;
3269 ASMNopPause();
3270 }
3271# endif
3272}
3273#endif
3274
3275
3276/**
3277 * Atomically Or a signed 64-bit value, ordered.
3278 *
3279 * @param pi64 Pointer to the variable to OR i64 with.
3280 * @param i64 The value to OR *pi64 with.
3281 *
3282 * @remarks x86: Requires a Pentium or later.
3283 */
3284DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3285{
3286 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3287}
3288
3289
3290/**
3291 * Atomically And an unsigned 32-bit value, ordered.
3292 *
3293 * @param pu32 Pointer to the variable to AND u32 with.
3294 * @param u32 The value to AND *pu32 with.
3295 *
3296 * @remarks x86: Requires a 386 or later.
3297 */
3298#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3299DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3300#else
3301DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3302{
3303# if RT_INLINE_ASM_USES_INTRIN
3304 _InterlockedAnd((long volatile *)pu32, u32);
3305
3306# elif RT_INLINE_ASM_GNU_STYLE
3307 __asm__ __volatile__("lock; andl %1, %0\n\t"
3308 : "=m" (*pu32)
3309 : "ir" (u32),
3310 "m" (*pu32));
3311# else
3312 __asm
3313 {
3314 mov eax, [u32]
3315# ifdef RT_ARCH_AMD64
3316 mov rdx, [pu32]
3317 lock and [rdx], eax
3318# else
3319 mov edx, [pu32]
3320 lock and [edx], eax
3321# endif
3322 }
3323# endif
3324}
3325#endif
3326
3327
3328/**
3329 * Atomically And a signed 32-bit value, ordered.
3330 *
3331 * @param pi32 Pointer to the variable to AND i32 with.
3332 * @param i32 The value to AND *pi32 with.
3333 *
3334 * @remarks x86: Requires a 386 or later.
3335 */
3336DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3337{
3338 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3339}
3340
3341
3342/**
3343 * Atomically And an unsigned 64-bit value, ordered.
3344 *
3345 * @param pu64 Pointer to the variable to AND u64 with.
3346 * @param u64 The value to AND *pu64 with.
3347 *
3348 * @remarks x86: Requires a Pentium or later.
3349 */
3350#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3351DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3352#else
3353DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3354{
3355# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3356 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3357
3358# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3359 __asm__ __volatile__("lock; andq %1, %0\n\t"
3360 : "=m" (*pu64)
3361 : "r" (u64),
3362 "m" (*pu64));
3363# else
3364 for (;;)
3365 {
3366 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3367 uint64_t u64New = u64Old & u64;
3368 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3369 break;
3370 ASMNopPause();
3371 }
3372# endif
3373}
3374#endif
3375
3376
3377/**
3378 * Atomically And a signed 64-bit value, ordered.
3379 *
3380 * @param pi64 Pointer to the variable to AND i64 with.
3381 * @param i64 The value to AND *pi64 with.
3382 *
3383 * @remarks x86: Requires a Pentium or later.
3384 */
3385DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3386{
3387 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3388}
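

/*
 * Usage sketch for the ordered OR/AND operations; the EXAMPLE_F_XXX bits and
 * g_fExampleFlags are hypothetical.  Setting and clearing bits in a flag word
 * shared between threads:
 *
 *     #define EXAMPLE_F_BUSY      RT_BIT_32(0)
 *     #define EXAMPLE_F_SHUTDOWN  RT_BIT_32(1)
 *     static volatile uint32_t g_fExampleFlags;
 *
 *     ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_F_BUSY);    // set a bit
 *     ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_F_BUSY);  // clear it again
 */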
3389
3390
3391/**
3392 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3393 *
3394 * @param pu32 Pointer to the variable to OR u32 with.
3395 * @param u32 The value to OR *pu32 with.
3396 *
3397 * @remarks x86: Requires a 386 or later.
3398 */
3399#if RT_INLINE_ASM_EXTERNAL
3400DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3401#else
3402DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3403{
3404# if RT_INLINE_ASM_GNU_STYLE
3405 __asm__ __volatile__("orl %1, %0\n\t"
3406 : "=m" (*pu32)
3407 : "ir" (u32),
3408 "m" (*pu32));
3409# else
3410 __asm
3411 {
3412 mov eax, [u32]
3413# ifdef RT_ARCH_AMD64
3414 mov rdx, [pu32]
3415 or [rdx], eax
3416# else
3417 mov edx, [pu32]
3418 or [edx], eax
3419# endif
3420 }
3421# endif
3422}
3423#endif
3424
3425
3426/**
3427 * Atomically OR a signed 32-bit value, unordered.
3428 *
3429 * @param pi32 Pointer to the variable to OR i32 with.
3430 * @param i32 The value to OR *pi32 with.
3431 *
3432 * @remarks x86: Requires a 386 or later.
3433 */
3434DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3435{
3436 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3437}
3438
3439
3440/**
3441 * Atomically OR an unsigned 64-bit value, unordered.
3442 *
3443 * @param pu64 Pointer to the variable to OR u64 with.
3444 * @param u64 The value to OR *pu64 with.
3445 *
3446 * @remarks x86: Requires a Pentium or later.
3447 */
3448#if RT_INLINE_ASM_EXTERNAL
3449DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3450#else
3451DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3452{
3453# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3454 __asm__ __volatile__("orq %1, %q0\n\t"
3455 : "=m" (*pu64)
3456 : "r" (u64),
3457 "m" (*pu64));
3458# else
3459 for (;;)
3460 {
3461 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3462 uint64_t u64New = u64Old | u64;
3463 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3464 break;
3465 ASMNopPause();
3466 }
3467# endif
3468}
3469#endif
3470
3471
3472/**
3473 * Atomically Or a signed 64-bit value, unordered.
3474 *
3475 * @param pi64 Pointer to the variable to OR i64 with.
3476 * @param i64 The value to OR *pi64 with.
3477 *
3478 * @remarks x86: Requires a Pentium or later.
3479 */
3480DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3481{
3482 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3483}
3484
3485
3486/**
3487 * Atomically And an unsigned 32-bit value, unordered.
3488 *
3489 * @param pu32 Pointer to the variable to AND u32 with.
3490 * @param u32 The value to AND *pu32 with.
3491 *
3492 * @remarks x86: Requires a 386 or later.
3493 */
3494#if RT_INLINE_ASM_EXTERNAL
3495DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3496#else
3497DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3498{
3499# if RT_INLINE_ASM_GNU_STYLE
3500 __asm__ __volatile__("andl %1, %0\n\t"
3501 : "=m" (*pu32)
3502 : "ir" (u32),
3503 "m" (*pu32));
3504# else
3505 __asm
3506 {
3507 mov eax, [u32]
3508# ifdef RT_ARCH_AMD64
3509 mov rdx, [pu32]
3510 and [rdx], eax
3511# else
3512 mov edx, [pu32]
3513 and [edx], eax
3514# endif
3515 }
3516# endif
3517}
3518#endif
3519
3520
3521/**
3522 * Atomically And a signed 32-bit value, unordered.
3523 *
3524 * @param pi32 Pointer to the variable to AND i32 with.
3525 * @param i32 The value to AND *pi32 with.
3526 *
3527 * @remarks x86: Requires a 386 or later.
3528 */
3529DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3530{
3531 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3532}
3533
3534
3535/**
3536 * Atomically And an unsigned 64-bit value, unordered.
3537 *
3538 * @param pu64 Pointer to the variable to AND u64 with.
3539 * @param u64 The value to AND *pu64 with.
3540 *
3541 * @remarks x86: Requires a Pentium or later.
3542 */
3543#if RT_INLINE_ASM_EXTERNAL
3544DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3545#else
3546DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3547{
3548# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3549 __asm__ __volatile__("andq %1, %0\n\t"
3550 : "=m" (*pu64)
3551 : "r" (u64),
3552 "m" (*pu64));
3553# else
3554 for (;;)
3555 {
3556 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3557 uint64_t u64New = u64Old & u64;
3558 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3559 break;
3560 ASMNopPause();
3561 }
3562# endif
3563}
3564#endif
3565
3566
3567/**
3568 * Atomically And a signed 64-bit value, unordered.
3569 *
3570 * @param pi64 Pointer to the variable to AND i64 with.
3571 * @param i64 The value to AND *pi64 with.
3572 *
3573 * @remarks x86: Requires a Pentium or later.
3574 */
3575DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3576{
3577 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3578}
3579
3580
3581/**
3582 * Atomically increment an unsigned 32-bit value, unordered.
3583 *
3584 * @returns the new value.
3585 * @param pu32 Pointer to the variable to increment.
3586 *
3587 * @remarks x86: Requires a 486 or later.
3588 */
3589#if RT_INLINE_ASM_EXTERNAL
3590DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3591#else
3592DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3593{
3594 uint32_t u32;
3595# if RT_INLINE_ASM_GNU_STYLE
3596 __asm__ __volatile__("xaddl %0, %1\n\t"
3597 : "=r" (u32),
3598 "=m" (*pu32)
3599 : "0" (1),
3600 "m" (*pu32)
3601 : "memory");
3602 return u32 + 1;
3603# else
3604 __asm
3605 {
3606 mov eax, 1
3607# ifdef RT_ARCH_AMD64
3608 mov rdx, [pu32]
3609 xadd [rdx], eax
3610# else
3611 mov edx, [pu32]
3612 xadd [edx], eax
3613# endif
3614 mov u32, eax
3615 }
3616 return u32 + 1;
3617# endif
3618}
3619#endif
3620
3621
3622/**
3623 * Atomically decrement an unsigned 32-bit value, unordered.
3624 *
3625 * @returns the new value.
3626 * @param pu32 Pointer to the variable to decrement.
3627 *
3628 * @remarks x86: Requires a 486 or later.
3629 */
3630#if RT_INLINE_ASM_EXTERNAL
3631DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3632#else
3633DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3634{
3635 uint32_t u32;
3636# if RT_INLINE_ASM_GNU_STYLE
3637 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3638 : "=r" (u32),
3639 "=m" (*pu32)
3640 : "0" (-1),
3641 "m" (*pu32)
3642 : "memory");
3643 return u32 - 1;
3644# else
3645 __asm
3646 {
3647 mov eax, -1
3648# ifdef RT_ARCH_AMD64
3649 mov rdx, [pu32]
3650 xadd [rdx], eax
3651# else
3652 mov edx, [pu32]
3653 xadd [edx], eax
3654# endif
3655 mov u32, eax
3656 }
3657 return u32 - 1;
3658# endif
3659}
3660#endif
3661
3662
3663/** @def RT_ASM_PAGE_SIZE
3664 * We try to avoid dragging in iprt/param.h here.
3665 * @internal
3666 */
3667#if defined(RT_ARCH_SPARC64)
3668# define RT_ASM_PAGE_SIZE 0x2000
3669# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3670# if PAGE_SIZE != 0x2000
3671# error "PAGE_SIZE is not 0x2000!"
3672# endif
3673# endif
3674#else
3675# define RT_ASM_PAGE_SIZE 0x1000
3676# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3677# if PAGE_SIZE != 0x1000
3678# error "PAGE_SIZE is not 0x1000!"
3679# endif
3680# endif
3681#endif
3682
3683/**
3684 * Zeros a 4K memory page.
3685 *
3686 * @param pv Pointer to the memory block. This must be page aligned.
3687 */
3688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3689DECLASM(void) ASMMemZeroPage(volatile void *pv);
3690# else
3691DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3692{
3693# if RT_INLINE_ASM_USES_INTRIN
3694# ifdef RT_ARCH_AMD64
3695 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3696# else
3697 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3698# endif
3699
3700# elif RT_INLINE_ASM_GNU_STYLE
3701 RTCCUINTREG uDummy;
3702# ifdef RT_ARCH_AMD64
3703 __asm__ __volatile__("rep stosq"
3704 : "=D" (pv),
3705 "=c" (uDummy)
3706 : "0" (pv),
3707 "c" (RT_ASM_PAGE_SIZE >> 3),
3708 "a" (0)
3709 : "memory");
3710# else
3711 __asm__ __volatile__("rep stosl"
3712 : "=D" (pv),
3713 "=c" (uDummy)
3714 : "0" (pv),
3715 "c" (RT_ASM_PAGE_SIZE >> 2),
3716 "a" (0)
3717 : "memory");
3718# endif
3719# else
3720 __asm
3721 {
3722# ifdef RT_ARCH_AMD64
3723 xor rax, rax
3724 mov ecx, 0200h
3725 mov rdi, [pv]
3726 rep stosq
3727# else
3728 xor eax, eax
3729 mov ecx, 0400h
3730 mov edi, [pv]
3731 rep stosd
3732# endif
3733 }
3734# endif
3735}
3736# endif
3737
3738
3739/**
3740 * Zeros a memory block with a 32-bit aligned size.
3741 *
3742 * @param pv Pointer to the memory block.
3743 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3744 */
3745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3746DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3747#else
3748DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3749{
3750# if RT_INLINE_ASM_USES_INTRIN
3751# ifdef RT_ARCH_AMD64
3752 if (!(cb & 7))
3753 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3754 else
3755# endif
3756 __stosd((unsigned long *)pv, 0, cb / 4);
3757
3758# elif RT_INLINE_ASM_GNU_STYLE
3759 __asm__ __volatile__("rep stosl"
3760 : "=D" (pv),
3761 "=c" (cb)
3762 : "0" (pv),
3763 "1" (cb >> 2),
3764 "a" (0)
3765 : "memory");
3766# else
3767 __asm
3768 {
3769 xor eax, eax
3770# ifdef RT_ARCH_AMD64
3771 mov rcx, [cb]
3772 shr rcx, 2
3773 mov rdi, [pv]
3774# else
3775 mov ecx, [cb]
3776 shr ecx, 2
3777 mov edi, [pv]
3778# endif
3779 rep stosd
3780 }
3781# endif
3782}
3783#endif
3784
3785
3786/**
3787 * Fills a memory block with a 32-bit aligned size.
3788 *
3789 * @param pv Pointer to the memory block.
3790 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3791 * @param u32 The value to fill with.
3792 */
3793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3794DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3795#else
3796DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3797{
3798# if RT_INLINE_ASM_USES_INTRIN
3799# ifdef RT_ARCH_AMD64
3800 if (!(cb & 7))
3801 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3802 else
3803# endif
3804 __stosd((unsigned long *)pv, u32, cb / 4);
3805
3806# elif RT_INLINE_ASM_GNU_STYLE
3807 __asm__ __volatile__("rep stosl"
3808 : "=D" (pv),
3809 "=c" (cb)
3810 : "0" (pv),
3811 "1" (cb >> 2),
3812 "a" (u32)
3813 : "memory");
3814# else
3815 __asm
3816 {
3817# ifdef RT_ARCH_AMD64
3818 mov rcx, [cb]
3819 shr rcx, 2
3820 mov rdi, [pv]
3821# else
3822 mov ecx, [cb]
3823 shr ecx, 2
3824 mov edi, [pv]
3825# endif
3826 mov eax, [u32]
3827 rep stosd
3828 }
3829# endif
3830}
3831#endif
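

/*
 * Usage sketch for ASMMemZero32 / ASMMemFill32; EXAMPLETABLE is hypothetical.
 * Both require the byte count to be a multiple of four, which a structure
 * made up of uint32_t entries satisfies by construction:
 *
 *     typedef struct EXAMPLETABLE { uint32_t au32Entries[64]; } EXAMPLETABLE;
 *     EXAMPLETABLE Tab;
 *
 *     ASMMemZero32(&Tab, sizeof(Tab));                        // all zeros
 *     ASMMemFill32(&Tab, sizeof(Tab), UINT32_C(0xffffffff));  // all ones
 */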
3832
3833
3834/**
3835 * Checks if a memory block is all zeros.
3836 *
3837 * @returns Pointer to the first non-zero byte.
3838 * @returns NULL if all zero.
3839 *
3840 * @param pv Pointer to the memory block.
3841 * @param cb Number of bytes in the block.
3842 *
3843 * @todo Fix name, it is a predicate function but it's not returning boolean!
3844 */
3845#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3846DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3847#else
3848DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3849{
3850 uint8_t const *pb = (uint8_t const *)pv;
3851 for (; cb; cb--, pb++)
3852 if (RT_LIKELY(*pb == u8))
3853 { /* likely */ }
3854 else
3855 return (void *)pb;
3856 return NULL;
3857}
3858#endif
3859
3860
3861/**
3862 * Checks if a memory block is all zeros.
3863 *
3864 * @returns true if zero, false if not.
3865 *
3866 * @param pv Pointer to the memory block.
3867 * @param cb Number of bytes in the block.
3868 *
3869 * @sa ASMMemFirstNonZero
3870 */
3871DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3872{
3873 return ASMMemFirstNonZero(pv, cb) == NULL;
3874}
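

/*
 * Usage sketch for ASMMemFirstNonZero / ASMMemIsZero; pvSector and cbSector
 * are hypothetical.  Typical use is skipping blocks that are entirely zero
 * and otherwise locating the first stray byte:
 *
 *     if (ASMMemIsZero(pvSector, cbSector))
 *         ... // no need to write this block
 *     else
 *     {
 *         uint8_t *pbFirst = (uint8_t *)ASMMemFirstNonZero(pvSector, cbSector);
 *         ... // pbFirst points at the first non-zero byte
 *     }
 */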
3875
3876
3877/**
3878 * Checks if a memory page is all zeros.
3879 *
3880 * @returns true / false.
3881 *
3882 * @param pvPage Pointer to the page. Must be aligned on a 16-byte
3883 * boundary.
3884 */
3885DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3886{
3887# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3888 union { RTCCUINTREG r; bool f; } uAX;
3889 RTCCUINTREG xCX, xDI;
3890 Assert(!((uintptr_t)pvPage & 15));
3891 __asm__ __volatile__("repe; "
3892# ifdef RT_ARCH_AMD64
3893 "scasq\n\t"
3894# else
3895 "scasl\n\t"
3896# endif
3897 "setnc %%al\n\t"
3898 : "=&c" (xCX),
3899 "=&D" (xDI),
3900 "=&a" (uAX.r)
3901 : "mr" (pvPage),
3902# ifdef RT_ARCH_AMD64
3903 "0" (RT_ASM_PAGE_SIZE/8),
3904# else
3905 "0" (RT_ASM_PAGE_SIZE/4),
3906# endif
3907 "1" (pvPage),
3908 "2" (0));
3909 return uAX.f;
3910# else
3911 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3912 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3913 Assert(!((uintptr_t)pvPage & 15));
3914 for (;;)
3915 {
3916 if (puPtr[0]) return false;
3917 if (puPtr[4]) return false;
3918
3919 if (puPtr[2]) return false;
3920 if (puPtr[6]) return false;
3921
3922 if (puPtr[1]) return false;
3923 if (puPtr[5]) return false;
3924
3925 if (puPtr[3]) return false;
3926 if (puPtr[7]) return false;
3927
3928 if (!--cLeft)
3929 return true;
3930 puPtr += 8;
3931 }
3932 return true;
3933# endif
3934}
3935
3936
3937/**
3938 * Checks if a memory block is filled with the specified byte, returning the
3939 * first mismatch.
3940 *
3941 * This is sort of an inverted memchr.
3942 *
3943 * @returns Pointer to the byte which doesn't equal u8.
3944 * @returns NULL if all equal to u8.
3945 *
3946 * @param pv Pointer to the memory block.
3947 * @param cb Number of bytes in the block.
3948 * @param u8 The value it's supposed to be filled with.
3949 *
3950 * @remarks No alignment requirements.
3951 */
3952#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3953DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3954#else
3955DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3956{
3957 uint8_t const *pb = (uint8_t const *)pv;
3958 for (; cb; cb--, pb++)
3959 if (RT_LIKELY(*pb == u8))
3960 { /* likely */ }
3961 else
3962 return (void *)pb;
3963 return NULL;
3964}
3965#endif
3966
3967
3968/**
3969 * Checks if a memory block is filled with the specified byte.
3970 *
3971 * @returns true if all matching, false if not.
3972 *
3973 * @param pv Pointer to the memory block.
3974 * @param cb Number of bytes in the block.
3975 * @param u8 The value it's supposed to be filled with.
3976 *
3977 * @remarks No alignment requirements.
3978 */
3979DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
3980{
3981 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
3982}
3983
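/*
 * A minimal sketch combining the fill-check helpers above; abBuf is a
 * hypothetical buffer and memset comes from <string.h>:
 *
 *     uint8_t abBuf[64];                                            // hypothetical buffer
 *     memset(abBuf, 0xff, sizeof(abBuf));
 *     Assert(ASMMemIsAllU8(abBuf, sizeof(abBuf), 0xff));
 *     abBuf[3] = 0;
 *     Assert(ASMMemFirstMismatchingU8(abBuf, sizeof(abBuf), 0xff) == &abBuf[3]);
 */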
3984
3985/**
3986 * Checks if a memory block is filled with the specified 32-bit value.
3987 *
3988 * This is a sort of inverted memchr.
3989 *
3990 * @returns Pointer to the first value which doesn't equal u32.
3991 * @returns NULL if all equal to u32.
3992 *
3993 * @param pv Pointer to the memory block.
3994 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3995 * @param u32 The value it's supposed to be filled with.
3996 */
3997DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
3998{
3999/** @todo rewrite this in inline assembly? */
4000 uint32_t const *pu32 = (uint32_t const *)pv;
4001 for (; cb; cb -= 4, pu32++)
4002 if (RT_LIKELY(*pu32 == u32))
4003 { /* likely */ }
4004 else
4005 return (uint32_t *)pu32;
4006 return NULL;
4007}
4008
4009
4010/**
4011 * Probes a byte pointer for read access.
4012 *
4013 * While the function will fault if the byte is not read accessible,
4014 * the idea is to do this in a safe place like before acquiring locks
4015 * and the like.
4016 *
4017 * Also, this function guarantees that an eager compiler is not going
4018 * to optimize the probing away.
4019 *
4020 * @param pvByte Pointer to the byte.
4021 */
4022#if RT_INLINE_ASM_EXTERNAL
4023DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4024#else
4025DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4026{
4027 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4028 uint8_t u8;
4029# if RT_INLINE_ASM_GNU_STYLE
4030 __asm__ __volatile__("movb (%1), %0\n\t"
4031 : "=r" (u8)
4032 : "r" (pvByte));
4033# else
4034 __asm
4035 {
4036# ifdef RT_ARCH_AMD64
4037 mov rax, [pvByte]
4038 mov al, [rax]
4039# else
4040 mov eax, [pvByte]
4041 mov al, [eax]
4042# endif
4043 mov [u8], al
4044 }
4045# endif
4046 return u8;
4047}
4048#endif
4049
4050/**
4051 * Probes a buffer for read access page by page.
4052 *
4053 * While the function will fault if the buffer is not fully read
4054 * accessible, the idea is to do this in a safe place like before
4055 * acquiring locks and the like.
4056 *
4057 * Also, this function guarantees that an eager compiler is not going
4058 * to optimize the probing away.
4059 *
4060 * @param pvBuf Pointer to the buffer.
4061 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4062 */
4063DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4064{
4065 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4066 /* the first byte */
4067 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4068 ASMProbeReadByte(pu8);
4069
4070 /* the whole pages in between. */
4071 while (cbBuf > RT_ASM_PAGE_SIZE)
4072 {
4073 ASMProbeReadByte(pu8);
4074 cbBuf -= RT_ASM_PAGE_SIZE;
4075 pu8 += RT_ASM_PAGE_SIZE;
4076 }
4077
4078 /* the last byte */
4079 ASMProbeReadByte(pu8 + cbBuf - 1);
4080}
4081
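/*
 * A minimal sketch: touch a request buffer page by page before entering a
 * critical section, so any access fault is taken here rather than while
 * holding the lock.  pvReq and cbReq are hypothetical request buffer args:
 *
 *     ASMProbeReadBuffer(pvReq, cbReq);    // hypothetical pvReq/cbReq; faults now, not under the lock
 */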
4082
4083
4084/** @defgroup grp_inline_bits Bit Operations
4085 * @{
4086 */
4087
4088
4089/**
4090 * Sets a bit in a bitmap.
4091 *
4092 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4093 * @param iBit The bit to set.
4094 *
4095 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4096 * However, doing so will yield better performance as well as avoiding
4097 * traps accessing the last bits in the bitmap.
4098 */
4099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4100DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4101#else
4102DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4103{
4104# if RT_INLINE_ASM_USES_INTRIN
4105 _bittestandset((long *)pvBitmap, iBit);
4106
4107# elif RT_INLINE_ASM_GNU_STYLE
4108 __asm__ __volatile__("btsl %1, %0"
4109 : "=m" (*(volatile long *)pvBitmap)
4110 : "Ir" (iBit),
4111 "m" (*(volatile long *)pvBitmap)
4112 : "memory");
4113# else
4114 __asm
4115 {
4116# ifdef RT_ARCH_AMD64
4117 mov rax, [pvBitmap]
4118 mov edx, [iBit]
4119 bts [rax], edx
4120# else
4121 mov eax, [pvBitmap]
4122 mov edx, [iBit]
4123 bts [eax], edx
4124# endif
4125 }
4126# endif
4127}
4128#endif
4129
4130
4131/**
4132 * Atomically sets a bit in a bitmap, ordered.
4133 *
4134 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4135 * the memory access isn't atomic!
4136 * @param iBit The bit to set.
4137 *
4138 * @remarks x86: Requires a 386 or later.
4139 */
4140#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4141DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4142#else
4143DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4144{
4145 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4146# if RT_INLINE_ASM_USES_INTRIN
4147 _interlockedbittestandset((long *)pvBitmap, iBit);
4148# elif RT_INLINE_ASM_GNU_STYLE
4149 __asm__ __volatile__("lock; btsl %1, %0"
4150 : "=m" (*(volatile long *)pvBitmap)
4151 : "Ir" (iBit),
4152 "m" (*(volatile long *)pvBitmap)
4153 : "memory");
4154# else
4155 __asm
4156 {
4157# ifdef RT_ARCH_AMD64
4158 mov rax, [pvBitmap]
4159 mov edx, [iBit]
4160 lock bts [rax], edx
4161# else
4162 mov eax, [pvBitmap]
4163 mov edx, [iBit]
4164 lock bts [eax], edx
4165# endif
4166 }
4167# endif
4168}
4169#endif
4170
4171
4172/**
4173 * Clears a bit in a bitmap.
4174 *
4175 * @param pvBitmap Pointer to the bitmap.
4176 * @param iBit The bit to clear.
4177 *
4178 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4179 * However, doing so will yield better performance as well as avoiding
4180 * traps accessing the last bits in the bitmap.
4181 */
4182#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4183DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4184#else
4185DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4186{
4187# if RT_INLINE_ASM_USES_INTRIN
4188 _bittestandreset((long *)pvBitmap, iBit);
4189
4190# elif RT_INLINE_ASM_GNU_STYLE
4191 __asm__ __volatile__("btrl %1, %0"
4192 : "=m" (*(volatile long *)pvBitmap)
4193 : "Ir" (iBit),
4194 "m" (*(volatile long *)pvBitmap)
4195 : "memory");
4196# else
4197 __asm
4198 {
4199# ifdef RT_ARCH_AMD64
4200 mov rax, [pvBitmap]
4201 mov edx, [iBit]
4202 btr [rax], edx
4203# else
4204 mov eax, [pvBitmap]
4205 mov edx, [iBit]
4206 btr [eax], edx
4207# endif
4208 }
4209# endif
4210}
4211#endif
4212
4213
4214/**
4215 * Atomically clears a bit in a bitmap, ordered.
4216 *
4217 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4218 * the memory access isn't atomic!
4219 * @param iBit The bit to clear.
4220 *
4221 * @remarks No memory barrier, take care on SMP.
4222 * @remarks x86: Requires a 386 or later.
4223 */
4224#if RT_INLINE_ASM_EXTERNAL
4225DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4226#else
4227DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4228{
4229 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4230# if RT_INLINE_ASM_GNU_STYLE
4231 __asm__ __volatile__("lock; btrl %1, %0"
4232 : "=m" (*(volatile long *)pvBitmap)
4233 : "Ir" (iBit),
4234 "m" (*(volatile long *)pvBitmap)
4235 : "memory");
4236# else
4237 __asm
4238 {
4239# ifdef RT_ARCH_AMD64
4240 mov rax, [pvBitmap]
4241 mov edx, [iBit]
4242 lock btr [rax], edx
4243# else
4244 mov eax, [pvBitmap]
4245 mov edx, [iBit]
4246 lock btr [eax], edx
4247# endif
4248 }
4249# endif
4250}
4251#endif
4252
4253
4254/**
4255 * Toggles a bit in a bitmap.
4256 *
4257 * @param pvBitmap Pointer to the bitmap.
4258 * @param iBit The bit to toggle.
4259 *
4260 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4261 * However, doing so will yield better performance as well as avoiding
4262 * traps accessing the last bits in the bitmap.
4263 */
4264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4265DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4266#else
4267DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4268{
4269# if RT_INLINE_ASM_USES_INTRIN
4270 _bittestandcomplement((long *)pvBitmap, iBit);
4271# elif RT_INLINE_ASM_GNU_STYLE
4272 __asm__ __volatile__("btcl %1, %0"
4273 : "=m" (*(volatile long *)pvBitmap)
4274 : "Ir" (iBit),
4275 "m" (*(volatile long *)pvBitmap)
4276 : "memory");
4277# else
4278 __asm
4279 {
4280# ifdef RT_ARCH_AMD64
4281 mov rax, [pvBitmap]
4282 mov edx, [iBit]
4283 btc [rax], edx
4284# else
4285 mov eax, [pvBitmap]
4286 mov edx, [iBit]
4287 btc [eax], edx
4288# endif
4289 }
4290# endif
4291}
4292#endif
4293
4294
4295/**
4296 * Atomically toggles a bit in a bitmap, ordered.
4297 *
4298 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4299 * the memory access isn't atomic!
4300 * @param iBit The bit to toggle.
4301 *
4302 * @remarks x86: Requires a 386 or later.
4303 */
4304#if RT_INLINE_ASM_EXTERNAL
4305DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4306#else
4307DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4308{
4309 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4310# if RT_INLINE_ASM_GNU_STYLE
4311 __asm__ __volatile__("lock; btcl %1, %0"
4312 : "=m" (*(volatile long *)pvBitmap)
4313 : "Ir" (iBit),
4314 "m" (*(volatile long *)pvBitmap)
4315 : "memory");
4316# else
4317 __asm
4318 {
4319# ifdef RT_ARCH_AMD64
4320 mov rax, [pvBitmap]
4321 mov edx, [iBit]
4322 lock btc [rax], edx
4323# else
4324 mov eax, [pvBitmap]
4325 mov edx, [iBit]
4326 lock btc [eax], edx
4327# endif
4328 }
4329# endif
4330}
4331#endif
4332
4333
4334/**
4335 * Tests and sets a bit in a bitmap.
4336 *
4337 * @returns true if the bit was set.
4338 * @returns false if the bit was clear.
4339 *
4340 * @param pvBitmap Pointer to the bitmap.
4341 * @param iBit The bit to test and set.
4342 *
4343 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4344 * However, doing so will yield better performance as well as avoiding
4345 * traps accessing the last bits in the bitmap.
4346 */
4347#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4348DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4349#else
4350DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4351{
4352 union { bool f; uint32_t u32; uint8_t u8; } rc;
4353# if RT_INLINE_ASM_USES_INTRIN
4354 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4355
4356# elif RT_INLINE_ASM_GNU_STYLE
4357 __asm__ __volatile__("btsl %2, %1\n\t"
4358 "setc %b0\n\t"
4359 "andl $1, %0\n\t"
4360 : "=q" (rc.u32),
4361 "=m" (*(volatile long *)pvBitmap)
4362 : "Ir" (iBit),
4363 "m" (*(volatile long *)pvBitmap)
4364 : "memory");
4365# else
4366 __asm
4367 {
4368 mov edx, [iBit]
4369# ifdef RT_ARCH_AMD64
4370 mov rax, [pvBitmap]
4371 bts [rax], edx
4372# else
4373 mov eax, [pvBitmap]
4374 bts [eax], edx
4375# endif
4376 setc al
4377 and eax, 1
4378 mov [rc.u32], eax
4379 }
4380# endif
4381 return rc.f;
4382}
4383#endif
4384
4385
4386/**
4387 * Atomically tests and sets a bit in a bitmap, ordered.
4388 *
4389 * @returns true if the bit was set.
4390 * @returns false if the bit was clear.
4391 *
4392 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4393 * the memory access isn't atomic!
4394 * @param iBit The bit to test and set.
4395 *
4396 * @remarks x86: Requires a 386 or later.
4397 */
4398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4399DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4400#else
4401DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4402{
4403 union { bool f; uint32_t u32; uint8_t u8; } rc;
4404 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4405# if RT_INLINE_ASM_USES_INTRIN
4406 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4407# elif RT_INLINE_ASM_GNU_STYLE
4408 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4409 "setc %b0\n\t"
4410 "andl $1, %0\n\t"
4411 : "=q" (rc.u32),
4412 "=m" (*(volatile long *)pvBitmap)
4413 : "Ir" (iBit),
4414 "m" (*(volatile long *)pvBitmap)
4415 : "memory");
4416# else
4417 __asm
4418 {
4419 mov edx, [iBit]
4420# ifdef RT_ARCH_AMD64
4421 mov rax, [pvBitmap]
4422 lock bts [rax], edx
4423# else
4424 mov eax, [pvBitmap]
4425 lock bts [eax], edx
4426# endif
4427 setc al
4428 and eax, 1
4429 mov [rc.u32], eax
4430 }
4431# endif
4432 return rc.f;
4433}
4434#endif
4435
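/*
 * A minimal sketch: claim and release a slot in a shared allocation bitmap
 * with the atomic bit operations above.  g_bmSlots is a hypothetical,
 * 32-bit aligned bitmap:
 *
 *     static uint32_t volatile g_bmSlots[2];                        // hypothetical bitmap
 *     if (!ASMAtomicBitTestAndSet(&g_bmSlots[0], 7))
 *     {
 *         // bit 7 was clear, so slot 7 is now ours ...
 *         ASMAtomicBitClear(&g_bmSlots[0], 7);                      // release it again
 *     }
 */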
4436
4437/**
4438 * Tests and clears a bit in a bitmap.
4439 *
4440 * @returns true if the bit was set.
4441 * @returns false if the bit was clear.
4442 *
4443 * @param pvBitmap Pointer to the bitmap.
4444 * @param iBit The bit to test and clear.
4445 *
4446 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4447 * However, doing so will yield better performance as well as avoiding
4448 * traps accessing the last bits in the bitmap.
4449 */
4450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4451DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4452#else
4453DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4454{
4455 union { bool f; uint32_t u32; uint8_t u8; } rc;
4456# if RT_INLINE_ASM_USES_INTRIN
4457 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4458
4459# elif RT_INLINE_ASM_GNU_STYLE
4460 __asm__ __volatile__("btrl %2, %1\n\t"
4461 "setc %b0\n\t"
4462 "andl $1, %0\n\t"
4463 : "=q" (rc.u32),
4464 "=m" (*(volatile long *)pvBitmap)
4465 : "Ir" (iBit),
4466 "m" (*(volatile long *)pvBitmap)
4467 : "memory");
4468# else
4469 __asm
4470 {
4471 mov edx, [iBit]
4472# ifdef RT_ARCH_AMD64
4473 mov rax, [pvBitmap]
4474 btr [rax], edx
4475# else
4476 mov eax, [pvBitmap]
4477 btr [eax], edx
4478# endif
4479 setc al
4480 and eax, 1
4481 mov [rc.u32], eax
4482 }
4483# endif
4484 return rc.f;
4485}
4486#endif
4487
4488
4489/**
4490 * Atomically tests and clears a bit in a bitmap, ordered.
4491 *
4492 * @returns true if the bit was set.
4493 * @returns false if the bit was clear.
4494 *
4495 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4496 * the memory access isn't atomic!
4497 * @param iBit The bit to test and clear.
4498 *
4499 * @remarks No memory barrier, take care on SMP.
4500 * @remarks x86: Requires a 386 or later.
4501 */
4502#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4503DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4504#else
4505DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4506{
4507 union { bool f; uint32_t u32; uint8_t u8; } rc;
4508 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4509# if RT_INLINE_ASM_USES_INTRIN
4510 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4511
4512# elif RT_INLINE_ASM_GNU_STYLE
4513 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4514 "setc %b0\n\t"
4515 "andl $1, %0\n\t"
4516 : "=q" (rc.u32),
4517 "=m" (*(volatile long *)pvBitmap)
4518 : "Ir" (iBit),
4519 "m" (*(volatile long *)pvBitmap)
4520 : "memory");
4521# else
4522 __asm
4523 {
4524 mov edx, [iBit]
4525# ifdef RT_ARCH_AMD64
4526 mov rax, [pvBitmap]
4527 lock btr [rax], edx
4528# else
4529 mov eax, [pvBitmap]
4530 lock btr [eax], edx
4531# endif
4532 setc al
4533 and eax, 1
4534 mov [rc.u32], eax
4535 }
4536# endif
4537 return rc.f;
4538}
4539#endif
4540
4541
4542/**
4543 * Tests and toggles a bit in a bitmap.
4544 *
4545 * @returns true if the bit was set.
4546 * @returns false if the bit was clear.
4547 *
4548 * @param pvBitmap Pointer to the bitmap.
4549 * @param iBit The bit to test and toggle.
4550 *
4551 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4552 * However, doing so will yield better performance as well as avoiding
4553 * traps accessing the last bits in the bitmap.
4554 */
4555#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4556DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4557#else
4558DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4559{
4560 union { bool f; uint32_t u32; uint8_t u8; } rc;
4561# if RT_INLINE_ASM_USES_INTRIN
4562 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4563
4564# elif RT_INLINE_ASM_GNU_STYLE
4565 __asm__ __volatile__("btcl %2, %1\n\t"
4566 "setc %b0\n\t"
4567 "andl $1, %0\n\t"
4568 : "=q" (rc.u32),
4569 "=m" (*(volatile long *)pvBitmap)
4570 : "Ir" (iBit),
4571 "m" (*(volatile long *)pvBitmap)
4572 : "memory");
4573# else
4574 __asm
4575 {
4576 mov edx, [iBit]
4577# ifdef RT_ARCH_AMD64
4578 mov rax, [pvBitmap]
4579 btc [rax], edx
4580# else
4581 mov eax, [pvBitmap]
4582 btc [eax], edx
4583# endif
4584 setc al
4585 and eax, 1
4586 mov [rc.u32], eax
4587 }
4588# endif
4589 return rc.f;
4590}
4591#endif
4592
4593
4594/**
4595 * Atomically tests and toggles a bit in a bitmap, ordered.
4596 *
4597 * @returns true if the bit was set.
4598 * @returns false if the bit was clear.
4599 *
4600 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4601 * the memory access isn't atomic!
4602 * @param iBit The bit to test and toggle.
4603 *
4604 * @remarks x86: Requires a 386 or later.
4605 */
4606#if RT_INLINE_ASM_EXTERNAL
4607DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4608#else
4609DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4610{
4611 union { bool f; uint32_t u32; uint8_t u8; } rc;
4612 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4613# if RT_INLINE_ASM_GNU_STYLE
4614 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4615 "setc %b0\n\t"
4616 "andl $1, %0\n\t"
4617 : "=q" (rc.u32),
4618 "=m" (*(volatile long *)pvBitmap)
4619 : "Ir" (iBit),
4620 "m" (*(volatile long *)pvBitmap)
4621 : "memory");
4622# else
4623 __asm
4624 {
4625 mov edx, [iBit]
4626# ifdef RT_ARCH_AMD64
4627 mov rax, [pvBitmap]
4628 lock btc [rax], edx
4629# else
4630 mov eax, [pvBitmap]
4631 lock btc [eax], edx
4632# endif
4633 setc al
4634 and eax, 1
4635 mov [rc.u32], eax
4636 }
4637# endif
4638 return rc.f;
4639}
4640#endif
4641
4642
4643/**
4644 * Tests if a bit in a bitmap is set.
4645 *
4646 * @returns true if the bit is set.
4647 * @returns false if the bit is clear.
4648 *
4649 * @param pvBitmap Pointer to the bitmap.
4650 * @param iBit The bit to test.
4651 *
4652 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4653 * However, doing so will yield better performance as well as avoiding
4654 * traps accessing the last bits in the bitmap.
4655 */
4656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4657DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4658#else
4659DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4660{
4661 union { bool f; uint32_t u32; uint8_t u8; } rc;
4662# if RT_INLINE_ASM_USES_INTRIN
4663 rc.u32 = _bittest((long *)pvBitmap, iBit);
4664# elif RT_INLINE_ASM_GNU_STYLE
4665
4666 __asm__ __volatile__("btl %2, %1\n\t"
4667 "setc %b0\n\t"
4668 "andl $1, %0\n\t"
4669 : "=q" (rc.u32)
4670 : "m" (*(const volatile long *)pvBitmap),
4671 "Ir" (iBit)
4672 : "memory");
4673# else
4674 __asm
4675 {
4676 mov edx, [iBit]
4677# ifdef RT_ARCH_AMD64
4678 mov rax, [pvBitmap]
4679 bt [rax], edx
4680# else
4681 mov eax, [pvBitmap]
4682 bt [eax], edx
4683# endif
4684 setc al
4685 and eax, 1
4686 mov [rc.u32], eax
4687 }
4688# endif
4689 return rc.f;
4690}
4691#endif
4692
4693
4694/**
4695 * Clears a bit range within a bitmap.
4696 *
4697 * @param pvBitmap Pointer to the bitmap.
4698 * @param iBitStart The first bit to clear.
4699 * @param iBitEnd The first bit not to clear.
4700 */
4701DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4702{
4703 if (iBitStart < iBitEnd)
4704 {
4705 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4706 int32_t iStart = iBitStart & ~31;
4707 int32_t iEnd = iBitEnd & ~31;
4708 if (iStart == iEnd)
4709 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4710 else
4711 {
4712 /* bits in first dword. */
4713 if (iBitStart & 31)
4714 {
4715 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4716 pu32++;
4717 iBitStart = iStart + 32;
4718 }
4719
4720 /* whole dword. */
4721 if (iBitStart != iEnd)
4722 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4723
4724 /* bits in last dword. */
4725 if (iBitEnd & 31)
4726 {
4727 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4728 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4729 }
4730 }
4731 }
4732}
4733
4734
4735/**
4736 * Sets a bit range within a bitmap.
4737 *
4738 * @param pvBitmap Pointer to the bitmap.
4739 * @param iBitStart The first bit to set.
4740 * @param iBitEnd The first bit not to set.
4741 */
4742DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4743{
4744 if (iBitStart < iBitEnd)
4745 {
4746 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4747 int32_t iStart = iBitStart & ~31;
4748 int32_t iEnd = iBitEnd & ~31;
4749 if (iStart == iEnd)
4750 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4751 else
4752 {
4753 /* bits in first dword. */
4754 if (iBitStart & 31)
4755 {
4756 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4757 pu32++;
4758 iBitStart = iStart + 32;
4759 }
4760
4761 /* whole dword. */
4762 if (iBitStart != iEnd)
4763 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4764
4765 /* bits in last dword. */
4766 if (iBitEnd & 31)
4767 {
4768 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4769 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4770 }
4771 }
4772 }
4773}
4774
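/*
 * A minimal sketch of the half-open range convention used by the two range
 * helpers above (iBitEnd is the first bit NOT touched); au32Bitmap is a
 * hypothetical 64-bit bitmap:
 *
 *     uint32_t au32Bitmap[2] = {0, 0};                              // hypothetical bitmap
 *     ASMBitSetRange(au32Bitmap, 4, 20);                            // sets bits 4..19
 *     Assert( ASMBitTest(au32Bitmap, 4) &&  ASMBitTest(au32Bitmap, 19));
 *     Assert(!ASMBitTest(au32Bitmap, 3) && !ASMBitTest(au32Bitmap, 20));
 *     ASMBitClearRange(au32Bitmap, 0, 64);                          // clears everything again
 */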
4775
4776/**
4777 * Finds the first clear bit in a bitmap.
4778 *
4779 * @returns Index of the first zero bit.
4780 * @returns -1 if no clear bit was found.
4781 * @param pvBitmap Pointer to the bitmap.
4782 * @param cBits The number of bits in the bitmap. Multiple of 32.
4783 */
4784#if RT_INLINE_ASM_EXTERNAL
4785DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4786#else
4787DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4788{
4789 if (cBits)
4790 {
4791 int32_t iBit;
4792# if RT_INLINE_ASM_GNU_STYLE
4793 RTCCUINTREG uEAX, uECX, uEDI;
4794 cBits = RT_ALIGN_32(cBits, 32);
4795 __asm__ __volatile__("repe; scasl\n\t"
4796 "je 1f\n\t"
4797# ifdef RT_ARCH_AMD64
4798 "lea -4(%%rdi), %%rdi\n\t"
4799 "xorl (%%rdi), %%eax\n\t"
4800 "subq %5, %%rdi\n\t"
4801# else
4802 "lea -4(%%edi), %%edi\n\t"
4803 "xorl (%%edi), %%eax\n\t"
4804 "subl %5, %%edi\n\t"
4805# endif
4806 "shll $3, %%edi\n\t"
4807 "bsfl %%eax, %%edx\n\t"
4808 "addl %%edi, %%edx\n\t"
4809 "1:\t\n"
4810 : "=d" (iBit),
4811 "=&c" (uECX),
4812 "=&D" (uEDI),
4813 "=&a" (uEAX)
4814 : "0" (0xffffffff),
4815 "mr" (pvBitmap),
4816 "1" (cBits >> 5),
4817 "2" (pvBitmap),
4818 "3" (0xffffffff));
4819# else
4820 cBits = RT_ALIGN_32(cBits, 32);
4821 __asm
4822 {
4823# ifdef RT_ARCH_AMD64
4824 mov rdi, [pvBitmap]
4825 mov rbx, rdi
4826# else
4827 mov edi, [pvBitmap]
4828 mov ebx, edi
4829# endif
4830 mov edx, 0ffffffffh
4831 mov eax, edx
4832 mov ecx, [cBits]
4833 shr ecx, 5
4834 repe scasd
4835 je done
4836
4837# ifdef RT_ARCH_AMD64
4838 lea rdi, [rdi - 4]
4839 xor eax, [rdi]
4840 sub rdi, rbx
4841# else
4842 lea edi, [edi - 4]
4843 xor eax, [edi]
4844 sub edi, ebx
4845# endif
4846 shl edi, 3
4847 bsf edx, eax
4848 add edx, edi
4849 done:
4850 mov [iBit], edx
4851 }
4852# endif
4853 return iBit;
4854 }
4855 return -1;
4856}
4857#endif
4858
4859
4860/**
4861 * Finds the next clear bit in a bitmap.
4862 *
4863 * @returns Index of the next zero bit.
4864 * @returns -1 if no clear bit was found.
4865 * @param pvBitmap Pointer to the bitmap.
4866 * @param cBits The number of bits in the bitmap. Multiple of 32.
4867 * @param iBitPrev The bit returned from the last search.
4868 * The search will start at iBitPrev + 1.
4869 */
4870#if RT_INLINE_ASM_EXTERNAL
4871DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4872#else
4873DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4874{
4875 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4876 int iBit = ++iBitPrev & 31;
4877 if (iBit)
4878 {
4879 /*
4880 * Inspect the 32-bit word containing the unaligned bit.
4881 */
4882 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4883
4884# if RT_INLINE_ASM_USES_INTRIN
4885 unsigned long ulBit = 0;
4886 if (_BitScanForward(&ulBit, u32))
4887 return ulBit + iBitPrev;
4888# else
4889# if RT_INLINE_ASM_GNU_STYLE
4890 __asm__ __volatile__("bsf %1, %0\n\t"
4891 "jnz 1f\n\t"
4892 "movl $-1, %0\n\t"
4893 "1:\n\t"
4894 : "=r" (iBit)
4895 : "r" (u32));
4896# else
4897 __asm
4898 {
4899 mov edx, [u32]
4900 bsf eax, edx
4901 jnz done
4902 mov eax, 0ffffffffh
4903 done:
4904 mov [iBit], eax
4905 }
4906# endif
4907 if (iBit >= 0)
4908 return iBit + iBitPrev;
4909# endif
4910
4911 /*
4912 * Skip ahead and see if there is anything left to search.
4913 */
4914 iBitPrev |= 31;
4915 iBitPrev++;
4916 if (cBits <= (uint32_t)iBitPrev)
4917 return -1;
4918 }
4919
4920 /*
4921 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4922 */
4923 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4924 if (iBit >= 0)
4925 iBit += iBitPrev;
4926 return iBit;
4927}
4928#endif
4929
4930
4931/**
4932 * Finds the first set bit in a bitmap.
4933 *
4934 * @returns Index of the first set bit.
4935 * @returns -1 if no set bit was found.
4936 * @param pvBitmap Pointer to the bitmap.
4937 * @param cBits The number of bits in the bitmap. Multiple of 32.
4938 */
4939#if RT_INLINE_ASM_EXTERNAL
4940DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4941#else
4942DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4943{
4944 if (cBits)
4945 {
4946 int32_t iBit;
4947# if RT_INLINE_ASM_GNU_STYLE
4948 RTCCUINTREG uEAX, uECX, uEDI;
4949 cBits = RT_ALIGN_32(cBits, 32);
4950 __asm__ __volatile__("repe; scasl\n\t"
4951 "je 1f\n\t"
4952# ifdef RT_ARCH_AMD64
4953 "lea -4(%%rdi), %%rdi\n\t"
4954 "movl (%%rdi), %%eax\n\t"
4955 "subq %5, %%rdi\n\t"
4956# else
4957 "lea -4(%%edi), %%edi\n\t"
4958 "movl (%%edi), %%eax\n\t"
4959 "subl %5, %%edi\n\t"
4960# endif
4961 "shll $3, %%edi\n\t"
4962 "bsfl %%eax, %%edx\n\t"
4963 "addl %%edi, %%edx\n\t"
4964 "1:\t\n"
4965 : "=d" (iBit),
4966 "=&c" (uECX),
4967 "=&D" (uEDI),
4968 "=&a" (uEAX)
4969 : "0" (0xffffffff),
4970 "mr" (pvBitmap),
4971 "1" (cBits >> 5),
4972 "2" (pvBitmap),
4973 "3" (0));
4974# else
4975 cBits = RT_ALIGN_32(cBits, 32);
4976 __asm
4977 {
4978# ifdef RT_ARCH_AMD64
4979 mov rdi, [pvBitmap]
4980 mov rbx, rdi
4981# else
4982 mov edi, [pvBitmap]
4983 mov ebx, edi
4984# endif
4985 mov edx, 0ffffffffh
4986 xor eax, eax
4987 mov ecx, [cBits]
4988 shr ecx, 5
4989 repe scasd
4990 je done
4991# ifdef RT_ARCH_AMD64
4992 lea rdi, [rdi - 4]
4993 mov eax, [rdi]
4994 sub rdi, rbx
4995# else
4996 lea edi, [edi - 4]
4997 mov eax, [edi]
4998 sub edi, ebx
4999# endif
5000 shl edi, 3
5001 bsf edx, eax
5002 add edx, edi
5003 done:
5004 mov [iBit], edx
5005 }
5006# endif
5007 return iBit;
5008 }
5009 return -1;
5010}
5011#endif
5012
5013
5014/**
5015 * Finds the next set bit in a bitmap.
5016 *
5017 * @returns Index of the next set bit.
5018 * @returns -1 if no set bit was found.
5019 * @param pvBitmap Pointer to the bitmap.
5020 * @param cBits The number of bits in the bitmap. Multiple of 32.
5021 * @param iBitPrev The bit returned from the last search.
5022 * The search will start at iBitPrev + 1.
5023 */
5024#if RT_INLINE_ASM_EXTERNAL
5025DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5026#else
5027DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5028{
5029 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5030 int iBit = ++iBitPrev & 31;
5031 if (iBit)
5032 {
5033 /*
5034 * Inspect the 32-bit word containing the unaligned bit.
5035 */
5036 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5037
5038# if RT_INLINE_ASM_USES_INTRIN
5039 unsigned long ulBit = 0;
5040 if (_BitScanForward(&ulBit, u32))
5041 return ulBit + iBitPrev;
5042# else
5043# if RT_INLINE_ASM_GNU_STYLE
5044 __asm__ __volatile__("bsf %1, %0\n\t"
5045 "jnz 1f\n\t"
5046 "movl $-1, %0\n\t"
5047 "1:\n\t"
5048 : "=r" (iBit)
5049 : "r" (u32));
5050# else
5051 __asm
5052 {
5053 mov edx, [u32]
5054 bsf eax, edx
5055 jnz done
5056 mov eax, 0ffffffffh
5057 done:
5058 mov [iBit], eax
5059 }
5060# endif
5061 if (iBit >= 0)
5062 return iBit + iBitPrev;
5063# endif
5064
5065 /*
5066 * Skip ahead and see if there is anything left to search.
5067 */
5068 iBitPrev |= 31;
5069 iBitPrev++;
5070 if (cBits <= (uint32_t)iBitPrev)
5071 return -1;
5072 }
5073
5074 /*
5075 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5076 */
5077 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5078 if (iBit >= 0)
5079 iBit += iBitPrev;
5080 return iBit;
5081}
5082#endif
5083
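/*
 * A minimal sketch: walk every set bit in a bitmap with the first/next pair
 * above.  au32Bitmap and cBits are hypothetical; cBits must be a multiple
 * of 32:
 *
 *     int32_t iBit = ASMBitFirstSet(au32Bitmap, cBits);
 *     while (iBit >= 0)
 *     {
 *         // ... process bit number iBit ...
 *         iBit = ASMBitNextSet(au32Bitmap, cBits, iBit);
 *     }
 */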
5084
5085/**
5086 * Finds the first bit which is set in the given 32-bit integer.
5087 * Bits are numbered from 1 (least significant) to 32.
5088 *
5089 * @returns index [1..32] of the first set bit.
5090 * @returns 0 if all bits are cleared.
5091 * @param u32 Integer to search for set bits.
5092 * @remarks Similar to ffs() in BSD.
5093 */
5094#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5095DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5096#else
5097DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5098{
5099# if RT_INLINE_ASM_USES_INTRIN
5100 unsigned long iBit;
5101 if (_BitScanForward(&iBit, u32))
5102 iBit++;
5103 else
5104 iBit = 0;
5105# elif RT_INLINE_ASM_GNU_STYLE
5106 uint32_t iBit;
5107 __asm__ __volatile__("bsf %1, %0\n\t"
5108 "jnz 1f\n\t"
5109 "xorl %0, %0\n\t"
5110 "jmp 2f\n"
5111 "1:\n\t"
5112 "incl %0\n"
5113 "2:\n\t"
5114 : "=r" (iBit)
5115 : "rm" (u32));
5116# else
5117 uint32_t iBit;
5118 _asm
5119 {
5120 bsf eax, [u32]
5121 jnz found
5122 xor eax, eax
5123 jmp done
5124 found:
5125 inc eax
5126 done:
5127 mov [iBit], eax
5128 }
5129# endif
5130 return iBit;
5131}
5132#endif
5133
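/*
 * A minimal sketch of the 1-based return convention of ASMBitFirstSetU32:
 *
 *     Assert(ASMBitFirstSetU32(0) == 0);                            // no bit set
 *     Assert(ASMBitFirstSetU32(UINT32_C(0x00000008)) == 4);         // bit 3 set -> index 4
 *     Assert(ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32);        // MSB set -> index 32
 */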
5134
5135/**
5136 * Finds the first bit which is set in the given 32-bit integer.
5137 * Bits are numbered from 1 (least significant) to 32.
5138 *
5139 * @returns index [1..32] of the first set bit.
5140 * @returns 0 if all bits are cleared.
5141 * @param i32 Integer to search for set bits.
5142 * @remark Similar to ffs() in BSD.
5143 */
5144DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5145{
5146 return ASMBitFirstSetU32((uint32_t)i32);
5147}
5148
5149
5150/**
5151 * Finds the first bit which is set in the given 64-bit integer.
5152 *
5153 * Bits are numbered from 1 (least significant) to 64.
5154 *
5155 * @returns index [1..64] of the first set bit.
5156 * @returns 0 if all bits are cleared.
5157 * @param u64 Integer to search for set bits.
5158 * @remarks Similar to ffs() in BSD.
5159 */
5160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5161DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5162#else
5163DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5164{
5165# if RT_INLINE_ASM_USES_INTRIN
5166 unsigned long iBit;
5167# if ARCH_BITS == 64
5168 if (_BitScanForward64(&iBit, u64))
5169 iBit++;
5170 else
5171 iBit = 0;
5172# else
5173 if (_BitScanForward(&iBit, (uint32_t)u64))
5174 iBit++;
5175 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5176 iBit += 33;
5177 else
5178 iBit = 0;
5179# endif
5180# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5181 uint64_t iBit;
5182 __asm__ __volatile__("bsfq %1, %0\n\t"
5183 "jnz 1f\n\t"
5184 "xorl %0, %0\n\t"
5185 "jmp 2f\n"
5186 "1:\n\t"
5187 "incl %0\n"
5188 "2:\n\t"
5189 : "=r" (iBit)
5190 : "rm" (u64));
5191# else
5192 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5193 if (!iBit)
5194 {
5195 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5196 if (iBit)
5197 iBit += 32;
5198 }
5199# endif
5200 return (unsigned)iBit;
5201}
5202#endif
5203
5204
5205/**
5206 * Finds the first bit which is set in the given 16-bit integer.
5207 *
5208 * Bits are numbered from 1 (least significant) to 16.
5209 *
5210 * @returns index [1..16] of the first set bit.
5211 * @returns 0 if all bits are cleared.
5212 * @param u16 Integer to search for set bits.
5213 * @remarks For 16-bit bs3kit code.
5214 */
5215#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5216DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5217#else
5218DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5219{
5220 return ASMBitFirstSetU32((uint32_t)u16);
5221}
5222#endif
5223
5224
5225/**
5226 * Finds the last bit which is set in the given 32-bit integer.
5227 * Bits are numbered from 1 (least significant) to 32.
5228 *
5229 * @returns index [1..32] of the last set bit.
5230 * @returns 0 if all bits are cleared.
5231 * @param u32 Integer to search for set bits.
5232 * @remark Similar to fls() in BSD.
5233 */
5234#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5235DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5236#else
5237DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5238{
5239# if RT_INLINE_ASM_USES_INTRIN
5240 unsigned long iBit;
5241 if (_BitScanReverse(&iBit, u32))
5242 iBit++;
5243 else
5244 iBit = 0;
5245# elif RT_INLINE_ASM_GNU_STYLE
5246 uint32_t iBit;
5247 __asm__ __volatile__("bsrl %1, %0\n\t"
5248 "jnz 1f\n\t"
5249 "xorl %0, %0\n\t"
5250 "jmp 2f\n"
5251 "1:\n\t"
5252 "incl %0\n"
5253 "2:\n\t"
5254 : "=r" (iBit)
5255 : "rm" (u32));
5256# else
5257 uint32_t iBit;
5258 _asm
5259 {
5260 bsr eax, [u32]
5261 jnz found
5262 xor eax, eax
5263 jmp done
5264 found:
5265 inc eax
5266 done:
5267 mov [iBit], eax
5268 }
5269# endif
5270 return iBit;
5271}
5272#endif
5273
5274
5275/**
5276 * Finds the last bit which is set in the given 32-bit integer.
5277 * Bits are numbered from 1 (least significant) to 32.
5278 *
5279 * @returns index [1..32] of the last set bit.
5280 * @returns 0 if all bits are cleared.
5281 * @param i32 Integer to search for set bits.
5282 * @remark Similar to fls() in BSD.
5283 */
5284DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5285{
5286 return ASMBitLastSetU32((uint32_t)i32);
5287}
5288
5289
5290/**
5291 * Finds the last bit which is set in the given 64-bit integer.
5292 *
5293 * Bits are numbered from 1 (least significant) to 64.
5294 *
5295 * @returns index [1..64] of the last set bit.
5296 * @returns 0 if all bits are cleared.
5297 * @param u64 Integer to search for set bits.
5298 * @remark Similar to fls() in BSD.
5299 */
5300#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5301DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5302#else
5303DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5304{
5305# if RT_INLINE_ASM_USES_INTRIN
5306 unsigned long iBit;
5307# if ARCH_BITS == 64
5308 if (_BitScanReverse64(&iBit, u64))
5309 iBit++;
5310 else
5311 iBit = 0;
5312# else
5313 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5314 iBit += 33;
5315 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5316 iBit++;
5317 else
5318 iBit = 0;
5319# endif
5320# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5321 uint64_t iBit;
5322 __asm__ __volatile__("bsrq %1, %0\n\t"
5323 "jnz 1f\n\t"
5324 "xorl %0, %0\n\t"
5325 "jmp 2f\n"
5326 "1:\n\t"
5327 "incl %0\n"
5328 "2:\n\t"
5329 : "=r" (iBit)
5330 : "rm" (u64));
5331# else
5332 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5333 if (iBit)
5334 iBit += 32;
5335 else
5336 iBit = ASMBitLastSetU32((uint32_t)u64);
5337# endif
5338 return (unsigned)iBit;
5339}
5340#endif
5341
5342
5343/**
5344 * Finds the last bit which is set in the given 16-bit integer.
5345 *
5346 * Bits are numbered from 1 (least significant) to 16.
5347 *
5348 * @returns index [1..16] of the last set bit.
5349 * @returns 0 if all bits are cleared.
5350 * @param u16 Integer to search for set bits.
5351 * @remarks For 16-bit bs3kit code.
5352 */
5353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5354DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5355#else
5356DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5357{
5358 return ASMBitLastSetU32((uint32_t)u16);
5359}
5360#endif
5361
5362
5363/**
5364 * Reverse the byte order of the given 16-bit integer.
5365 *
5366 * @returns The byte-swapped value.
5367 * @param u16 16-bit integer value.
5368 */
5369#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5370DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5371#else
5372DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5373{
5374# if RT_INLINE_ASM_USES_INTRIN
5375 u16 = _byteswap_ushort(u16);
5376# elif RT_INLINE_ASM_GNU_STYLE
5377 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5378# else
5379 _asm
5380 {
5381 mov ax, [u16]
5382 ror ax, 8
5383 mov [u16], ax
5384 }
5385# endif
5386 return u16;
5387}
5388#endif
5389
5390
5391/**
5392 * Reverse the byte order of the given 32-bit integer.
5393 *
5394 * @returns The byte-swapped value.
5395 * @param u32 32-bit integer value.
5396 */
5397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5398DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5399#else
5400DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5401{
5402# if RT_INLINE_ASM_USES_INTRIN
5403 u32 = _byteswap_ulong(u32);
5404# elif RT_INLINE_ASM_GNU_STYLE
5405 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5406# else
5407 _asm
5408 {
5409 mov eax, [u32]
5410 bswap eax
5411 mov [u32], eax
5412 }
5413# endif
5414 return u32;
5415}
5416#endif
5417
5418
5419/**
5420 * Reverse the byte order of the given 64-bit integer.
5421 *
5422 * @returns The byte-swapped value.
5423 * @param u64 64-bit integer value.
5424 */
5425DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5426{
5427#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5428 u64 = _byteswap_uint64(u64);
5429#else
5430 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5431 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5432#endif
5433 return u64;
5434}
5435
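/*
 * A minimal sketch of the byte swappers, e.g. for converting a big endian
 * wire value to the host's little endian representation:
 *
 *     Assert(ASMByteSwapU16(UINT16_C(0x1234)) == UINT16_C(0x3412));
 *     Assert(ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
 *     Assert(ASMByteSwapU64(UINT64_C(0x0123456789abcdef)) == UINT64_C(0xefcdab8967452301));
 */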
5436
5437/**
5438 * Rotate 32-bit unsigned value to the left by @a cShift.
5439 *
5440 * @returns Rotated value.
5441 * @param u32 The value to rotate.
5442 * @param cShift How many bits to rotate by.
5443 */
5444#ifdef __WATCOMC__
5445DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5446#else
5447DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5448{
5449# if RT_INLINE_ASM_USES_INTRIN
5450 return _rotl(u32, cShift);
5451# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5452 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5453 return u32;
5454# else
5455 cShift &= 31;
5456 return (u32 << cShift) | (u32 >> (32 - cShift));
5457# endif
5458}
5459#endif
5460
5461
5462/**
5463 * Rotate 32-bit unsigned value to the right by @a cShift.
5464 *
5465 * @returns Rotated value.
5466 * @param u32 The value to rotate.
5467 * @param cShift How many bits to rotate by.
5468 */
5469#ifdef __WATCOMC__
5470DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5471#else
5472DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5473{
5474# if RT_INLINE_ASM_USES_INTRIN
5475 return _rotr(u32, cShift);
5476# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5477 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5478 return u32;
5479# else
5480 cShift &= 31;
5481 return (u32 >> cShift) | (u32 << (32 - cShift));
5482# endif
5483}
5484#endif
5485
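/*
 * A minimal sketch of the 32-bit rotate helpers; a left rotate by cShift is
 * undone by a right rotate by the same count:
 *
 *     Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 1) == UINT32_C(0x00000003));
 *     Assert(ASMRotateRightU32(UINT32_C(0x00000003), 1) == UINT32_C(0x80000001));
 */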
5486
5487/**
5488 * Rotate 64-bit unsigned value to the left by @a cShift.
5489 *
5490 * @returns Rotated value.
5491 * @param u64 The value to rotate.
5492 * @param cShift How many bits to rotate by.
5493 */
5494DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5495{
5496#if RT_INLINE_ASM_USES_INTRIN
5497 return _rotl64(u64, cShift);
5498#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5499 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5500 return u64;
5501#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5502 uint32_t uSpill;
5503 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5504 "jz 1f\n\t"
5505 "xchgl %%eax, %%edx\n\t"
5506 "1:\n\t"
5507 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5508 "jz 2f\n\t"
5509 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5510 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5511 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5512 "2:\n\t" /* } */
5513 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5514 : "0" (u64),
5515 "1" (cShift));
5516 return u64;
5517#else
5518 cShift &= 63;
5519 return (u64 << cShift) | (u64 >> (64 - cShift));
5520#endif
5521}
5522
5523
5524/**
5525 * Rotate 64-bit unsigned value to the right by @a cShift.
5526 *
5527 * @returns Rotated value.
5528 * @param u64 The value to rotate.
5529 * @param cShift How many bits to rotate by.
5530 */
5531DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5532{
5533#if RT_INLINE_ASM_USES_INTRIN
5534 return _rotr64(u64, cShift);
5535#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5536 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5537 return u64;
5538#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5539 uint32_t uSpill;
5540 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5541 "jz 1f\n\t"
5542 "xchgl %%eax, %%edx\n\t"
5543 "1:\n\t"
5544 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5545 "jz 2f\n\t"
5546 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5547 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5548 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5549 "2:\n\t" /* } */
5550 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5551 : "0" (u64),
5552 "1" (cShift));
5553 return u64;
5554#else
5555 cShift &= 63;
5556 return (u64 >> cShift) | (u64 << (64 - cShift));
5557#endif
5558}
5559
5560/** @} */
5561
5562
5563/** @} */
5564
5565#endif
5566