VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 36595

Last change on this file since 36595 was 36595, checked in by vboxsync, 14 years ago

iprt/asm.h: Added ASMAtomic[Dec|Inc|Add|Sub]Z.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.8 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2010 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# ifdef RT_ARCH_AMD64
69# pragma intrinsic(__stosq)
70# pragma intrinsic(_byteswap_uint64)
71# pragma intrinsic(_InterlockedExchange64)
72# pragma intrinsic(_InterlockedExchangeAdd64)
73# pragma intrinsic(_InterlockedAnd64)
74# pragma intrinsic(_InterlockedOr64)
75# pragma intrinsic(_InterlockedIncrement64)
76# pragma intrinsic(_InterlockedDecrement64)
77# endif
78#endif
79
80
81/** @defgroup grp_rt_asm ASM - Assembly Routines
82 * @ingroup grp_rt
83 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct if wrong.)
90 *
91 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
92 * are unordered (note the Uo).
93 *
94 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
95 * or even optimize assembler instructions away. For instance, in the following code
96 * the second rdmsr instruction is optimized away because gcc treats that instruction
97 * as deterministic:
98 *
99 * @code
 * static inline uint64_t rdmsr_low(int idx)
 * {
 *     uint32_t low;
 *     __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
105 * ...
106 * uint32_t msr1 = rdmsr_low(1);
107 * foo(msr1);
108 * msr1 = rdmsr_low(1);
109 * bar(msr1);
110 * @endcode
111 *
112 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
113 * use the result of the first call as input parameter for bar() as well. For rdmsr this
114 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
115 * machine status information in general.
116 *
117 * @{
118 */
119
120
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Evaluates to 1 when compiling for 32-bit x86 (__i386__) with GCC 4.3 or a
 * later 4.x release, and to 0 otherwise.
 *
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.  So far this workaround is still required for 4.4 and 4.5.
 *
 * @remarks The value is computed by the preprocessor and stored as a plain 0/1
 *          constant.  Expanding to an expression that contains defined() would
 *          have undefined behaviour when the macro is used in an \#if, and
 *          would not be usable in ordinary C expressions at all.
 */
#if defined(__GNUC__) && defined(__i386__)
# if __GNUC__ == 4 && __GNUC_MINOR__ >= 3
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
130
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * Evaluates to 1 when inline cmpxchg8b code must be avoided, i.e. when building
 * position independent 32-bit x86 code with an affected compiler, and to 0
 * otherwise.  May be predefined by the build to override the detection.
 *
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * @remarks The condition is evaluated here and stored as a 0/1 constant so
 *          that no defined() operator is produced by macro expansion when the
 *          macro is later tested with \#if.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if    (defined(PIC) || defined(__PIC__)) \
     && defined(RT_ARCH_X86) \
     && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
         || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
146
147
/** @def ASMReturnAddress
 * Yields the return address of the function (or method) the macro is expanded
 * in, i.e. the address in the caller at which execution will resume.
 *
 * Maps to the _ReturnAddress intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GCC (and while Doxygen is running).  Any
 * other compiler is rejected at preprocessing time.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C"
# endif
void *_ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress()     _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress()     __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
163
164
165/**
166 * Compiler memory barrier.
167 *
168 * Ensure that the compiler does not use any cached (register/tmp stack) memory
169 * values or any outstanding writes when returning from this function.
170 *
171 * This function must be used if non-volatile data is modified by a
172 * device or the VMM. Typical cases are port access, MMIO access,
173 * trapping instruction, etc.
174 */
175#if RT_INLINE_ASM_GNU_STYLE
176# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
177#elif RT_INLINE_ASM_USES_INTRIN
178# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
179#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
180DECLINLINE(void) ASMCompilerBarrier(void)
181{
182 __asm
183 {
184 }
185}
186#endif
187
188
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to a single statement-like construct that raises a breakpoint trap:
 * int3 on x86/AMD64, illtrap/unimp on SPARC, and __debugbreak() when GNU style
 * inline assembly is not in use.
 *
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  ifndef __L4ENV__
#   define ASMBreakpoint()      do { __asm__ __volatile__("int3\n\tnop"); } while (0)
#  else
#   define ASMBreakpoint()      do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
#  endif
# elif defined(RT_ARCH_SPARC64)
   /* The asm statement must be terminated by ';' inside the do/while body. */
#  define ASMBreakpoint()       do { __asm__ __volatile__("illtrap 0\n\t"); } while (0) /** @todo Sparc64: this is just a wild guess. */
# elif defined(RT_ARCH_SPARC)
#  define ASMBreakpoint()       do { __asm__ __volatile__("unimp 0\n\t"); } while (0) /** @todo Sparc: this is just a wild guess (same as Sparc64, just different name). */
# else
#  error "PORTME"
# endif
#else
# define ASMBreakpoint()        __debugbreak()
#endif
213
214
215/**
216 * Spinloop hint for platforms that have these, empty function on the other
217 * platforms.
218 *
219 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
220 * spin locks.
221 */
222#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
223DECLASM(void) ASMNopPause(void);
224#else
225DECLINLINE(void) ASMNopPause(void)
226{
227# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
228# if RT_INLINE_ASM_GNU_STYLE
229 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
230# else
231 __asm {
232 _emit 0f3h
233 _emit 090h
234 }
235# endif
236# else
237 /* dummy */
238# endif
239}
240#endif
241
242
243/**
244 * Atomically Exchange an unsigned 8-bit value, ordered.
245 *
246 * @returns Current *pu8 value
247 * @param pu8 Pointer to the 8-bit variable to update.
248 * @param u8 The 8-bit value to assign to *pu8.
249 */
250#if RT_INLINE_ASM_EXTERNAL
251DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
252#else
253DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
254{
255# if RT_INLINE_ASM_GNU_STYLE
256 __asm__ __volatile__("xchgb %0, %1\n\t"
257 : "=m" (*pu8),
258 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
259 : "1" (u8),
260 "m" (*pu8));
261# else
262 __asm
263 {
264# ifdef RT_ARCH_AMD64
265 mov rdx, [pu8]
266 mov al, [u8]
267 xchg [rdx], al
268 mov [u8], al
269# else
270 mov edx, [pu8]
271 mov al, [u8]
272 xchg [edx], al
273 mov [u8], al
274# endif
275 }
276# endif
277 return u8;
278}
279#endif
280
281
282/**
283 * Atomically Exchange a signed 8-bit value, ordered.
284 *
285 * @returns Current *pu8 value
286 * @param pi8 Pointer to the 8-bit variable to update.
287 * @param i8 The 8-bit value to assign to *pi8.
288 */
289DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
290{
291 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
292}
293
294
295/**
296 * Atomically Exchange a bool value, ordered.
297 *
298 * @returns Current *pf value
299 * @param pf Pointer to the 8-bit variable to update.
300 * @param f The 8-bit value to assign to *pi8.
301 */
302DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
303{
304#ifdef _MSC_VER
305 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
306#else
307 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
308#endif
309}
310
311
312/**
313 * Atomically Exchange an unsigned 16-bit value, ordered.
314 *
315 * @returns Current *pu16 value
316 * @param pu16 Pointer to the 16-bit variable to update.
317 * @param u16 The 16-bit value to assign to *pu16.
318 */
319#if RT_INLINE_ASM_EXTERNAL
320DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
321#else
322DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
323{
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("xchgw %0, %1\n\t"
326 : "=m" (*pu16),
327 "=r" (u16)
328 : "1" (u16),
329 "m" (*pu16));
330# else
331 __asm
332 {
333# ifdef RT_ARCH_AMD64
334 mov rdx, [pu16]
335 mov ax, [u16]
336 xchg [rdx], ax
337 mov [u16], ax
338# else
339 mov edx, [pu16]
340 mov ax, [u16]
341 xchg [edx], ax
342 mov [u16], ax
343# endif
344 }
345# endif
346 return u16;
347}
348#endif
349
350
351/**
352 * Atomically Exchange a signed 16-bit value, ordered.
353 *
354 * @returns Current *pu16 value
355 * @param pi16 Pointer to the 16-bit variable to update.
356 * @param i16 The 16-bit value to assign to *pi16.
357 */
358DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
359{
360 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
361}
362
363
364/**
365 * Atomically Exchange an unsigned 32-bit value, ordered.
366 *
367 * @returns Current *pu32 value
368 * @param pu32 Pointer to the 32-bit variable to update.
369 * @param u32 The 32-bit value to assign to *pu32.
370 */
371#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
372DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
373#else
374DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
375{
376# if RT_INLINE_ASM_GNU_STYLE
377 __asm__ __volatile__("xchgl %0, %1\n\t"
378 : "=m" (*pu32),
379 "=r" (u32)
380 : "1" (u32),
381 "m" (*pu32));
382
383# elif RT_INLINE_ASM_USES_INTRIN
384 u32 = _InterlockedExchange((long *)pu32, u32);
385
386# else
387 __asm
388 {
389# ifdef RT_ARCH_AMD64
390 mov rdx, [pu32]
391 mov eax, u32
392 xchg [rdx], eax
393 mov [u32], eax
394# else
395 mov edx, [pu32]
396 mov eax, u32
397 xchg [edx], eax
398 mov [u32], eax
399# endif
400 }
401# endif
402 return u32;
403}
404#endif
405
406
407/**
408 * Atomically Exchange a signed 32-bit value, ordered.
409 *
410 * @returns Current *pu32 value
411 * @param pi32 Pointer to the 32-bit variable to update.
412 * @param i32 The 32-bit value to assign to *pi32.
413 */
414DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
415{
416 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
417}
418
419
420/**
421 * Atomically Exchange an unsigned 64-bit value, ordered.
422 *
423 * @returns Current *pu64 value
424 * @param pu64 Pointer to the 64-bit variable to update.
425 * @param u64 The 64-bit value to assign to *pu64.
426 */
427#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
428 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
429DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
430#else
431DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
432{
433# if defined(RT_ARCH_AMD64)
434# if RT_INLINE_ASM_USES_INTRIN
435 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
436
437# elif RT_INLINE_ASM_GNU_STYLE
438 __asm__ __volatile__("xchgq %0, %1\n\t"
439 : "=m" (*pu64),
440 "=r" (u64)
441 : "1" (u64),
442 "m" (*pu64));
443# else
444 __asm
445 {
446 mov rdx, [pu64]
447 mov rax, [u64]
448 xchg [rdx], rax
449 mov [u64], rax
450 }
451# endif
452# else /* !RT_ARCH_AMD64 */
453# if RT_INLINE_ASM_GNU_STYLE
454# if defined(PIC) || defined(__PIC__)
455 uint32_t u32EBX = (uint32_t)u64;
456 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
457 "xchgl %%ebx, %3\n\t"
458 "1:\n\t"
459 "lock; cmpxchg8b (%5)\n\t"
460 "jnz 1b\n\t"
461 "movl %3, %%ebx\n\t"
462 /*"xchgl %%esi, %5\n\t"*/
463 : "=A" (u64),
464 "=m" (*pu64)
465 : "0" (*pu64),
466 "m" ( u32EBX ),
467 "c" ( (uint32_t)(u64 >> 32) ),
468 "S" (pu64));
469# else /* !PIC */
470 __asm__ __volatile__("1:\n\t"
471 "lock; cmpxchg8b %1\n\t"
472 "jnz 1b\n\t"
473 : "=A" (u64),
474 "=m" (*pu64)
475 : "0" (*pu64),
476 "b" ( (uint32_t)u64 ),
477 "c" ( (uint32_t)(u64 >> 32) ));
478# endif
479# else
480 __asm
481 {
482 mov ebx, dword ptr [u64]
483 mov ecx, dword ptr [u64 + 4]
484 mov edi, pu64
485 mov eax, dword ptr [edi]
486 mov edx, dword ptr [edi + 4]
487 retry:
488 lock cmpxchg8b [edi]
489 jnz retry
490 mov dword ptr [u64], eax
491 mov dword ptr [u64 + 4], edx
492 }
493# endif
494# endif /* !RT_ARCH_AMD64 */
495 return u64;
496}
497#endif
498
499
500/**
501 * Atomically Exchange an signed 64-bit value, ordered.
502 *
503 * @returns Current *pi64 value
504 * @param pi64 Pointer to the 64-bit variable to update.
505 * @param i64 The 64-bit value to assign to *pi64.
506 */
507DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
508{
509 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
510}
511
512
513/**
514 * Atomically Exchange a pointer value, ordered.
515 *
516 * @returns Current *ppv value
517 * @param ppv Pointer to the pointer variable to update.
518 * @param pv The pointer value to assign to *ppv.
519 */
520DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
521{
522#if ARCH_BITS == 32
523 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
524#elif ARCH_BITS == 64
525 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
526#else
527# error "ARCH_BITS is bogus"
528#endif
529}
530
531
/**
 * Convenience macro that spares the caller the casting ASMAtomicXchgPtr needs.
 *
 * With GCC the arguments are first assigned to correctly typed temporaries, so
 * a mismatch between @a ppv, @a pv and @a Type is diagnosed by the compiler.
 * Other compilers simply get the plain casts.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to store in *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type const pvTypeChecked = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, \
                                                                      (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
    })
#endif
553
554
555/**
556 * Atomically Exchange a raw-mode context pointer value, ordered.
557 *
558 * @returns Current *ppv value
559 * @param ppvRC Pointer to the pointer variable to update.
560 * @param pvRC The pointer value to assign to *ppv.
561 */
562DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
563{
564 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
565}
566
567
568/**
569 * Atomically Exchange a ring-0 pointer value, ordered.
570 *
571 * @returns Current *ppv value
572 * @param ppvR0 Pointer to the pointer variable to update.
573 * @param pvR0 The pointer value to assign to *ppv.
574 */
575DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
576{
577#if R0_ARCH_BITS == 32
578 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
579#elif R0_ARCH_BITS == 64
580 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
581#else
582# error "R0_ARCH_BITS is bogus"
583#endif
584}
585
586
587/**
588 * Atomically Exchange a ring-3 pointer value, ordered.
589 *
590 * @returns Current *ppv value
591 * @param ppvR3 Pointer to the pointer variable to update.
592 * @param pvR3 The pointer value to assign to *ppv.
593 */
594DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
595{
596#if R3_ARCH_BITS == 32
597 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
598#elif R3_ARCH_BITS == 64
599 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
600#else
601# error "R3_ARCH_BITS is bogus"
602#endif
603}
604
605
606/** @def ASMAtomicXchgHandle
607 * Atomically Exchange a typical IPRT handle value, ordered.
608 *
609 * @param ph Pointer to the value to update.
610 * @param hNew The new value to assigned to *pu.
611 * @param phRes Where to store the current *ph value.
612 *
613 * @remarks This doesn't currently work for all handles (like RTFILE).
614 */
615#if HC_ARCH_BITS == 32
616# define ASMAtomicXchgHandle(ph, hNew, phRes) \
617 do { \
618 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
619 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
620 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
621 } while (0)
622#elif HC_ARCH_BITS == 64
623# define ASMAtomicXchgHandle(ph, hNew, phRes) \
624 do { \
625 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
626 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
627 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
628 } while (0)
629#else
630# error HC_ARCH_BITS
631#endif
632
633
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Selects the 8, 16, 32 or 64-bit exchange according to sizeof(*(pu)); any
 * other size triggers an assertion.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 *
 * @remarks The size passed to the assertion message is cast to int so that it
 *          matches the %d conversion (sizeof yields a size_t).
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
652
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered, and passes back the previous value.
 *
 * Selects the 8, 16, 32 or 64-bit exchange according to sizeof(*(pu)); any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 *
 * @remarks The assertion message names this macro, and the size is cast to
 *          int so that it matches the %d conversion (sizeof yields a size_t).
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
671
672
673
674/**
675 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
676 *
677 * @returns true if xchg was done.
678 * @returns false if xchg wasn't done.
679 *
680 * @param pu8 Pointer to the value to update.
681 * @param u8New The new value to assigned to *pu8.
682 * @param u8Old The old value to *pu8 compare with.
683 */
684#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
685DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
686#else
687DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
688{
689 uint8_t u8Ret;
690 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
691 "setz %1\n\t"
692 : "=m" (*pu8),
693 "=qm" (u8Ret),
694 "=a" (u8Old)
695 : "q" (u8New),
696 "2" (u8Old),
697 "m" (*pu8));
698 return (bool)u8Ret;
699}
700#endif
701
702
703/**
704 * Atomically Compare and Exchange a signed 8-bit value, ordered.
705 *
706 * @returns true if xchg was done.
707 * @returns false if xchg wasn't done.
708 *
709 * @param pi8 Pointer to the value to update.
710 * @param i8New The new value to assigned to *pi8.
711 * @param i8Old The old value to *pi8 compare with.
712 */
713DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
714{
715 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
716}
717
718
719/**
720 * Atomically Compare and Exchange a bool value, ordered.
721 *
722 * @returns true if xchg was done.
723 * @returns false if xchg wasn't done.
724 *
725 * @param pf Pointer to the value to update.
726 * @param fNew The new value to assigned to *pf.
727 * @param fOld The old value to *pf compare with.
728 */
729DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
730{
731 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
732}
733
734
735/**
736 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
737 *
738 * @returns true if xchg was done.
739 * @returns false if xchg wasn't done.
740 *
741 * @param pu32 Pointer to the value to update.
742 * @param u32New The new value to assigned to *pu32.
743 * @param u32Old The old value to *pu32 compare with.
744 */
745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
746DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
747#else
748DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
749{
750# if RT_INLINE_ASM_GNU_STYLE
751 uint8_t u8Ret;
752 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
753 "setz %1\n\t"
754 : "=m" (*pu32),
755 "=qm" (u8Ret),
756 "=a" (u32Old)
757 : "r" (u32New),
758 "2" (u32Old),
759 "m" (*pu32));
760 return (bool)u8Ret;
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
764
765# else
766 uint32_t u32Ret;
767 __asm
768 {
769# ifdef RT_ARCH_AMD64
770 mov rdx, [pu32]
771# else
772 mov edx, [pu32]
773# endif
774 mov eax, [u32Old]
775 mov ecx, [u32New]
776# ifdef RT_ARCH_AMD64
777 lock cmpxchg [rdx], ecx
778# else
779 lock cmpxchg [edx], ecx
780# endif
781 setz al
782 movzx eax, al
783 mov [u32Ret], eax
784 }
785 return !!u32Ret;
786# endif
787}
788#endif
789
790
791/**
792 * Atomically Compare and Exchange a signed 32-bit value, ordered.
793 *
794 * @returns true if xchg was done.
795 * @returns false if xchg wasn't done.
796 *
797 * @param pi32 Pointer to the value to update.
798 * @param i32New The new value to assigned to *pi32.
799 * @param i32Old The old value to *pi32 compare with.
800 */
801DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
802{
803 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
804}
805
806
807/**
808 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pu64 Pointer to the 64-bit variable to update.
814 * @param u64New The 64-bit value to assign to *pu64.
815 * @param u64Old The value to compare with.
816 */
817#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
818 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
819DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
820#else
821DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
822{
823# if RT_INLINE_ASM_USES_INTRIN
824 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
825
826# elif defined(RT_ARCH_AMD64)
827# if RT_INLINE_ASM_GNU_STYLE
828 uint8_t u8Ret;
829 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
830 "setz %1\n\t"
831 : "=m" (*pu64),
832 "=qm" (u8Ret),
833 "=a" (u64Old)
834 : "r" (u64New),
835 "2" (u64Old),
836 "m" (*pu64));
837 return (bool)u8Ret;
838# else
839 bool fRet;
840 __asm
841 {
842 mov rdx, [pu32]
843 mov rax, [u64Old]
844 mov rcx, [u64New]
845 lock cmpxchg [rdx], rcx
846 setz al
847 mov [fRet], al
848 }
849 return fRet;
850# endif
851# else /* !RT_ARCH_AMD64 */
852 uint32_t u32Ret;
853# if RT_INLINE_ASM_GNU_STYLE
854# if defined(PIC) || defined(__PIC__)
855 uint32_t u32EBX = (uint32_t)u64New;
856 uint32_t u32Spill;
857 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
858 "lock; cmpxchg8b (%6)\n\t"
859 "setz %%al\n\t"
860 "movl %4, %%ebx\n\t"
861 "movzbl %%al, %%eax\n\t"
862 : "=a" (u32Ret),
863 "=d" (u32Spill),
864# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
865 "+m" (*pu64)
866# else
867 "=m" (*pu64)
868# endif
869 : "A" (u64Old),
870 "m" ( u32EBX ),
871 "c" ( (uint32_t)(u64New >> 32) ),
872 "S" (pu64));
873# else /* !PIC */
874 uint32_t u32Spill;
875 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
876 "setz %%al\n\t"
877 "movzbl %%al, %%eax\n\t"
878 : "=a" (u32Ret),
879 "=d" (u32Spill),
880 "+m" (*pu64)
881 : "A" (u64Old),
882 "b" ( (uint32_t)u64New ),
883 "c" ( (uint32_t)(u64New >> 32) ));
884# endif
885 return (bool)u32Ret;
886# else
887 __asm
888 {
889 mov ebx, dword ptr [u64New]
890 mov ecx, dword ptr [u64New + 4]
891 mov edi, [pu64]
892 mov eax, dword ptr [u64Old]
893 mov edx, dword ptr [u64Old + 4]
894 lock cmpxchg8b [edi]
895 setz al
896 movzx eax, al
897 mov dword ptr [u32Ret], eax
898 }
899 return !!u32Ret;
900# endif
901# endif /* !RT_ARCH_AMD64 */
902}
903#endif
904
905
906/**
907 * Atomically Compare and exchange a signed 64-bit value, ordered.
908 *
909 * @returns true if xchg was done.
910 * @returns false if xchg wasn't done.
911 *
912 * @param pi64 Pointer to the 64-bit variable to update.
913 * @param i64 The 64-bit value to assign to *pu64.
914 * @param i64Old The value to compare with.
915 */
916DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
917{
918 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
919}
920
921
922/**
923 * Atomically Compare and Exchange a pointer value, ordered.
924 *
925 * @returns true if xchg was done.
926 * @returns false if xchg wasn't done.
927 *
928 * @param ppv Pointer to the value to update.
929 * @param pvNew The new value to assigned to *ppv.
930 * @param pvOld The old value to *ppv compare with.
931 */
932DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
933{
934#if ARCH_BITS == 32
935 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
936#elif ARCH_BITS == 64
937 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
938#else
939# error "ARCH_BITS is bogus"
940#endif
941}
942
943
/**
 * Atomically compares and exchanges a pointer value, ordered.
 *
 * Front end for ASMAtomicCmpXchgPtrVoid that takes care of the casting.  With
 * GCC the arguments are first assigned to temporaries typed after *ppv, so
 * mismatching pointer types are diagnosed by the compiler.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The value *ppv is compared with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
    })
#endif
971
972
973/** @def ASMAtomicCmpXchgHandle
974 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
975 *
976 * @param ph Pointer to the value to update.
977 * @param hNew The new value to assigned to *pu.
978 * @param hOld The old value to *pu compare with.
979 * @param fRc Where to store the result.
980 *
981 * @remarks This doesn't currently work for all handles (like RTFILE).
982 */
983#if HC_ARCH_BITS == 32
984# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
985 do { \
986 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
987 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
988 } while (0)
989#elif HC_ARCH_BITS == 64
990# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
991 do { \
992 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
993 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
994 } while (0)
995#else
996# error HC_ARCH_BITS
997#endif
998
999
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Only 32-bit and 64-bit operands are supported; any other size triggers an
 * assertion and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The value *pu is compared with.
 * @param   fRc     Where to store the result (true if the exchange was done).
 *
 * @remarks The size passed to the assertion message is cast to int so that it
 *          matches the %d conversion (sizeof yields a size_t).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1021
1022
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * All inline variants are built around the x86 LOCK CMPXCHG instruction (or
 * the compiler intrinsic wrapping it): the accumulator is loaded with
 * @a u32Old, compared with *pu32, and @a u32New is stored only on a match.
 * The value the accumulator holds afterwards is written to *pu32Old in every
 * variant, i.e. regardless of whether the exchange took place.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The value to compare *pu32 with.
 * @param   pu32Old     Where to store the old value.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX is both input (u32Old) and output (*pu32Old); SETZ captures the
       zero flag set by CMPXCHG, which is the success indicator. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; success is deduced by
       comparing it with the expected one. */
    return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    /* MSC style inline assembly.  The plain MOVs storing EAX to *pu32Old do
       not alter the flags, so SETZ still sees the CMPXCHG result. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
1083
1084
1085/**
1086 * Atomically Compare and Exchange a signed 32-bit value, additionally
1087 * passes back old value, ordered.
1088 *
1089 * @returns true if xchg was done.
1090 * @returns false if xchg wasn't done.
1091 *
1092 * @param pi32 Pointer to the value to update.
1093 * @param i32New The new value to assigned to *pi32.
1094 * @param i32Old The old value to *pi32 compare with.
1095 * @param pi32Old Pointer store the old value at.
1096 */
1097DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1098{
1099 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1100}
1101
1102
1103/**
1104 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1105 * passing back old value, ordered.
1106 *
1107 * @returns true if xchg was done.
1108 * @returns false if xchg wasn't done.
1109 *
1110 * @param pu64 Pointer to the 64-bit variable to update.
1111 * @param u64New The 64-bit value to assign to *pu64.
1112 * @param u64Old The value to compare with.
1113 * @param pu64Old Pointer store the old value at.
1114 */
1115#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1116 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1117DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1118#else
1119DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1120{
1121# if RT_INLINE_ASM_USES_INTRIN
1122 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1123
1124# elif defined(RT_ARCH_AMD64)
1125# if RT_INLINE_ASM_GNU_STYLE
1126 uint8_t u8Ret;
1127 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1128 "setz %1\n\t"
1129 : "=m" (*pu64),
1130 "=qm" (u8Ret),
1131 "=a" (*pu64Old)
1132 : "r" (u64New),
1133 "a" (u64Old),
1134 "m" (*pu64));
1135 return (bool)u8Ret;
1136# else
1137 bool fRet;
1138 __asm
1139 {
1140 mov rdx, [pu32]
1141 mov rax, [u64Old]
1142 mov rcx, [u64New]
1143 lock cmpxchg [rdx], rcx
1144 mov rdx, [pu64Old]
1145 mov [rdx], rax
1146 setz al
1147 mov [fRet], al
1148 }
1149 return fRet;
1150# endif
1151# else /* !RT_ARCH_AMD64 */
1152# if RT_INLINE_ASM_GNU_STYLE
1153 uint64_t u64Ret;
1154# if defined(PIC) || defined(__PIC__)
1155 /* NB: this code uses a memory clobber description, because the clean
1156 * solution with an output value for *pu64 makes gcc run out of registers.
1157 * This will cause suboptimal code, and anyone with a better solution is
1158 * welcome to improve this. */
1159 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1160 "lock; cmpxchg8b %3\n\t"
1161 "xchgl %%ebx, %1\n\t"
1162 : "=A" (u64Ret)
1163 : "DS" ((uint32_t)u64New),
1164 "c" ((uint32_t)(u64New >> 32)),
1165 "m" (*pu64),
1166 "0" (u64Old)
1167 : "memory" );
1168# else /* !PIC */
1169 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1170 : "=A" (u64Ret),
1171 "=m" (*pu64)
1172 : "b" ((uint32_t)u64New),
1173 "c" ((uint32_t)(u64New >> 32)),
1174 "m" (*pu64),
1175 "0" (u64Old));
1176# endif
1177 *pu64Old = u64Ret;
1178 return u64Ret == u64Old;
1179# else
1180 uint32_t u32Ret;
1181 __asm
1182 {
1183 mov ebx, dword ptr [u64New]
1184 mov ecx, dword ptr [u64New + 4]
1185 mov edi, [pu64]
1186 mov eax, dword ptr [u64Old]
1187 mov edx, dword ptr [u64Old + 4]
1188 lock cmpxchg8b [edi]
1189 mov ebx, [pu64Old]
1190 mov [ebx], eax
1191 setz al
1192 movzx eax, al
1193 add ebx, 4
1194 mov [ebx], edx
1195 mov dword ptr [u32Ret], eax
1196 }
1197 return !!u32Ret;
1198# endif
1199# endif /* !RT_ARCH_AMD64 */
1200}
1201#endif
1202
1203
1204/**
1205 * Atomically Compare and exchange a signed 64-bit value, additionally
1206 * passing back old value, ordered.
1207 *
1208 * @returns true if xchg was done.
1209 * @returns false if xchg wasn't done.
1210 *
1211 * @param pi64 Pointer to the 64-bit variable to update.
1212 * @param i64 The 64-bit value to assign to *pu64.
1213 * @param i64Old The value to compare with.
1214 * @param pi64Old Pointer store the old value at.
1215 */
1216DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1217{
1218 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1219}
1220
1221/** @def ASMAtomicCmpXchgExHandle
1222 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1223 *
1224 * @param ph Pointer to the value to update.
1225 * @param hNew The new value to assigned to *pu.
1226 * @param hOld The old value to *pu compare with.
1227 * @param fRc Where to store the result.
1228 * @param phOldVal Pointer to where to store the old value.
1229 *
1230 * @remarks This doesn't currently work for all handles (like RTFILE).
1231 */
1232#if HC_ARCH_BITS == 32
1233# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1234 do { \
1235 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1236 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1237 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1238 } while (0)
1239#elif HC_ARCH_BITS == 64
1240# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1241 do { \
1242 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1243 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1244 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1245 } while (0)
1246#else
1247# error HC_ARCH_BITS
1248#endif
1249
1250
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*pu) to the 32-bit or 64-bit worker.  Any other size
 * triggers an assertion, sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value to compare *pu with.
 * @param   fRc         Where to store the result (true if exchanged).
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1274
1275
1276/**
1277 * Atomically Compare and Exchange a pointer value, additionally
1278 * passing back old value, ordered.
1279 *
1280 * @returns true if xchg was done.
1281 * @returns false if xchg wasn't done.
1282 *
1283 * @param ppv Pointer to the value to update.
1284 * @param pvNew The new value to assigned to *ppv.
1285 * @param pvOld The old value to *ppv compare with.
1286 * @param ppvOld Pointer store the old value at.
1287 */
1288DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1289{
1290#if ARCH_BITS == 32
1291 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1292#elif ARCH_BITS == 64
1293 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1294#else
1295# error "ARCH_BITS is bogus"
1296#endif
1297}
1298
1299
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * Convenience wrapper around ASMAtomicCmpXchgExPtrVoid that takes care of the
 * casting.  The GNU C variant first assigns every argument to a local whose
 * type is derived from *ppv, so that mismatching pointer types are diagnosed
 * by the compiler before the casts to void pointers hide them.  The other
 * variant only casts.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value to compare *ppv with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                                   (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                                   (void **)ppvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#endif
1331
1332
1333/**
1334 * Serialize Instruction.
1335 */
1336#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1337DECLASM(void) ASMSerializeInstruction(void);
1338#else
1339DECLINLINE(void) ASMSerializeInstruction(void)
1340{
1341# if RT_INLINE_ASM_GNU_STYLE
1342 RTCCUINTREG xAX = 0;
1343# ifdef RT_ARCH_AMD64
1344 __asm__ ("cpuid"
1345 : "=a" (xAX)
1346 : "0" (xAX)
1347 : "rbx", "rcx", "rdx");
1348# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1349 __asm__ ("push %%ebx\n\t"
1350 "cpuid\n\t"
1351 "pop %%ebx\n\t"
1352 : "=a" (xAX)
1353 : "0" (xAX)
1354 : "ecx", "edx");
1355# else
1356 __asm__ ("cpuid"
1357 : "=a" (xAX)
1358 : "0" (xAX)
1359 : "ebx", "ecx", "edx");
1360# endif
1361
1362# elif RT_INLINE_ASM_USES_INTRIN
1363 int aInfo[4];
1364 __cpuid(aInfo, 0);
1365
1366# else
1367 __asm
1368 {
1369 push ebx
1370 xor eax, eax
1371 cpuid
1372 pop ebx
1373 }
1374# endif
1375}
1376#endif
1377
1378
1379/**
1380 * Memory fence, waits for any pending writes and reads to complete.
1381 */
1382DECLINLINE(void) ASMMemoryFence(void)
1383{
1384 /** @todo use mfence? check if all cpus we care for support it. */
1385 uint32_t volatile u32;
1386 ASMAtomicXchgU32(&u32, 0);
1387}
1388
1389
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently just a full ASMMemoryFence().
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1398
1399
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently just a full ASMMemoryFence().
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1408
1409
1410/**
1411 * Atomically reads an unsigned 8-bit value, ordered.
1412 *
1413 * @returns Current *pu8 value
1414 * @param pu8 Pointer to the 8-bit variable to read.
1415 */
1416DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1417{
1418 ASMMemoryFence();
1419 return *pu8; /* byte reads are atomic on x86 */
1420}
1421
1422
1423/**
1424 * Atomically reads an unsigned 8-bit value, unordered.
1425 *
1426 * @returns Current *pu8 value
1427 * @param pu8 Pointer to the 8-bit variable to read.
1428 */
1429DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1430{
1431 return *pu8; /* byte reads are atomic on x86 */
1432}
1433
1434
1435/**
1436 * Atomically reads a signed 8-bit value, ordered.
1437 *
1438 * @returns Current *pi8 value
1439 * @param pi8 Pointer to the 8-bit variable to read.
1440 */
1441DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1442{
1443 ASMMemoryFence();
1444 return *pi8; /* byte reads are atomic on x86 */
1445}
1446
1447
1448/**
1449 * Atomically reads a signed 8-bit value, unordered.
1450 *
1451 * @returns Current *pi8 value
1452 * @param pi8 Pointer to the 8-bit variable to read.
1453 */
1454DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1455{
1456 return *pi8; /* byte reads are atomic on x86 */
1457}
1458
1459
1460/**
1461 * Atomically reads an unsigned 16-bit value, ordered.
1462 *
1463 * @returns Current *pu16 value
1464 * @param pu16 Pointer to the 16-bit variable to read.
1465 */
1466DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1467{
1468 ASMMemoryFence();
1469 Assert(!((uintptr_t)pu16 & 1));
1470 return *pu16;
1471}
1472
1473
1474/**
1475 * Atomically reads an unsigned 16-bit value, unordered.
1476 *
1477 * @returns Current *pu16 value
1478 * @param pu16 Pointer to the 16-bit variable to read.
1479 */
1480DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1481{
1482 Assert(!((uintptr_t)pu16 & 1));
1483 return *pu16;
1484}
1485
1486
1487/**
1488 * Atomically reads a signed 16-bit value, ordered.
1489 *
1490 * @returns Current *pi16 value
1491 * @param pi16 Pointer to the 16-bit variable to read.
1492 */
1493DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1494{
1495 ASMMemoryFence();
1496 Assert(!((uintptr_t)pi16 & 1));
1497 return *pi16;
1498}
1499
1500
1501/**
1502 * Atomically reads a signed 16-bit value, unordered.
1503 *
1504 * @returns Current *pi16 value
1505 * @param pi16 Pointer to the 16-bit variable to read.
1506 */
1507DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1508{
1509 Assert(!((uintptr_t)pi16 & 1));
1510 return *pi16;
1511}
1512
1513
1514/**
1515 * Atomically reads an unsigned 32-bit value, ordered.
1516 *
1517 * @returns Current *pu32 value
1518 * @param pu32 Pointer to the 32-bit variable to read.
1519 */
1520DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1521{
1522 ASMMemoryFence();
1523 Assert(!((uintptr_t)pu32 & 3));
1524 return *pu32;
1525}
1526
1527
1528/**
1529 * Atomically reads an unsigned 32-bit value, unordered.
1530 *
1531 * @returns Current *pu32 value
1532 * @param pu32 Pointer to the 32-bit variable to read.
1533 */
1534DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1535{
1536 Assert(!((uintptr_t)pu32 & 3));
1537 return *pu32;
1538}
1539
1540
1541/**
1542 * Atomically reads a signed 32-bit value, ordered.
1543 *
1544 * @returns Current *pi32 value
1545 * @param pi32 Pointer to the 32-bit variable to read.
1546 */
1547DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1548{
1549 ASMMemoryFence();
1550 Assert(!((uintptr_t)pi32 & 3));
1551 return *pi32;
1552}
1553
1554
1555/**
1556 * Atomically reads a signed 32-bit value, unordered.
1557 *
1558 * @returns Current *pi32 value
1559 * @param pi32 Pointer to the 32-bit variable to read.
1560 */
1561DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1562{
1563 Assert(!((uintptr_t)pi32 & 3));
1564 return *pi32;
1565}
1566
1567
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain aligned load.  The
 * 32-bit inline variants instead execute LOCK CMPXCHG8B with both the
 * comparand (EDX:EAX) and the replacement (ECX:EBX) set to zero: if *pu64 is
 * zero it is rewritten with zero, otherwise the current value is loaded into
 * EDX:EAX.  Either way EDX:EAX ends up holding the value, but the instruction
 * is a read-modify-write, hence the writability requirement below.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is not handed to the compiler in PIC builds: it is swapped with a
       zeroed stack variable before the CMPXCHG8B and restored afterwards. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1645
1646
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain aligned load without any fence.  The 32-bit inline
 * variants execute LOCK CMPXCHG8B with both the comparand (EDX:EAX) and the
 * replacement (ECX:EBX) set to zero, which leaves the current value in
 * EDX:EAX; as this is a read-modify-write instruction the memory has to be
 * writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EAX, ECX and EDX are zeroed inside the asm statement; EBX is swapped
       with a zeroed stack variable and restored afterwards.  u32Spill only
       exists to tell the compiler that ECX gets overwritten. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1725
1726
1727/**
1728 * Atomically reads a signed 64-bit value, ordered.
1729 *
1730 * @returns Current *pi64 value
1731 * @param pi64 Pointer to the 64-bit variable to read.
1732 * The memory pointed to must be writable.
1733 * @remark This will fault if the memory is read-only!
1734 */
1735DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1736{
1737 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1738}
1739
1740
1741/**
1742 * Atomically reads a signed 64-bit value, unordered.
1743 *
1744 * @returns Current *pi64 value
1745 * @param pi64 Pointer to the 64-bit variable to read.
1746 * The memory pointed to must be writable.
1747 * @remark This will fault if the memory is read-only!
1748 */
1749DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1750{
1751 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1752}
1753
1754
1755/**
1756 * Atomically reads a pointer value, ordered.
1757 *
1758 * @returns Current *pv value
1759 * @param ppv Pointer to the pointer variable to read.
1760 *
1761 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1762 * requires less typing (no casts).
1763 */
1764DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1765{
1766#if ARCH_BITS == 32
1767 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1768#elif ARCH_BITS == 64
1769 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1770#else
1771# error "ARCH_BITS is bogus"
1772#endif
1773}
1774
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * The GNU C variant routes @a ppv through a local whose type is derived from
 * *ppv and casts the result back to that type before assigning it to a
 * variable of type @a Type, so the compiler can diagnose mismatches.  The
 * other variant simply casts the result to @a Type.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#endif
1794
1795
1796/**
1797 * Atomically reads a pointer value, unordered.
1798 *
1799 * @returns Current *pv value
1800 * @param ppv Pointer to the pointer variable to read.
1801 *
1802 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1803 * requires less typing (no casts).
1804 */
1805DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1806{
1807#if ARCH_BITS == 32
1808 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1809#elif ARCH_BITS == 64
1810 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1811#else
1812# error "ARCH_BITS is bogus"
1813#endif
1814}
1815
1816
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * The GNU C variant routes @a ppv through a local whose type is derived from
 * *ppv and casts the result back to that type before assigning it to a
 * variable of type @a Type, so the compiler can diagnose mismatches.  The
 * other variant simply casts the result to @a Type.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#endif
1836
1837
1838/**
1839 * Atomically reads a boolean value, ordered.
1840 *
1841 * @returns Current *pf value
1842 * @param pf Pointer to the boolean variable to read.
1843 */
1844DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1845{
1846 ASMMemoryFence();
1847 return *pf; /* byte reads are atomic on x86 */
1848}
1849
1850
1851/**
1852 * Atomically reads a boolean value, unordered.
1853 *
1854 * @returns Current *pf value
1855 * @param pf Pointer to the boolean variable to read.
1856 */
1857DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1858{
1859 return *pf; /* byte reads are atomic on x86 */
1860}
1861
1862
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * The handle is read as a host-pointer-sized unsigned integer via
 * ASMAtomicReadU32 or ASMAtomicReadU64; compile time assertions reject source
 * or result types of any other size.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1888
1889
/**
 * Atomically read a typical IPRT handle value, unordered.
 *
 * The handle is read as a host-pointer-sized unsigned integer via
 * ASMAtomicUoReadU32 or ASMAtomicUoReadU64; compile time assertions reject
 * source or result types of any other size.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1915
1916
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered reader.  Any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) \
        { \
            case 1: \
                *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); \
                break; \
            case 2: \
                *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); \
                break; \
            case 4: \
                *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); \
                break; \
            case 8: \
                *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
1934
1935
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered reader.  Any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); break; \
        } \
    } while (0)
1953
1954
1955/**
1956 * Atomically writes an unsigned 8-bit value, ordered.
1957 *
1958 * @param pu8 Pointer to the 8-bit variable.
1959 * @param u8 The 8-bit value to assign to *pu8.
1960 */
1961DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1962{
1963 ASMAtomicXchgU8(pu8, u8);
1964}
1965
1966
/**
 * Atomically writes an unsigned 8-bit value, unordered.
 *
 * A single plain store; no fence or locked instruction is involved.
 *
 * @param   pu8     Pointer to the 8-bit variable.
 * @param   u8      The 8-bit value to assign to *pu8.
 */
DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
{
    *pu8 = u8;      /* byte writes are atomic on x86 */
}
1977
1978
1979/**
1980 * Atomically writes a signed 8-bit value, ordered.
1981 *
1982 * @param pi8 Pointer to the 8-bit variable to read.
1983 * @param i8 The 8-bit value to assign to *pi8.
1984 */
1985DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
1986{
1987 ASMAtomicXchgS8(pi8, i8);
1988}
1989
1990
/**
 * Atomically writes a signed 8-bit value, unordered.
 *
 * A single plain store; no fence or locked instruction is involved.
 *
 * @param   pi8     Pointer to the 8-bit variable to write.
 * @param   i8      The 8-bit value to assign to *pi8.
 */
DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
{
    *pi8 = i8;      /* byte writes are atomic on x86 */
}
2001
2002
2003/**
2004 * Atomically writes an unsigned 16-bit value, ordered.
2005 *
2006 * @param pu16 Pointer to the 16-bit variable.
2007 * @param u16 The 16-bit value to assign to *pu16.
2008 */
2009DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2010{
2011 ASMAtomicXchgU16(pu16, u16);
2012}
2013
2014
/**
 * Atomically writes an unsigned 16-bit value, unordered.
 *
 * A single plain store; the pointer is asserted to be 2-byte aligned.
 *
 * @param   pu16    Pointer to the 16-bit variable.
 * @param   u16     The 16-bit value to assign to *pu16.
 */
DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
{
    Assert(!((uintptr_t)pu16 & 1));
    *pu16 = u16;
}
2026
2027
2028/**
2029 * Atomically writes a signed 16-bit value, ordered.
2030 *
2031 * @param pi16 Pointer to the 16-bit variable to read.
2032 * @param i16 The 16-bit value to assign to *pi16.
2033 */
2034DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2035{
2036 ASMAtomicXchgS16(pi16, i16);
2037}
2038
2039
/**
 * Atomically writes a signed 16-bit value, unordered.
 *
 * A single plain store; the pointer is asserted to be 2-byte aligned.
 *
 * @param   pi16    Pointer to the 16-bit variable to write.
 * @param   i16     The 16-bit value to assign to *pi16.
 */
DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
{
    Assert(!((uintptr_t)pi16 & 1));
    *pi16 = i16;
}
2051
2052
2053/**
2054 * Atomically writes an unsigned 32-bit value, ordered.
2055 *
2056 * @param pu32 Pointer to the 32-bit variable.
2057 * @param u32 The 32-bit value to assign to *pu32.
2058 */
2059DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2060{
2061 ASMAtomicXchgU32(pu32, u32);
2062}
2063
2064
/**
 * Atomically writes an unsigned 32-bit value, unordered.
 *
 * A single plain store; the pointer is asserted to be 4-byte aligned.
 *
 * @param   pu32    Pointer to the 32-bit variable.
 * @param   u32     The 32-bit value to assign to *pu32.
 */
DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
{
    Assert(!((uintptr_t)pu32 & 3));
    *pu32 = u32;
}
2076
2077
2078/**
2079 * Atomically writes a signed 32-bit value, ordered.
2080 *
2081 * @param pi32 Pointer to the 32-bit variable to read.
2082 * @param i32 The 32-bit value to assign to *pi32.
2083 */
2084DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2085{
2086 ASMAtomicXchgS32(pi32, i32);
2087}
2088
2089
/**
 * Atomically writes a signed 32-bit value, unordered.
 *
 * A single plain store; the pointer is asserted to be 4-byte aligned.
 *
 * @param   pi32    Pointer to the 32-bit variable to write.
 * @param   i32     The 32-bit value to assign to *pi32.
 */
DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
{
    Assert(!((uintptr_t)pi32 & 3));
    *pi32 = i32;
}
2101
2102
2103/**
2104 * Atomically writes an unsigned 64-bit value, ordered.
2105 *
2106 * @param pu64 Pointer to the 64-bit variable.
2107 * @param u64 The 64-bit value to assign to *pu64.
2108 */
2109DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2110{
2111 ASMAtomicXchgU64(pu64, u64);
2112}
2113
2114
2115/**
2116 * Atomically writes an unsigned 64-bit value, unordered.
2117 *
2118 * @param pu64 Pointer to the 64-bit variable.
2119 * @param u64 The 64-bit value to assign to *pu64.
2120 */
2121DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2122{
2123 Assert(!((uintptr_t)pu64 & 7));
2124#if ARCH_BITS == 64
2125 *pu64 = u64;
2126#else
2127 ASMAtomicXchgU64(pu64, u64);
2128#endif
2129}
2130
2131
2132/**
2133 * Atomically writes a signed 64-bit value, ordered.
2134 *
2135 * @param pi64 Pointer to the 64-bit variable.
2136 * @param i64 The 64-bit value to assign to *pi64.
2137 */
2138DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2139{
2140 ASMAtomicXchgS64(pi64, i64);
2141}
2142
2143
2144/**
2145 * Atomically writes a signed 64-bit value, unordered.
2146 *
2147 * @param pi64 Pointer to the 64-bit variable.
2148 * @param i64 The 64-bit value to assign to *pi64.
2149 */
2150DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2151{
2152 Assert(!((uintptr_t)pi64 & 7));
2153#if ARCH_BITS == 64
2154 *pi64 = i64;
2155#else
2156 ASMAtomicXchgS64(pi64, i64);
2157#endif
2158}
2159
2160
2161/**
2162 * Atomically writes a boolean value, unordered.
2163 *
2164 * @param pf Pointer to the boolean variable.
2165 * @param f The boolean value to assign to *pf.
2166 */
2167DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2168{
2169 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2170}
2171
2172
2173/**
2174 * Atomically writes a boolean value, unordered.
2175 *
2176 * @param pf Pointer to the boolean variable.
2177 * @param f The boolean value to assign to *pf.
2178 */
2179DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2180{
2181 *pf = f; /* byte writes are atomic on x86 */
2182}
2183
2184
2185/**
2186 * Atomically writes a pointer value, ordered.
2187 *
2188 * @param ppv Pointer to the pointer variable.
2189 * @param pv The pointer value to assign to *ppv.
2190 */
2191DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2192{
2193#if ARCH_BITS == 32
2194 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2195#elif ARCH_BITS == 64
2196 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2197#else
2198# error "ARCH_BITS is bogus"
2199#endif
2200}
2201
2202
/**
 * Atomically writes a pointer value, ordered.
 *
 * Statement macro wrapping ASMAtomicWritePtrVoid.  It checks at compile time
 * that both *ppv and pv are pointer sized and asserts that ppv is aligned to
 * the pointer size.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks In the non-GCC variant @a ppv is expanded both in the assertion
 *          and in the call, so avoid arguments with side effects.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        /* Use the local copy so the argument is evaluated only once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2237
2238
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Statement macro wrapping ASMAtomicWritePtrVoid with a NULL value.  It checks
 * at compile time that *ppv is pointer sized and asserts that ppv is aligned
 * to the pointer size.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks In the non-GCC variant @a ppv is expanded both in the assertion
 *          and in the call, so avoid arguments with side effects.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        /* Use the local copy so the argument is evaluated only once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2264
2265
/**
 * Atomically writes a pointer value, unordered.
 *
 * Statement macro (it yields no value) that performs a plain assignment to
 * *ppv after checking at compile time that *ppv and pv are pointer sized and
 * asserting that ppv is aligned to the pointer size.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks In the non-GCC variant @a ppv is expanded both in the assertion
 *          and in the assignment, so avoid arguments with side effects.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        /* Use the local copy so the argument is evaluated only once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2300
2301
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Statement macro that assigns NULL to *ppv with a plain assignment after
 * checking at compile time that *ppv is pointer sized and asserting that ppv
 * is aligned to the pointer size.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks In the non-GCC variant @a ppv is expanded both in the assertion
 *          and in the assignment, so avoid arguments with side effects.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        /* Use the local copy so the argument is evaluated only once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2327
2328
2329/**
2330 * Atomically write a typical IPRT handle value, ordered.
2331 *
2332 * @param ph Pointer to the variable to update.
2333 * @param hNew The value to assign to *ph.
2334 *
2335 * @remarks This doesn't currently work for all handles (like RTFILE).
2336 */
2337#if HC_ARCH_BITS == 32
2338# define ASMAtomicWriteHandle(ph, hNew) \
2339 do { \
2340 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2341 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2342 } while (0)
2343#elif HC_ARCH_BITS == 64
2344# define ASMAtomicWriteHandle(ph, hNew) \
2345 do { \
2346 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2347 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2348 } while (0)
2349#else
2350# error HC_ARCH_BITS
2351#endif
2352
2353
2354/**
2355 * Atomically write a typical IPRT handle value, unordered.
2356 *
2357 * @param ph Pointer to the variable to update.
2358 * @param hNew The value to assign to *ph.
2359 *
2360 * @remarks This doesn't currently work for all handles (like RTFILE).
2361 */
2362#if HC_ARCH_BITS == 32
2363# define ASMAtomicUoWriteHandle(ph, hNew) \
2364 do { \
2365 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2366 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2367 } while (0)
2368#elif HC_ARCH_BITS == 64
2369# define ASMAtomicUoWriteHandle(ph, hNew) \
2370 do { \
2371 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2372 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2373 } while (0)
2374#else
2375# error HC_ARCH_BITS
2376#endif
2377
2378
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Statement macro dispatching on sizeof(*pu) to ASMAtomicWriteU8, U16, U32 or
 * U64.  Any other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2396
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Statement macro dispatching on sizeof(*pu) to ASMAtomicUoWriteU8, U16, U32
 * or U64.  Any other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* Report under this macro's own name; cast since sizeof yields size_t but the format is %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2414
2415
2416
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline using the
 * _InterlockedExchangeAdd intrinsic, GNU-style inline assembly, or MSC-style
 * inline assembly.  Both assembly variants use a `lock xadd` instruction.
 *
 * @returns The old value.
 * @param   pu32    Pointer to the value.
 * @param   u32     Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* Operand %0 goes in holding the addend (tied to the output via "0") and
       comes out holding what xadd left in the register, which is returned. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (u32),
                           "m" (*pu32)
                         : "memory");
    return u32;
# else
    /* MSC-style assembly: addend in eax, target address in rdx/edx; eax is
       stored back into u32 after the xadd. */
    __asm
    {
        mov     eax, [u32]
# ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
# else
        mov     edx, [pu32]
        lock xadd [edx], eax
# endif
        mov     [u32], eax
    }
    return u32;
# endif
}
#endif
2458
2459
2460/**
2461 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2462 *
2463 * @returns The old value.
2464 * @param pi32 Pointer to the value.
2465 * @param i32 Number to add.
2466 */
2467DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2468{
2469 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2470}
2471
2472
/**
 * Atomically exchanges and adds to a 64-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline.  On
 * AMD64 the _InterlockedExchangeAdd64 intrinsic or a `lock xaddq` instruction
 * is used; all other inline configurations use a compare-and-exchange retry
 * loop.
 *
 * @returns The old value.
 * @param   pu64    Pointer to the value.
 * @param   u64     Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
#else
DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* Operand %0 goes in holding the addend (tied to the output via "0") and
       comes out holding what xadd left in the register, which is returned. */
    __asm__ __volatile__("lock; xaddq %0, %1\n\t"
                         : "=r" (u64),
                           "=m" (*pu64)
                         : "0" (u64),
                           "m" (*pu64)
                         : "memory");
    return u64;
# else
    /* Generic path: read the current value, compute the sum and try to swap
       it in; repeat until ASMAtomicCmpXchgU64 reports success. */
    uint64_t u64Old;
    for (;;)
    {
        uint64_t u64New;
        u64Old = ASMAtomicUoReadU64(pu64);
        u64New = u64Old + u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
    return u64Old;
# endif
}
#endif
2512
2513
2514/**
2515 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2516 *
2517 * @returns The old value.
2518 * @param pi64 Pointer to the value.
2519 * @param i64 Number to add.
2520 */
2521DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2522{
2523 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2524}
2525
2526
2527/**
2528 * Atomically exchanges and adds to a size_t value, ordered.
2529 *
2530 * @returns The old value.
2531 * @param pcb Pointer to the size_t value.
2532 * @param cb Number to add.
2533 */
2534DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2535{
2536#if ARCH_BITS == 64
2537 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2538#elif ARCH_BITS == 32
2539 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2540#else
2541# error "Unsupported ARCH_BITS value"
2542#endif
2543}
2544
2545
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Statement macro dispatching on sizeof(*pu) to ASMAtomicAddU32 or
 * ASMAtomicAddU64.  Any other size triggers an assertion; in that case
 * neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2562
2563
2564/**
2565 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2566 *
2567 * @returns The old value.
2568 * @param pu32 Pointer to the value.
2569 * @param u32 Number to subtract.
2570 */
2571DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2572{
2573 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2574}
2575
2576
2577/**
2578 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2579 *
2580 * @returns The old value.
2581 * @param pi32 Pointer to the value.
2582 * @param i32 Number to subtract.
2583 */
2584DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2585{
2586 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2587}
2588
2589
2590/**
2591 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2592 *
2593 * @returns The old value.
2594 * @param pu64 Pointer to the value.
2595 * @param u64 Number to subtract.
2596 */
2597DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2598{
2599 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2600}
2601
2602
2603/**
2604 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2605 *
2606 * @returns The old value.
2607 * @param pi64 Pointer to the value.
2608 * @param i64 Number to subtract.
2609 */
2610DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2611{
2612 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2613}
2614
2615
2616/**
2617 * Atomically exchanges and subtracts to a size_t value, ordered.
2618 *
2619 * @returns The old value.
2620 * @param pcb Pointer to the size_t value.
2621 * @param cb Number to subtract.
2622 */
2623DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2624{
2625#if ARCH_BITS == 64
2626 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2627#elif ARCH_BITS == 32
2628 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2629#else
2630# error "Unsupported ARCH_BITS value"
2631#endif
2632}
2633
2634
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Statement macro dispatching on sizeof(*pu) to ASMAtomicSubU32 or
 * ASMAtomicSubU64.  Any other size triggers an assertion; in that case
 * neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2651
2652
/**
 * Atomically increment a 32-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline using the
 * _InterlockedIncrement intrinsic or a `lock xadd` of 1.
 *
 * @returns The new value.
 * @param   pu32    Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedIncrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* xadd leaves the previous memory value in %0, so one is added to it to
       produce the new value for the caller. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1),
                           "m" (*pu32)
                         : "memory");
    return u32+1;
# else
    /* MSC-style assembly: same xadd-by-one, then +1 on the fetched value. */
    __asm
    {
        mov     eax, 1
# ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
# else
        mov     edx, [pu32]
        lock xadd [edx], eax
# endif
        mov     u32, eax
    }
    return u32+1;
# endif
}
#endif
2694
2695
2696/**
2697 * Atomically increment a signed 32-bit value, ordered.
2698 *
2699 * @returns The new value.
2700 * @param pi32 Pointer to the value to increment.
2701 */
2702DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2703{
2704 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2705}
2706
2707
/**
 * Atomically increment a 64-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline.  On
 * AMD64 the _InterlockedIncrement64 intrinsic or a `lock xaddq` of 1 is used;
 * all other inline configurations go through ASMAtomicAddU64.
 *
 * @returns The new value.
 * @param   pu64    Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    uint64_t u64;
    u64 = _InterlockedIncrement64((__int64 *)pu64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    uint64_t u64;
    /* xadd leaves the previous memory value in %0, so one is added to it to
       produce the new value for the caller. */
    __asm__ __volatile__("lock; xaddq %0, %1\n\t"
                         : "=r" (u64),
                           "=m" (*pu64)
                         : "0" (1),
                           "m" (*pu64)
                         : "memory");
    return u64 + 1;
# else
    /* ASMAtomicAddU64 returns the old value; +1 turns it into the new one. */
    return ASMAtomicAddU64(pu64, 1) + 1;
# endif
}
#endif
2738
2739
2740/**
2741 * Atomically increment a signed 64-bit value, ordered.
2742 *
2743 * @returns The new value.
2744 * @param pi64 Pointer to the value to increment.
2745 */
2746DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2747{
2748 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2749}
2750
2751
2752/**
2753 * Atomically increment a size_t value, ordered.
2754 *
2755 * @returns The new value.
2756 * @param pcb Pointer to the value to increment.
2757 */
2758DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
2759{
2760#if ARCH_BITS == 64
2761 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2762#elif ARCH_BITS == 32
2763 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2764#else
2765# error "Unsupported ARCH_BITS value"
2766#endif
2767}
2768
2769
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline using the
 * _InterlockedDecrement intrinsic or a `lock xadd` of -1.
 *
 * @returns The new value.
 * @param   pu32    Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedDecrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* xadd leaves the previous memory value in %0, so one is subtracted from
       it to produce the new value for the caller. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1),
                           "m" (*pu32)
                         : "memory");
    return u32-1;
# else
    /* MSC-style assembly: same xadd of -1, then -1 on the fetched value. */
    __asm
    {
        mov     eax, -1
# ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
# else
        mov     edx, [pu32]
        lock xadd [edx], eax
# endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
2811
2812
2813/**
2814 * Atomically decrement a signed 32-bit value, ordered.
2815 *
2816 * @returns The new value.
2817 * @param pi32 Pointer to the value to decrement.
2818 */
2819DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2820{
2821 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2822}
2823
2824
/**
 * Atomically decrement an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline.  On
 * AMD64 the _InterlockedDecrement64 intrinsic or a `lock xaddq` of all-ones
 * is used; all other inline configurations go through ASMAtomicAddU64.
 *
 * @returns The new value.
 * @param   pu64    Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    uint64_t u64;
    /* Adding ~0 (all bits set) is a subtraction of one modulo 2^64.  xadd
       leaves the previous memory value in %0, hence the -1 on return. */
    __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
                         : "=r" (u64),
                           "=m" (*pu64)
                         : "0" (~(uint64_t)0),
                           "m" (*pu64)
                         : "memory");
    return u64-1;
# else
    /* ASMAtomicAddU64 returns the old value; -1 turns it into the new one. */
    return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
# endif
}
#endif
2854
2855
2856/**
2857 * Atomically decrement a signed 64-bit value, ordered.
2858 *
2859 * @returns The new value.
2860 * @param pi64 Pointer to the value to decrement.
2861 */
2862DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2863{
2864 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2865}
2866
2867
2868/**
2869 * Atomically decrement a size_t value, ordered.
2870 *
2871 * @returns The new value.
2872 * @param pcb Pointer to the value to decrement.
2873 */
2874DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
2875{
2876#if ARCH_BITS == 64
2877 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2878#elif ARCH_BITS == 32
2879 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2880#else
2881# error "Unsupported ARCH_BITS value"
2882#endif
2883}
2884
2885
/**
 * Atomically Or an unsigned 32-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline using the
 * _InterlockedOr intrinsic or a `lock or` instruction.
 *
 * @param   pu32    Pointer to the variable to OR u32 with.
 * @param   u32     The value to OR *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedOr((long volatile *)pu32, (long)u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed both as output and as input because the instruction
       reads and writes it.  NOTE(review): unlike the add/inc/dec routines
       this statement has no "memory" clobber - confirm that is intended. */
    __asm__ __volatile__("lock; orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
# ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock or [rdx], eax
# else
        mov     edx, [pu32]
        lock or [edx], eax
# endif
    }
# endif
}
#endif
2920
2921
2922/**
2923 * Atomically Or a signed 32-bit value, ordered.
2924 *
2925 * @param pi32 Pointer to the pointer variable to OR u32 with.
2926 * @param i32 The value to OR *pu32 with.
2927 */
2928DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2929{
2930 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2931}
2932
2933
/**
 * Atomically Or an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline.  On
 * AMD64 the _InterlockedOr64 intrinsic or a `lock orq` instruction is used;
 * all other inline configurations use a compare-and-exchange retry loop.
 *
 * @param   pu64    Pointer to the variable to OR u64 with.
 * @param   u64     The value to OR *pu64 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* *pu64 is listed both as output and as input because the instruction
       reads and writes it. */
    __asm__ __volatile__("lock; orq %1, %q0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Generic path: read the current value, OR in the bits and try to swap
       the result in; repeat until ASMAtomicCmpXchgU64 reports success. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old | u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
2965
2966
2967/**
2968 * Atomically Or a signed 64-bit value, ordered.
2969 *
2970 * @param pi64 Pointer to the pointer variable to OR u64 with.
2971 * @param i64 The value to OR *pu64 with.
2972 */
2973DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
2974{
2975 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
2976}
/**
 * Atomically And an unsigned 32-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline using the
 * _InterlockedAnd intrinsic or a `lock and` instruction.
 *
 * @param   pu32    Pointer to the variable to AND u32 with.
 * @param   u32     The value to AND *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedAnd((long volatile *)pu32, u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed both as output and as input because the instruction
       reads and writes it. */
    __asm__ __volatile__("lock; andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
# ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock and [rdx], eax
# else
        mov     edx, [pu32]
        lock and [edx], eax
# endif
    }
# endif
}
#endif
3011
3012
3013/**
3014 * Atomically And a signed 32-bit value, ordered.
3015 *
3016 * @param pi32 Pointer to the pointer variable to AND i32 with.
3017 * @param i32 The value to AND *pi32 with.
3018 */
3019DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3020{
3021 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3022}
3023
3024
/**
 * Atomically And an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline.  On
 * AMD64 the _InterlockedAnd64 intrinsic or a `lock andq` instruction is used;
 * all other inline configurations use a compare-and-exchange retry loop.
 *
 * @param   pu64    Pointer to the variable to AND u64 with.
 * @param   u64     The value to AND *pu64 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    _InterlockedAnd64((__int64 volatile *)pu64, u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* *pu64 is listed both as output and as input because the instruction
       reads and writes it. */
    __asm__ __volatile__("lock; andq %1, %0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Generic path: read the current value, mask it and try to swap the
       result in; repeat until ASMAtomicCmpXchgU64 reports success. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old & u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
3056
3057
3058/**
3059 * Atomically And a signed 64-bit value, ordered.
3060 *
3061 * @param pi64 Pointer to the pointer variable to AND i64 with.
3062 * @param i64 The value to AND *pi64 with.
3063 */
3064DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3065{
3066 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3067}
3068
3069
3070
/** @def RT_ASM_PAGE_SIZE
 * The page size assumed by the page helpers in this header: 0x2000 bytes when
 * RT_ARCH_SPARC64 is defined, 0x1000 bytes otherwise.
 *
 * We try to avoid dragging in iprt/param.h here.  If a PAGE_SIZE macro is
 * already defined (and NT_INCLUDED is not), it is checked against this value
 * and a mismatch is a compile-time error.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3090
/**
 * Zeros one memory page of RT_ASM_PAGE_SIZE bytes.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline with the
 * __stosq / __stosd intrinsics or a `rep stos` instruction, storing 8 bytes
 * per element on AMD64 and 4 bytes per element otherwise.
 *
 * @param   pv      Pointer to the memory block.  This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
#  if RT_INLINE_ASM_USES_INTRIN
#   ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
#   else
    __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
#   endif

#  elif RT_INLINE_ASM_GNU_STYLE
    /* The instruction modifies both the destination and the count register,
       so they are declared as outputs; uDummy only receives the final count. */
    RTCCUINTREG uDummy;
#   ifdef RT_ARCH_AMD64
    __asm__ __volatile__("rep stosq"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (RT_ASM_PAGE_SIZE >> 3),
                           "a" (0)
                         : "memory");
#   else
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (RT_ASM_PAGE_SIZE >> 2),
                           "a" (0)
                         : "memory");
#   endif
#  else
    /* MSC-style assembly.  The element counts are hard-coded rather than
       derived from RT_ASM_PAGE_SIZE: 0200h quadwords and 0400h doublewords
       both amount to 0x1000 bytes. */
    __asm
    {
#   ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#   else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep     stosd
#   endif
    }
#  endif
}
# endif
3145
3146
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline with the
 * __stosq / __stosd intrinsics or a `rep stos` of 32-bit elements.  The
 * element count is cb / 4, so a remainder of cb modulo 4 is not cleared.
 *
 * @param   pv      Pointer to the memory block.
 * @param   cb      Number of bytes in the block.  This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* On AMD64, use 64-bit stores when the size is a multiple of 8 bytes;
       the 'else' pairs with the __stosd statement after the #endif. */
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, 0, cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, 0, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Destination and count registers are modified by the instruction and are
       therefore outputs tied to their inputs ("0" and "1"). */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (0)
                         : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
3192
3193
/**
 * Fills a memory block with a 32-bit aligned size.
 *
 * Depending on the build configuration this is either only declared here
 * (RT_INLINE_ASM_EXTERNAL without intrinsics) or implemented inline with the
 * __stosq / __stosd intrinsics or a `rep stos` of 32-bit elements.  The
 * element count is cb / 4, so a remainder of cb modulo 4 is not filled.
 *
 * @param   pv      Pointer to the memory block.
 * @param   cb      Number of bytes in the block.  This MUST be aligned on 32-bit!
 * @param   u32     The value to fill with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
#else
DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* On AMD64, use 64-bit stores of the doubled-up pattern when the size is
       a multiple of 8 bytes; the 'else' pairs with the __stosd statement
       after the #endif. */
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, u32, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Destination and count registers are modified by the instruction and are
       therefore outputs tied to their inputs ("0" and "1"). */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (u32)
                         : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        mov     eax, [u32]
        rep stosd
    }
# endif
}
#endif
3240
3241
3242/**
3243 * Checks if a memory page is all zeros.
3244 *
3245 * @returns true / false.
3246 *
3247 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3248 * boundary
3249 */
3250DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3251{
3252# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3253 union { RTCCUINTREG r; bool f; } uAX;
3254 RTCCUINTREG xCX, xDI;
3255 Assert(!((uintptr_t)pvPage & 15));
3256 __asm__ __volatile__("repe; "
3257# ifdef RT_ARCH_AMD64
3258 "scasq\n\t"
3259# else
3260 "scasl\n\t"
3261# endif
3262 "setnc %%al\n\t"
3263 : "=&c" (xCX),
3264 "=&D" (xDI),
3265 "=&a" (uAX.r)
3266 : "mr" (pvPage),
3267# ifdef RT_ARCH_AMD64
3268 "0" (RT_ASM_PAGE_SIZE/8),
3269# else
3270 "0" (RT_ASM_PAGE_SIZE/4),
3271# endif
3272 "1" (pvPage),
3273 "2" (0));
3274 return uAX.f;
3275# else
3276 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3277 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3278 Assert(!((uintptr_t)pvPage & 15));
3279 for (;;)
3280 {
3281 if (puPtr[0]) return false;
3282 if (puPtr[4]) return false;
3283
3284 if (puPtr[2]) return false;
3285 if (puPtr[6]) return false;
3286
3287 if (puPtr[1]) return false;
3288 if (puPtr[5]) return false;
3289
3290 if (puPtr[3]) return false;
3291 if (puPtr[7]) return false;
3292
3293 if (!--cLeft)
3294 return true;
3295 puPtr += 8;
3296 }
3297 return true;
3298# endif
3299}
3300
3301
3302/**
3303 * Checks if a memory block is filled with the specified byte.
3304 *
3305 * This is a sort of inverted memchr.
3306 *
3307 * @returns Pointer to the byte which doesn't equal u8.
3308 * @returns NULL if all equal to u8.
3309 *
3310 * @param pv Pointer to the memory block.
3311 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3312 * @param u8 The value it's supposed to be filled with.
3313 *
3314 * @todo Fix name, it is a predicate function but it's not returning boolean!
3315 */
3316DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3317{
3318/** @todo rewrite this in inline assembly? */
3319 uint8_t const *pb = (uint8_t const *)pv;
3320 for (; cb; cb--, pb++)
3321 if (RT_UNLIKELY(*pb != u8))
3322 return (void *)pb;
3323 return NULL;
3324}
3325
3326
3327/**
3328 * Checks if a memory block is filled with the specified 32-bit value.
3329 *
3330 * This is a sort of inverted memchr.
3331 *
3332 * @returns Pointer to the first value which doesn't equal u32.
3333 * @returns NULL if all equal to u32.
3334 *
3335 * @param pv Pointer to the memory block.
3336 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3337 * @param u32 The value it's supposed to be filled with.
3338 *
3339 * @todo Fix name, it is a predicate function but it's not returning boolean!
3340 */
3341DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3342{
3343/** @todo rewrite this in inline assembly? */
3344 uint32_t const *pu32 = (uint32_t const *)pv;
3345 for (; cb; cb -= 4, pu32++)
3346 if (RT_UNLIKELY(*pu32 != u32))
3347 return (uint32_t *)pu32;
3348 return NULL;
3349}
3350
3351
3352/**
3353 * Probes a byte pointer for read access.
3354 *
3355 * The byte is read with a plain load, so the function is expected to
3356 * fault if the byte is not read accessible; the idea is to do this in a
3357 * safe place like before acquiring locks and such like.
 * NOTE(review): the previous wording said "will not fault", which
 * contradicts the description of the sibling ASMProbeReadBuffer - confirm.
3358 *
3359 * Also, this function is meant to guarantee that an eager compiler is
3360 * not going to optimize the probing away (see the todo in the body).
3361 *
 * @returns The byte that was read from pvByte.
3362 * @param pvByte Pointer to the byte.
3363 */
3364#if RT_INLINE_ASM_EXTERNAL
3365DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3366#else
3367DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3368{
3369 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3370 uint8_t u8;
3371# if RT_INLINE_ASM_GNU_STYLE
    /* Volatile asm performing a single byte load through the pointer. */
3372 __asm__ __volatile__("movb (%1), %0\n\t"
3373 : "=r" (u8)
3374 : "r" (pvByte));
3375# else
3376 __asm
3377 {
3378# ifdef RT_ARCH_AMD64
3379 mov rax, [pvByte]
3380 mov al, [rax]
3381# else
3382 mov eax, [pvByte]
3383 mov al, [eax]
3384# endif
3385 mov [u8], al
3386 }
3387# endif
3388 return u8;
3389}
3390#endif
3391
3392/**
3393 * Probes a buffer for read access page by page.
3394 *
3395 * While the function will fault if the buffer is not fully read
3396 * accessible, the idea is to do this in a safe place like before
3397 * acquiring locks and such like.
3398 *
3399 * Also, this functions guarantees that an eager compiler is not going
3400 * to optimize the probing away.
3401 *
3402 * @param pvBuf Pointer to the buffer.
3403 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3404 */
3405DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3406{
3407 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3408 /* the first byte */
3409 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3410 ASMProbeReadByte(pu8);
3411
3412 /* the pages in between pages. */
3413 while (cbBuf > RT_ASM_PAGE_SIZE)
3414 {
3415 ASMProbeReadByte(pu8);
3416 cbBuf -= RT_ASM_PAGE_SIZE;
3417 pu8 += RT_ASM_PAGE_SIZE;
3418 }
3419
3420 /* the last byte */
3421 ASMProbeReadByte(pu8 + cbBuf - 1);
3422}
3423
3424
3425
3426/** @defgroup grp_inline_bits Bit Operations
3427 * @{
3428 */
3429
3430
3431/**
3432 * Sets a bit in a bitmap.
3433 *
 * This is the non-atomic variant: none of the code paths below use a lock
 * prefix or an interlocked intrinsic.
 *
3434 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3435 * @param iBit The bit to set.
3436 *
3437 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3438 * However, doing so will yield better performance as well as avoiding
3439 * traps accessing the last bits in the bitmap.
3440 */
3441#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3442DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3443#else
3444DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3445{
3446# if RT_INLINE_ASM_USES_INTRIN
3447 _bittestandset((long *)pvBitmap, iBit);
3448
3449# elif RT_INLINE_ASM_GNU_STYLE
    /* The bitmap is listed both as output and input since bts modifies it
       in place; iBit may be an immediate ("I") or a register. */
3450 __asm__ __volatile__("btsl %1, %0"
3451 : "=m" (*(volatile long *)pvBitmap)
3452 : "Ir" (iBit),
3453 "m" (*(volatile long *)pvBitmap)
3454 : "memory");
3455# else
3456 __asm
3457 {
3458# ifdef RT_ARCH_AMD64
3459 mov rax, [pvBitmap]
3460 mov edx, [iBit]
3461 bts [rax], edx
3462# else
3463 mov eax, [pvBitmap]
3464 mov edx, [iBit]
3465 bts [eax], edx
3466# endif
3467 }
3468# endif
3469}
3470#endif
3471
3472
3473/**
3474 * Atomically sets a bit in a bitmap, ordered.
3475 *
 * Uses an interlocked intrinsic or a lock-prefixed bts, depending on the
 * compiler.  The alignment requirement is checked with AssertMsg.
 *
3476 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3477 * the memory access isn't atomic!
3478 * @param iBit The bit to set.
3479 */
3480#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3481DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3482#else
3483DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3484{
3485 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3486# if RT_INLINE_ASM_USES_INTRIN
3487 _interlockedbittestandset((long *)pvBitmap, iBit);
3488# elif RT_INLINE_ASM_GNU_STYLE
    /* Same operand layout as the non-atomic ASMBitSet, plus the lock prefix. */
3489 __asm__ __volatile__("lock; btsl %1, %0"
3490 : "=m" (*(volatile long *)pvBitmap)
3491 : "Ir" (iBit),
3492 "m" (*(volatile long *)pvBitmap)
3493 : "memory");
3494# else
3495 __asm
3496 {
3497# ifdef RT_ARCH_AMD64
3498 mov rax, [pvBitmap]
3499 mov edx, [iBit]
3500 lock bts [rax], edx
3501# else
3502 mov eax, [pvBitmap]
3503 mov edx, [iBit]
3504 lock bts [eax], edx
3505# endif
3506 }
3507# endif
3508}
3509#endif
3510
3511
3512/**
3513 * Clears a bit in a bitmap.
3514 *
 * This is the non-atomic variant: none of the code paths below use a lock
 * prefix or an interlocked intrinsic.
 *
3515 * @param pvBitmap Pointer to the bitmap.
3516 * @param iBit The bit to clear.
3517 *
3518 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3519 * However, doing so will yield better performance as well as avoiding
3520 * traps accessing the last bits in the bitmap.
3521 */
3522#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3523DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3524#else
3525DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3526{
3527# if RT_INLINE_ASM_USES_INTRIN
3528 _bittestandreset((long *)pvBitmap, iBit);
3529
3530# elif RT_INLINE_ASM_GNU_STYLE
    /* The bitmap is listed both as output and input since btr modifies it
       in place; iBit may be an immediate ("I") or a register. */
3531 __asm__ __volatile__("btrl %1, %0"
3532 : "=m" (*(volatile long *)pvBitmap)
3533 : "Ir" (iBit),
3534 "m" (*(volatile long *)pvBitmap)
3535 : "memory");
3536# else
3537 __asm
3538 {
3539# ifdef RT_ARCH_AMD64
3540 mov rax, [pvBitmap]
3541 mov edx, [iBit]
3542 btr [rax], edx
3543# else
3544 mov eax, [pvBitmap]
3545 mov edx, [iBit]
3546 btr [eax], edx
3547# endif
3548 }
3549# endif
3550}
3551#endif
3552
3553
3554/**
3555 * Atomically clears a bit in a bitmap, ordered.
3556 *
 * Implemented with a lock-prefixed btr.  Unlike ASMAtomicBitSet there is
 * no intrinsic code path here; an external implementation is declared
 * whenever RT_INLINE_ASM_EXTERNAL is set.
 *
3557 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3558 * the memory access isn't atomic!
3559 * @param iBit The bit to clear.
3560 * @remarks No memory barrier, take care on smp.
 *          NOTE(review): this remark conflicts with the "ordered" summary
 *          above while both code paths use a lock prefix - confirm which
 *          statement is stale.
3561 */
3562#if RT_INLINE_ASM_EXTERNAL
3563DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3564#else
3565DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3566{
3567 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3568# if RT_INLINE_ASM_GNU_STYLE
3569 __asm__ __volatile__("lock; btrl %1, %0"
3570 : "=m" (*(volatile long *)pvBitmap)
3571 : "Ir" (iBit),
3572 "m" (*(volatile long *)pvBitmap)
3573 : "memory");
3574# else
3575 __asm
3576 {
3577# ifdef RT_ARCH_AMD64
3578 mov rax, [pvBitmap]
3579 mov edx, [iBit]
3580 lock btr [rax], edx
3581# else
3582 mov eax, [pvBitmap]
3583 mov edx, [iBit]
3584 lock btr [eax], edx
3585# endif
3586 }
3587# endif
3588}
3589#endif
3590
3591
3592/**
3593 * Toggles a bit in a bitmap.
3594 *
 * This is the non-atomic variant: none of the code paths below use a lock
 * prefix or an interlocked intrinsic.
 *
3595 * @param pvBitmap Pointer to the bitmap.
3596 * @param iBit The bit to toggle.
3597 *
3598 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3599 * However, doing so will yield better performance as well as avoiding
3600 * traps accessing the last bits in the bitmap.
3601 */
3602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3603DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3604#else
3605DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3606{
3607# if RT_INLINE_ASM_USES_INTRIN
3608 _bittestandcomplement((long *)pvBitmap, iBit);
3609# elif RT_INLINE_ASM_GNU_STYLE
    /* The bitmap is listed both as output and input since btc modifies it
       in place; iBit may be an immediate ("I") or a register. */
3610 __asm__ __volatile__("btcl %1, %0"
3611 : "=m" (*(volatile long *)pvBitmap)
3612 : "Ir" (iBit),
3613 "m" (*(volatile long *)pvBitmap)
3614 : "memory");
3615# else
3616 __asm
3617 {
3618# ifdef RT_ARCH_AMD64
3619 mov rax, [pvBitmap]
3620 mov edx, [iBit]
3621 btc [rax], edx
3622# else
3623 mov eax, [pvBitmap]
3624 mov edx, [iBit]
3625 btc [eax], edx
3626# endif
3627 }
3628# endif
3629}
3630#endif
3631
3632
3633/**
3634 * Atomically toggles a bit in a bitmap, ordered.
3635 *
 * Implemented with a lock-prefixed btc.  There is no intrinsic code path
 * here; an external implementation is declared whenever
 * RT_INLINE_ASM_EXTERNAL is set.
 *
3636 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3637 * the memory access isn't atomic!
3638 * @param iBit The bit to toggle.
3639 */
3640#if RT_INLINE_ASM_EXTERNAL
3641DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3642#else
3643DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3644{
3645 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3646# if RT_INLINE_ASM_GNU_STYLE
3647 __asm__ __volatile__("lock; btcl %1, %0"
3648 : "=m" (*(volatile long *)pvBitmap)
3649 : "Ir" (iBit),
3650 "m" (*(volatile long *)pvBitmap)
3651 : "memory");
3652# else
3653 __asm
3654 {
3655# ifdef RT_ARCH_AMD64
3656 mov rax, [pvBitmap]
3657 mov edx, [iBit]
3658 lock btc [rax], edx
3659# else
3660 mov eax, [pvBitmap]
3661 mov edx, [iBit]
3662 lock btc [eax], edx
3663# endif
3664 }
3665# endif
3666}
3667#endif
3668
3669
3670/**
3671 * Tests and sets a bit in a bitmap.
3672 *
 * Non-atomic: no lock prefix or interlocked intrinsic is used.  The return
 * value reflects the state of the bit before it was set.
 *
3673 * @returns true if the bit was set.
3674 * @returns false if the bit was clear.
3675 *
3676 * @param pvBitmap Pointer to the bitmap.
3677 * @param iBit The bit to test and set.
3678 *
3679 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3680 * However, doing so will yield better performance as well as avoiding
3681 * traps accessing the last bits in the bitmap.
3682 */
3683#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3684DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3685#else
3686DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3687{
    /* The union lets each code path store the result in the width it
       produces (u8 / u32) while the function returns it as a bool. */
3688 union { bool f; uint32_t u32; uint8_t u8; } rc;
3689# if RT_INLINE_ASM_USES_INTRIN
3690 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3691
3692# elif RT_INLINE_ASM_GNU_STYLE
    /* bts leaves the old bit value in the carry flag; setc copies it into
       the low byte of the result and the and-mask clears the remaining
       bits of the 32-bit register. */
3693 __asm__ __volatile__("btsl %2, %1\n\t"
3694 "setc %b0\n\t"
3695 "andl $1, %0\n\t"
3696 : "=q" (rc.u32),
3697 "=m" (*(volatile long *)pvBitmap)
3698 : "Ir" (iBit),
3699 "m" (*(volatile long *)pvBitmap)
3700 : "memory");
3701# else
3702 __asm
3703 {
3704 mov edx, [iBit]
3705# ifdef RT_ARCH_AMD64
3706 mov rax, [pvBitmap]
3707 bts [rax], edx
3708# else
3709 mov eax, [pvBitmap]
3710 bts [eax], edx
3711# endif
3712 setc al
3713 and eax, 1
3714 mov [rc.u32], eax
3715 }
3716# endif
3717 return rc.f;
3718}
3719#endif
3720
3721
3722/**
3723 * Atomically tests and sets a bit in a bitmap, ordered.
3724 *
 * Uses an interlocked intrinsic or a lock-prefixed bts, depending on the
 * compiler.  The return value reflects the state of the bit before it
 * was set.
 *
3725 * @returns true if the bit was set.
3726 * @returns false if the bit was clear.
3727 *
3728 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3729 * the memory access isn't atomic!
3730 * @param iBit The bit to set.
3731 */
3732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3733DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3734#else
3735DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3736{
    /* The union lets each code path store the result in the width it
       produces (u8 / u32) while the function returns it as a bool. */
3737 union { bool f; uint32_t u32; uint8_t u8; } rc;
3738 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3739# if RT_INLINE_ASM_USES_INTRIN
3740 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3741# elif RT_INLINE_ASM_GNU_STYLE
    /* The old bit value ends up in the carry flag; setc + and turn it
       into a clean 0/1 in the 32-bit result. */
3742 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3743 "setc %b0\n\t"
3744 "andl $1, %0\n\t"
3745 : "=q" (rc.u32),
3746 "=m" (*(volatile long *)pvBitmap)
3747 : "Ir" (iBit),
3748 "m" (*(volatile long *)pvBitmap)
3749 : "memory");
3750# else
3751 __asm
3752 {
3753 mov edx, [iBit]
3754# ifdef RT_ARCH_AMD64
3755 mov rax, [pvBitmap]
3756 lock bts [rax], edx
3757# else
3758 mov eax, [pvBitmap]
3759 lock bts [eax], edx
3760# endif
3761 setc al
3762 and eax, 1
3763 mov [rc.u32], eax
3764 }
3765# endif
3766 return rc.f;
3767}
3768#endif
3769
3770
3771/**
3772 * Tests and clears a bit in a bitmap.
3773 *
 * Non-atomic: no lock prefix or interlocked intrinsic is used.  The return
 * value reflects the state of the bit before it was cleared.
 *
3774 * @returns true if the bit was set.
3775 * @returns false if the bit was clear.
3776 *
3777 * @param pvBitmap Pointer to the bitmap.
3778 * @param iBit The bit to test and clear.
3779 *
3780 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3781 * However, doing so will yield better performance as well as avoiding
3782 * traps accessing the last bits in the bitmap.
3783 */
3784#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3785DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3786#else
3787DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3788{
    /* The union lets each code path store the result in the width it
       produces (u8 / u32) while the function returns it as a bool. */
3789 union { bool f; uint32_t u32; uint8_t u8; } rc;
3790# if RT_INLINE_ASM_USES_INTRIN
3791 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3792
3793# elif RT_INLINE_ASM_GNU_STYLE
    /* btr leaves the old bit value in the carry flag; setc + and turn it
       into a clean 0/1 in the 32-bit result. */
3794 __asm__ __volatile__("btrl %2, %1\n\t"
3795 "setc %b0\n\t"
3796 "andl $1, %0\n\t"
3797 : "=q" (rc.u32),
3798 "=m" (*(volatile long *)pvBitmap)
3799 : "Ir" (iBit),
3800 "m" (*(volatile long *)pvBitmap)
3801 : "memory");
3802# else
3803 __asm
3804 {
3805 mov edx, [iBit]
3806# ifdef RT_ARCH_AMD64
3807 mov rax, [pvBitmap]
3808 btr [rax], edx
3809# else
3810 mov eax, [pvBitmap]
3811 btr [eax], edx
3812# endif
3813 setc al
3814 and eax, 1
3815 mov [rc.u32], eax
3816 }
3817# endif
3818 return rc.f;
3819}
3820#endif
3821
3822
3823/**
3824 * Atomically tests and clears a bit in a bitmap, ordered.
3825 *
 * Uses an interlocked intrinsic or a lock-prefixed btr, depending on the
 * compiler.  The return value reflects the state of the bit before it
 * was cleared.
 *
3826 * @returns true if the bit was set.
3827 * @returns false if the bit was clear.
3828 *
3829 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3830 * the memory access isn't atomic!
3831 * @param iBit The bit to test and clear.
3832 *
3833 * @remarks No memory barrier, take care on smp.
 *          NOTE(review): this remark conflicts with the "ordered" summary
 *          above while the asm code paths use a lock prefix - confirm which
 *          statement is stale.
3834 */
3835#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3836DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3837#else
3838DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3839{
    /* The union lets each code path store the result in the width it
       produces (u8 / u32) while the function returns it as a bool. */
3840 union { bool f; uint32_t u32; uint8_t u8; } rc;
3841 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3842# if RT_INLINE_ASM_USES_INTRIN
3843 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3844
3845# elif RT_INLINE_ASM_GNU_STYLE
3846 __asm__ __volatile__("lock; btrl %2, %1\n\t"
3847 "setc %b0\n\t"
3848 "andl $1, %0\n\t"
3849 : "=q" (rc.u32),
3850 "=m" (*(volatile long *)pvBitmap)
3851 : "Ir" (iBit),
3852 "m" (*(volatile long *)pvBitmap)
3853 : "memory");
3854# else
3855 __asm
3856 {
3857 mov edx, [iBit]
3858# ifdef RT_ARCH_AMD64
3859 mov rax, [pvBitmap]
3860 lock btr [rax], edx
3861# else
3862 mov eax, [pvBitmap]
3863 lock btr [eax], edx
3864# endif
3865 setc al
3866 and eax, 1
3867 mov [rc.u32], eax
3868 }
3869# endif
3870 return rc.f;
3871}
3872#endif
3873
3874
3875/**
3876 * Tests and toggles a bit in a bitmap.
3877 *
 * Non-atomic: no lock prefix or interlocked intrinsic is used.  The return
 * value reflects the state of the bit before it was toggled.
 *
3878 * @returns true if the bit was set.
3879 * @returns false if the bit was clear.
3880 *
3881 * @param pvBitmap Pointer to the bitmap.
3882 * @param iBit The bit to test and toggle.
3883 *
3884 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3885 * However, doing so will yield better performance as well as avoiding
3886 * traps accessing the last bits in the bitmap.
3887 */
3888#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3889DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3890#else
3891DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3892{
    /* The union lets each code path store the result in the width it
       produces (u8 / u32) while the function returns it as a bool. */
3893 union { bool f; uint32_t u32; uint8_t u8; } rc;
3894# if RT_INLINE_ASM_USES_INTRIN
3895 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3896
3897# elif RT_INLINE_ASM_GNU_STYLE
    /* btc leaves the old bit value in the carry flag; setc + and turn it
       into a clean 0/1 in the 32-bit result. */
3898 __asm__ __volatile__("btcl %2, %1\n\t"
3899 "setc %b0\n\t"
3900 "andl $1, %0\n\t"
3901 : "=q" (rc.u32),
3902 "=m" (*(volatile long *)pvBitmap)
3903 : "Ir" (iBit),
3904 "m" (*(volatile long *)pvBitmap)
3905 : "memory");
3906# else
3907 __asm
3908 {
3909 mov edx, [iBit]
3910# ifdef RT_ARCH_AMD64
3911 mov rax, [pvBitmap]
3912 btc [rax], edx
3913# else
3914 mov eax, [pvBitmap]
3915 btc [eax], edx
3916# endif
3917 setc al
3918 and eax, 1
3919 mov [rc.u32], eax
3920 }
3921# endif
3922 return rc.f;
3923}
3924#endif
3925
3926
3927/**
3928 * Atomically tests and toggles a bit in a bitmap, ordered.
3929 *
 * Implemented with a lock-prefixed btc; there is no intrinsic code path
 * here and an external implementation is declared whenever
 * RT_INLINE_ASM_EXTERNAL is set.  The return value reflects the state of
 * the bit before it was toggled.
 *
3930 * @returns true if the bit was set.
3931 * @returns false if the bit was clear.
3932 *
3933 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3934 * the memory access isn't atomic!
3935 * @param iBit The bit to test and toggle.
3936 */
3937#if RT_INLINE_ASM_EXTERNAL
3938DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3939#else
3940DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3941{
    /* The union lets the asm store a 32-bit result which is then returned
       as a bool. */
3942 union { bool f; uint32_t u32; uint8_t u8; } rc;
3943 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3944# if RT_INLINE_ASM_GNU_STYLE
3945 __asm__ __volatile__("lock; btcl %2, %1\n\t"
3946 "setc %b0\n\t"
3947 "andl $1, %0\n\t"
3948 : "=q" (rc.u32),
3949 "=m" (*(volatile long *)pvBitmap)
3950 : "Ir" (iBit),
3951 "m" (*(volatile long *)pvBitmap)
3952 : "memory");
3953# else
3954 __asm
3955 {
3956 mov edx, [iBit]
3957# ifdef RT_ARCH_AMD64
3958 mov rax, [pvBitmap]
3959 lock btc [rax], edx
3960# else
3961 mov eax, [pvBitmap]
3962 lock btc [eax], edx
3963# endif
3964 setc al
3965 and eax, 1
3966 mov [rc.u32], eax
3967 }
3968# endif
3969 return rc.f;
3970}
3971#endif
3972
3973
3974/**
3975 * Tests if a bit in a bitmap is set.
3976 *
 * The bitmap is only read; none of the code paths modify it.
 *
3977 * @returns true if the bit is set.
3978 * @returns false if the bit is clear.
3979 *
3980 * @param pvBitmap Pointer to the bitmap.
3981 * @param iBit The bit to test.
3982 *
3983 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3984 * However, doing so will yield better performance as well as avoiding
3985 * traps accessing the last bits in the bitmap.
3986 */
3987#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3988DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
3989#else
3990DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
3991{
    /* The union lets each code path store a 32-bit result which is then
       returned as a bool. */
3992 union { bool f; uint32_t u32; uint8_t u8; } rc;
3993# if RT_INLINE_ASM_USES_INTRIN
3994 rc.u32 = _bittest((long *)pvBitmap, iBit);
3995# elif RT_INLINE_ASM_GNU_STYLE
3996
    /* bt copies the selected bit into the carry flag; setc + and turn it
       into a clean 0/1 in the 32-bit result. */
3997 __asm__ __volatile__("btl %2, %1\n\t"
3998 "setc %b0\n\t"
3999 "andl $1, %0\n\t"
4000 : "=q" (rc.u32)
4001 : "m" (*(const volatile long *)pvBitmap),
4002 "Ir" (iBit)
4003 : "memory");
4004# else
4005 __asm
4006 {
4007 mov edx, [iBit]
4008# ifdef RT_ARCH_AMD64
4009 mov rax, [pvBitmap]
4010 bt [rax], edx
4011# else
4012 mov eax, [pvBitmap]
4013 bt [eax], edx
4014# endif
4015 setc al
4016 and eax, 1
4017 mov [rc.u32], eax
4018 }
4019# endif
4020 return rc.f;
4021}
4022#endif
4023
4024
4025/**
4026 * Clears a bit range within a bitmap.
4027 *
4028 * @param pvBitmap Pointer to the bitmap.
4029 * @param iBitStart The First bit to clear.
4030 * @param iBitEnd The first bit not to clear.
4031 */
4032DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4033{
4034 if (iBitStart < iBitEnd)
4035 {
4036 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4037 int iStart = iBitStart & ~31;
4038 int iEnd = iBitEnd & ~31;
4039 if (iStart == iEnd)
4040 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4041 else
4042 {
4043 /* bits in first dword. */
4044 if (iBitStart & 31)
4045 {
4046 *pu32 &= (1 << (iBitStart & 31)) - 1;
4047 pu32++;
4048 iBitStart = iStart + 32;
4049 }
4050
4051 /* whole dword. */
4052 if (iBitStart != iEnd)
4053 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4054
4055 /* bits in last dword. */
4056 if (iBitEnd & 31)
4057 {
4058 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4059 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4060 }
4061 }
4062 }
4063}
4064
4065
4066/**
4067 * Sets a bit range within a bitmap.
4068 *
4069 * @param pvBitmap Pointer to the bitmap.
4070 * @param iBitStart The First bit to set.
4071 * @param iBitEnd The first bit not to set.
4072 */
4073DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4074{
4075 if (iBitStart < iBitEnd)
4076 {
4077 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4078 int iStart = iBitStart & ~31;
4079 int iEnd = iBitEnd & ~31;
4080 if (iStart == iEnd)
4081 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4082 else
4083 {
4084 /* bits in first dword. */
4085 if (iBitStart & 31)
4086 {
4087 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4088 pu32++;
4089 iBitStart = iStart + 32;
4090 }
4091
4092 /* whole dword. */
4093 if (iBitStart != iEnd)
4094 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
4095
4096 /* bits in last dword. */
4097 if (iBitEnd & 31)
4098 {
4099 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4100 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4101 }
4102 }
4103 }
4104}
4105
4106
4107/**
4108 * Finds the first clear bit in a bitmap.
4109 *
 * The bitmap is scanned one dword at a time for a dword that differs from
 * 0xffffffff; the lowest clear bit of that dword is then located with bsf.
 * cBits is passed through RT_ALIGN_32(cBits, 32) first (presumably rounding
 * up to a multiple of 32 - macro defined elsewhere), so the whole of the
 * last dword is examined.
 *
4110 * @returns Index of the first zero bit.
4111 * @returns -1 if no clear bit was found (also when cBits is 0).
4112 * @param pvBitmap Pointer to the bitmap.
4113 * @param cBits The number of bits in the bitmap. Multiple of 32.
4114 */
4115#if RT_INLINE_ASM_EXTERNAL
4116DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4117#else
4118DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4119{
4120 if (cBits)
4121 {
4122 int32_t iBit;
4123# if RT_INLINE_ASM_GNU_STYLE
4124 RTCCUINTREG uEAX, uECX, uEDI;
4125 cBits = RT_ALIGN_32(cBits, 32);
    /*
     * Register setup (see the constraints): edx = 0xffffffff (the -1 "not
     * found" result), ecx = dword count, edi = bitmap, eax = 0xffffffff
     * (the pattern to skip).
     *
     * repe scasl advances while dwords equal eax.  If it ran to the end
     * without a mismatch, 'je 1f' skips the rest and edx stays -1.
     * Otherwise edi is moved back onto the mismatching dword, the dword is
     * inverted via xor with eax so clear bits become set, the byte offset
     * from the bitmap start (%5) is turned into a bit offset (shll $3) and
     * the index of the lowest set bit (bsfl) is added to it.
     */
4126 __asm__ __volatile__("repe; scasl\n\t"
4127 "je 1f\n\t"
4128# ifdef RT_ARCH_AMD64
4129 "lea -4(%%rdi), %%rdi\n\t"
4130 "xorl (%%rdi), %%eax\n\t"
4131 "subq %5, %%rdi\n\t"
4132# else
4133 "lea -4(%%edi), %%edi\n\t"
4134 "xorl (%%edi), %%eax\n\t"
4135 "subl %5, %%edi\n\t"
4136# endif
4137 "shll $3, %%edi\n\t"
4138 "bsfl %%eax, %%edx\n\t"
4139 "addl %%edi, %%edx\n\t"
4140 "1:\t\n"
4141 : "=d" (iBit),
4142 "=&c" (uECX),
4143 "=&D" (uEDI),
4144 "=&a" (uEAX)
4145 : "0" (0xffffffff),
4146 "mr" (pvBitmap),
4147 "1" (cBits >> 5),
4148 "2" (pvBitmap),
4149 "3" (0xffffffff));
4150# else
4151 cBits = RT_ALIGN_32(cBits, 32);
    /* Same algorithm as the GNU variant; ebx/rbx holds the bitmap start
       for computing the offset of the mismatching dword. */
4152 __asm
4153 {
4154# ifdef RT_ARCH_AMD64
4155 mov rdi, [pvBitmap]
4156 mov rbx, rdi
4157# else
4158 mov edi, [pvBitmap]
4159 mov ebx, edi
4160# endif
4161 mov edx, 0ffffffffh
4162 mov eax, edx
4163 mov ecx, [cBits]
4164 shr ecx, 5
4165 repe scasd
4166 je done
4167
4168# ifdef RT_ARCH_AMD64
4169 lea rdi, [rdi - 4]
4170 xor eax, [rdi]
4171 sub rdi, rbx
4172# else
4173 lea edi, [edi - 4]
4174 xor eax, [edi]
4175 sub edi, ebx
4176# endif
4177 shl edi, 3
4178 bsf edx, eax
4179 add edx, edi
4180 done:
4181 mov [iBit], edx
4182 }
4183# endif
4184 return iBit;
4185 }
4186 return -1;
4187}
4188#endif
4189
4190
4191/**
4192 * Finds the next clear bit in a bitmap.
4193 *
 * When the search start (iBitPrev + 1) is not on a 32-bit boundary, the
 * remainder of the dword containing it is checked first; the rest of the
 * bitmap is then handed to ASMBitFirstClear.
 *
4194 * @returns Index of the next zero bit.
4195 * @returns -1 if no clear bit was found.
4196 * @param pvBitmap Pointer to the bitmap.
4197 * @param cBits The number of bits in the bitmap. Multiple of 32.
4198 * @param iBitPrev The bit returned from the last search.
4199 * The search will start at iBitPrev + 1.
4200 */
4201#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4202DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4203#else
4204DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4205{
4206 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
    /* From here on iBitPrev is the first bit to examine; iBit is its
       position within its dword. */
4207 int iBit = ++iBitPrev & 31;
4208 if (iBit)
4209 {
4210 /*
4211 * Inspect the 32-bit word containing the unaligned bit.
         * The word is inverted so that clear bits become set, and the bits
         * before the search start are shifted out.  Note that a hit in
         * this word is returned without being compared against cBits.
4212 */
4213 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4214
4215# if RT_INLINE_ASM_USES_INTRIN
4216 unsigned long ulBit = 0;
4217 if (_BitScanForward(&ulBit, u32))
4218 return ulBit + iBitPrev;
4219# else
4220# if RT_INLINE_ASM_GNU_STYLE
    /* bsf yields the index of the lowest set bit; when u32 is zero the
       result is forced to -1 instead. */
4221 __asm__ __volatile__("bsf %1, %0\n\t"
4222 "jnz 1f\n\t"
4223 "movl $-1, %0\n\t"
4224 "1:\n\t"
4225 : "=r" (iBit)
4226 : "r" (u32));
4227# else
4228 __asm
4229 {
4230 mov edx, [u32]
4231 bsf eax, edx
4232 jnz done
4233 mov eax, 0ffffffffh
4234 done:
4235 mov [iBit], eax
4236 }
4237# endif
4238 if (iBit >= 0)
4239 return iBit + iBitPrev;
4240# endif
4241
4242 /*
4243 * Skip ahead and see if there is anything left to search.
         * (Rounds iBitPrev up to the start of the next dword.)
4244 */
4245 iBitPrev |= 31;
4246 iBitPrev++;
4247 if (cBits <= (uint32_t)iBitPrev)
4248 return -1;
4249 }
4250
4251 /*
4252 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
     * Its result is relative to the dword passed in, so the offset of that
     * dword is added back on success.
4253 */
4254 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4255 if (iBit >= 0)
4256 iBit += iBitPrev;
4257 return iBit;
4258}
4259#endif
4260
4261
4262/**
4263 * Finds the first set bit in a bitmap.
4264 *
 * The bitmap is scanned one dword at a time for a non-zero dword; the
 * lowest set bit of that dword is then located with bsf.  cBits is passed
 * through RT_ALIGN_32(cBits, 32) first (presumably rounding up to a
 * multiple of 32 - macro defined elsewhere), so the whole of the last
 * dword is examined.
 *
4265 * @returns Index of the first set bit.
4266 * @returns -1 if no set bit was found (also when cBits is 0).
4267 * @param pvBitmap Pointer to the bitmap.
4268 * @param cBits The number of bits in the bitmap. Multiple of 32.
4269 */
4270#if RT_INLINE_ASM_EXTERNAL
4271DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4272#else
4273DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4274{
4275 if (cBits)
4276 {
4277 int32_t iBit;
4278# if RT_INLINE_ASM_GNU_STYLE
4279 RTCCUINTREG uEAX, uECX, uEDI;
4280 cBits = RT_ALIGN_32(cBits, 32);
    /*
     * Register setup (see the constraints): edx = 0xffffffff (the -1 "not
     * found" result), ecx = dword count, edi = bitmap, eax = 0 (the
     * pattern to skip).
     *
     * repe scasl advances while dwords equal eax.  If it ran to the end
     * without a mismatch, 'je 1f' skips the rest and edx stays -1.
     * Otherwise edi is moved back onto the non-zero dword, which is loaded
     * into eax; the byte offset from the bitmap start (%5) is turned into
     * a bit offset (shll $3) and the index of the lowest set bit (bsfl) is
     * added to it.
     */
4281 __asm__ __volatile__("repe; scasl\n\t"
4282 "je 1f\n\t"
4283# ifdef RT_ARCH_AMD64
4284 "lea -4(%%rdi), %%rdi\n\t"
4285 "movl (%%rdi), %%eax\n\t"
4286 "subq %5, %%rdi\n\t"
4287# else
4288 "lea -4(%%edi), %%edi\n\t"
4289 "movl (%%edi), %%eax\n\t"
4290 "subl %5, %%edi\n\t"
4291# endif
4292 "shll $3, %%edi\n\t"
4293 "bsfl %%eax, %%edx\n\t"
4294 "addl %%edi, %%edx\n\t"
4295 "1:\t\n"
4296 : "=d" (iBit),
4297 "=&c" (uECX),
4298 "=&D" (uEDI),
4299 "=&a" (uEAX)
4300 : "0" (0xffffffff),
4301 "mr" (pvBitmap),
4302 "1" (cBits >> 5),
4303 "2" (pvBitmap),
4304 "3" (0));
4305# else
4306 cBits = RT_ALIGN_32(cBits, 32);
    /* Same algorithm as the GNU variant; ebx/rbx holds the bitmap start
       for computing the offset of the non-zero dword. */
4307 __asm
4308 {
4309# ifdef RT_ARCH_AMD64
4310 mov rdi, [pvBitmap]
4311 mov rbx, rdi
4312# else
4313 mov edi, [pvBitmap]
4314 mov ebx, edi
4315# endif
4316 mov edx, 0ffffffffh
4317 xor eax, eax
4318 mov ecx, [cBits]
4319 shr ecx, 5
4320 repe scasd
4321 je done
4322# ifdef RT_ARCH_AMD64
4323 lea rdi, [rdi - 4]
4324 mov eax, [rdi]
4325 sub rdi, rbx
4326# else
4327 lea edi, [edi - 4]
4328 mov eax, [edi]
4329 sub edi, ebx
4330# endif
4331 shl edi, 3
4332 bsf edx, eax
4333 add edx, edi
4334 done:
4335 mov [iBit], edx
4336 }
4337# endif
4338 return iBit;
4339 }
4340 return -1;
4341}
4342#endif
4343
4344
4345/**
4346 * Finds the next set bit in a bitmap.
4347 *
 * When the search start (iBitPrev + 1) is not on a 32-bit boundary, the
 * remainder of the dword containing it is checked first; the rest of the
 * bitmap is then handed to ASMBitFirstSet.
 *
4348 * @returns Index of the next set bit.
4349 * @returns -1 if no set bit was found.
4350 * @param pvBitmap Pointer to the bitmap.
4351 * @param cBits The number of bits in the bitmap. Multiple of 32.
4352 * @param iBitPrev The bit returned from the last search.
4353 * The search will start at iBitPrev + 1.
4354 */
4355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4356DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4357#else
4358DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4359{
4360 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
    /* From here on iBitPrev is the first bit to examine; iBit is its
       position within its dword. */
4361 int iBit = ++iBitPrev & 31;
4362 if (iBit)
4363 {
4364 /*
4365 * Inspect the 32-bit word containing the unaligned bit.
         * The bits before the search start are shifted out.  Note that a
         * hit in this word is returned without being compared against
         * cBits.
4366 */
4367 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4368
4369# if RT_INLINE_ASM_USES_INTRIN
4370 unsigned long ulBit = 0;
4371 if (_BitScanForward(&ulBit, u32))
4372 return ulBit + iBitPrev;
4373# else
4374# if RT_INLINE_ASM_GNU_STYLE
    /* bsf yields the index of the lowest set bit; when u32 is zero the
       result is forced to -1 instead. */
4375 __asm__ __volatile__("bsf %1, %0\n\t"
4376 "jnz 1f\n\t"
4377 "movl $-1, %0\n\t"
4378 "1:\n\t"
4379 : "=r" (iBit)
4380 : "r" (u32));
4381# else
4382 __asm
4383 {
4384 mov edx, [u32]
4385 bsf eax, edx
4386 jnz done
4387 mov eax, 0ffffffffh
4388 done:
4389 mov [iBit], eax
4390 }
4391# endif
4392 if (iBit >= 0)
4393 return iBit + iBitPrev;
4394# endif
4395
4396 /*
4397 * Skip ahead and see if there is anything left to search.
         * (Rounds iBitPrev up to the start of the next dword.)
4398 */
4399 iBitPrev |= 31;
4400 iBitPrev++;
4401 if (cBits <= (uint32_t)iBitPrev)
4402 return -1;
4403 }
4404
4405 /*
4406 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
     * Its result is relative to the dword passed in, so the offset of that
     * dword is added back on success.
4407 */
4408 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4409 if (iBit >= 0)
4410 iBit += iBitPrev;
4411 return iBit;
4412}
4413#endif
4414
4415
4416/**
4417 * Finds the first bit which is set in the given 32-bit integer.
4418 * Bits are numbered from 1 (least significant) to 32.
4419 *
 * All code paths do a forward bit scan and add one to the zero-based
 * index on success, or return zero when the scan finds nothing.
 *
4420 * @returns index [1..32] of the first set bit.
4421 * @returns 0 if all bits are cleared.
4422 * @param u32 Integer to search for set bits.
4423 * @remark Similar to ffs() in BSD.
4424 */
4425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4426DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4427#else
4428DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4429{
4430# if RT_INLINE_ASM_USES_INTRIN
4431 unsigned long iBit;
4432 if (_BitScanForward(&iBit, u32))
4433 iBit++;
4434 else
4435 iBit = 0;
4436# elif RT_INLINE_ASM_GNU_STYLE
4437 uint32_t iBit;
    /* bsf sets ZF when the source is zero: in that case the result is
       zeroed, otherwise the zero-based index is incremented. */
4438 __asm__ __volatile__("bsf %1, %0\n\t"
4439 "jnz 1f\n\t"
4440 "xorl %0, %0\n\t"
4441 "jmp 2f\n"
4442 "1:\n\t"
4443 "incl %0\n"
4444 "2:\n\t"
4445 : "=r" (iBit)
4446 : "rm" (u32));
4447# else
4448 uint32_t iBit;
4449 _asm
4450 {
4451 bsf eax, [u32]
4452 jnz found
4453 xor eax, eax
4454 jmp done
4455 found:
4456 inc eax
4457 done:
4458 mov [iBit], eax
4459 }
4460# endif
4461 return iBit;
4462}
4463#endif
4464
4465
4466/**
4467 * Finds the first bit which is set in the given 32-bit integer.
4468 * Bits are numbered from 1 (least significant) to 32.
4469 *
4470 * @returns index [1..32] of the first set bit.
4471 * @returns 0 if all bits are cleared.
4472 * @param i32 Integer to search for set bits.
4473 * @remark Similar to ffs() in BSD.
4474 */
4475DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4476{
4477 return ASMBitFirstSetU32((uint32_t)i32);
4478}
4479
4480
4481/**
4482 * Finds the last bit which is set in the given 32-bit integer.
4483 * Bits are numbered from 1 (least significant) to 32.
4484 *
4485 * @returns index [1..32] of the last set bit.
4486 * @returns 0 if all bits are cleared.
4487 * @param u32 Integer to search for set bits.
4488 * @remark Similar to fls() in BSD.
4489 */
4490#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4491DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4492#else
4493DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4494{
4495# if RT_INLINE_ASM_USES_INTRIN
4496 unsigned long iBit;
4497 if (_BitScanReverse(&iBit, u32))
4498 iBit++;
4499 else
4500 iBit = 0;
4501# elif RT_INLINE_ASM_GNU_STYLE
4502 uint32_t iBit;
4503 __asm__ __volatile__("bsrl %1, %0\n\t"
4504 "jnz 1f\n\t"
4505 "xorl %0, %0\n\t"
4506 "jmp 2f\n"
4507 "1:\n\t"
4508 "incl %0\n"
4509 "2:\n\t"
4510 : "=r" (iBit)
4511 : "rm" (u32));
4512# else
4513 uint32_t iBit;
4514 _asm
4515 {
4516 bsr eax, [u32]
4517 jnz found
4518 xor eax, eax
4519 jmp done
4520 found:
4521 inc eax
4522 done:
4523 mov [iBit], eax
4524 }
4525# endif
4526 return iBit;
4527}
4528#endif
4529
4530
4531/**
4532 * Finds the last bit which is set in the given 32-bit integer.
4533 * Bits are numbered from 1 (least significant) to 32.
4534 *
4535 * @returns index [1..32] of the last set bit.
4536 * @returns 0 if all bits are cleared.
4537 * @param i32 Integer to search for set bits.
4538 * @remark Similar to fls() in BSD.
4539 */
4540DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4541{
4542 return ASMBitLastSetU32((uint32_t)i32);
4543}
4544
4545/**
4546 * Reverse the byte order of the given 16-bit integer.
4547 *
4548 * @returns Revert
4549 * @param u16 16-bit integer value.
4550 */
4551#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4552DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4553#else
4554DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4555{
4556# if RT_INLINE_ASM_USES_INTRIN
4557 u16 = _byteswap_ushort(u16);
4558# elif RT_INLINE_ASM_GNU_STYLE
4559 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4560# else
4561 _asm
4562 {
4563 mov ax, [u16]
4564 ror ax, 8
4565 mov [u16], ax
4566 }
4567# endif
4568 return u16;
4569}
4570#endif
4571
4572
4573/**
4574 * Reverse the byte order of the given 32-bit integer.
4575 *
4576 * @returns Revert
4577 * @param u32 32-bit integer value.
4578 */
4579#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4580DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4581#else
4582DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4583{
4584# if RT_INLINE_ASM_USES_INTRIN
4585 u32 = _byteswap_ulong(u32);
4586# elif RT_INLINE_ASM_GNU_STYLE
4587 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4588# else
4589 _asm
4590 {
4591 mov eax, [u32]
4592 bswap eax
4593 mov [u32], eax
4594 }
4595# endif
4596 return u32;
4597}
4598#endif
4599
4600
4601/**
4602 * Reverse the byte order of the given 64-bit integer.
4603 *
4604 * @returns Revert
4605 * @param u64 64-bit integer value.
4606 */
4607DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4608{
4609#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4610 u64 = _byteswap_uint64(u64);
4611#else
4612 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4613 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4614#endif
4615 return u64;
4616}
4617
4618
4619/** @} */
4620
4621
4622/** @} */
4623
4624#endif
4625
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette