VirtualBox

source: vbox/trunk/include/iprt/asm.h@71608

Last change on this file since 71608 was 71522, checked in by vboxsync, 7 years ago

Fixes for SPARC

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Visual C++ (_MSC_VER >= 1400) intrinsics.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is that
103 * the former complete outstanding reads and writes before continuing,
104 * while the latter make no promises about the order. Even ordered
105 * operations do not appear to make a 100% guarantee as to whether the
106 * operation will complete before any subsequent memory access.
107 * (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
118 * static inline uint32_t rdmsr_low(int idx)
119 * {
120 * uint32_t low;
121 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
122 * return low;
 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
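 * As a minimal sketch of the fix (an illustration added here, not part of the
 * original example; rdmsr_low_volatile is a hypothetical name): marking the
 * statement __volatile__ tells gcc that the instruction has side effects and
 * must not be reordered or optimized away:
 *
 * @code
 * static inline uint32_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
 * @endcode
 *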
135 * @{
136 */
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
149/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
150 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
151 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
152 * mode, x86.
153 *
154 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
155 * when in PIC mode on x86.
156 */
157#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
158# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
160# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
161# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
162# elif ( (defined(PIC) || defined(__PIC__)) \
163 && defined(RT_ARCH_X86) \
164 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
165 || defined(RT_OS_DARWIN)) )
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# else
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# endif
170#endif
171
172
173/** @def ASMReturnAddress
174 * Gets the return address of the current (or calling if you like) function or method.
175 */
176#ifdef _MSC_VER
177# ifdef __cplusplus
178extern "C"
179# endif
180void * _ReturnAddress(void);
181# pragma intrinsic(_ReturnAddress)
182# define ASMReturnAddress() _ReturnAddress()
183#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
184# define ASMReturnAddress() __builtin_return_address(0)
185#elif defined(__WATCOMC__)
186# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
187#else
188# error "Unsupported compiler."
189#endif
190
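/* Usage sketch (an added illustration, not part of the original header): a
 * hypothetical helper that records where it was called from, e.g. for
 * tracking down who last acquired some resource.
 * @code
 * static void RT_FAR *g_pvLastCaller;
 *
 * static void noteCaller(void)
 * {
 *     g_pvLastCaller = ASMReturnAddress();
 * }
 * @endcode
 */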
191
192/**
193 * Compiler memory barrier.
194 *
195 * Ensures that the compiler does not use any cached (register/temporary stack)
196 * memory values and completes any outstanding writes before returning from this function.
197 *
198 * This function must be used if non-volatile data is modified by a
199 * device or the VMM. Typical cases are port access, MMIO access,
200 * trapping instructions, etc.
201 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
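/* Usage sketch (an added illustration, not part of the original header):
 * polling a non-volatile flag that a device or another thread updates behind
 * the compiler's back.  The barrier forces the compiler to re-read g_fDone on
 * every iteration instead of keeping a cached register copy; the names are
 * hypothetical.
 * @code
 * static uint32_t g_fDone;   // set to non-zero by a device / another thread
 *
 * static void waitForDone(void)
 * {
 *     while (!g_fDone)
 *         ASMCompilerBarrier();   // discard cached memory values, re-read g_fDone
 * }
 * @endcode
 */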
217
218/** @def ASMBreakpoint
219 * Debugger Breakpoint.
220 * @deprecated Use RT_BREAKPOINT instead.
221 * @internal
222 */
223#define ASMBreakpoint() RT_BREAKPOINT()
224
225
226/**
227 * Spinloop hint for platforms that have one; an empty function on the other
228 * platforms.
229 *
230 * x86 & AMD64: The PAUSE variant of NOP, which helps hyperthreaded CPUs detect
231 * spin locks.
232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
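/* Usage sketch (an added illustration, not part of the original header): the
 * typical home for the pause hint is the busy-wait loop of a spinlock.  The
 * names are hypothetical; ASMAtomicXchgU32 is declared further down in this
 * header.
 * @code
 * static volatile uint32_t g_fLocked;
 *
 * static void spinLockAcquire(void)
 * {
 *     while (ASMAtomicXchgU32(&g_fLocked, 1) != 0)
 *         ASMNopPause();   // tell the CPU (and its hyperthreaded sibling) we are spinning
 * }
 * @endcode
 */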
253
254/**
255 * Atomically Exchange an unsigned 8-bit value, ordered.
256 *
257 * @returns Current *pu8 value
258 * @param pu8 Pointer to the 8-bit variable to update.
259 * @param u8 The 8-bit value to assign to *pu8.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pi8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the boolean variable to update.
311 * @param f The boolean value to assign to *pf.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
324 * Atomically Exchange an unsigned 16-bit value, ordered.
325 *
326 * @returns Current *pu16 value
327 * @param pu16 Pointer to the 16-bit variable to update.
328 * @param u16 The 16-bit value to assign to *pu16.
329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pi16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
372}
373
374
375/**
376 * Atomically Exchange an unsigned 32-bit value, ordered.
377 *
378 * @returns Current *pu32 value
379 * @param pu32 Pointer to the 32-bit variable to update.
380 * @param u32 The 32-bit value to assign to *pu32.
381 *
382 * @remarks Does not work on 286 and earlier.
383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
415 return u32;
416}
417#endif
418
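/* Usage sketch (an added illustration, not part of the original header):
 * atomic exchange is handy for "take and clear" hand-offs, e.g. consuming a
 * set of pending event bits in one shot.  The names are hypothetical.
 * @code
 * static volatile uint32_t g_fPendingEvents;
 *
 * static uint32_t consumePendingEvents(void)
 * {
 *     // Returns the events that were pending and atomically clears them.
 *     return ASMAtomicXchgU32(&g_fPendingEvents, 0);
 * }
 * @endcode
 */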
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pi32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
430}
431
432
433/**
434 * Atomically Exchange an unsigned 64-bit value, ordered.
435 *
436 * @returns Current *pu64 value
437 * @param pu64 Pointer to the 64-bit variable to update.
438 * @param u64 The 64-bit value to assign to *pu64.
439 *
440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange a signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a pointer value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param ppv Pointer to the pointer variable to update.
533 * @param pv The pointer value to assign to *ppv.
534 */
535DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
536{
537#if ARCH_BITS == 32 || ARCH_BITS == 16
538 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
539#elif ARCH_BITS == 64
540 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
541#else
542# error "ARCH_BITS is bogus"
543#endif
544}
545
546
547/**
548 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
549 *
550 * @returns Current *ppv value
551 * @param ppv Pointer to the pointer variable to update.
552 * @param pv The pointer value to assign to *ppv.
553 * @param Type The type of *ppv, sans volatile.
554 */
555#ifdef __GNUC__
556# define ASMAtomicXchgPtrT(ppv, pv, Type) \
557 __extension__ \
558 ({\
559 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
560 Type const pvTypeChecked = (pv); \
561 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
562 pvTypeCheckedRet; \
563 })
564#else
565# define ASMAtomicXchgPtrT(ppv, pv, Type) \
566 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
567#endif
568
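/* Usage sketch (an added illustration, not part of the original header): the
 * typed variant saves the caller the casts when swapping out an object
 * pointer, e.g. detaching the current buffer for destruction.  MYBUFFER /
 * PMYBUFFER are hypothetical types.
 * @code
 * typedef struct MYBUFFER *PMYBUFFER;
 * static PMYBUFFER volatile g_pCurBuf;
 *
 * static PMYBUFFER detachCurrentBuffer(void)
 * {
 *     return ASMAtomicXchgPtrT(&g_pCurBuf, NULL, PMYBUFFER);
 * }
 * @endcode
 */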
569
570/**
571 * Atomically Exchange a raw-mode context pointer value, ordered.
572 *
573 * @returns Current *ppv value
574 * @param ppvRC Pointer to the pointer variable to update.
575 * @param pvRC The pointer value to assign to *ppv.
576 */
577DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
578{
579 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
580}
581
582
583/**
584 * Atomically Exchange a ring-0 pointer value, ordered.
585 *
586 * @returns Current *ppv value
587 * @param ppvR0 Pointer to the pointer variable to update.
588 * @param pvR0 The pointer value to assign to *ppv.
589 */
590DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
591{
592#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
593 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
594#elif R0_ARCH_BITS == 64
595 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
596#else
597# error "R0_ARCH_BITS is bogus"
598#endif
599}
600
601
602/**
603 * Atomically Exchange a ring-3 pointer value, ordered.
604 *
605 * @returns Current *ppv value
606 * @param ppvR3 Pointer to the pointer variable to update.
607 * @param pvR3 The pointer value to assign to *ppv.
608 */
609DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
610{
611#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
612 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
613#elif R3_ARCH_BITS == 64
614 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
615#else
616# error "R3_ARCH_BITS is bogus"
617#endif
618}
619
620
621/** @def ASMAtomicXchgHandle
622 * Atomically Exchange a typical IPRT handle value, ordered.
623 *
624 * @param ph Pointer to the value to update.
625 * @param hNew The new value to assign to *ph.
626 * @param phRes Where to store the current *ph value.
627 *
628 * @remarks This doesn't currently work for all handles (like RTFILE).
629 */
630#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
635 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
636 } while (0)
637#elif HC_ARCH_BITS == 64
638# define ASMAtomicXchgHandle(ph, hNew, phRes) \
639 do { \
640 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
641 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
642 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
643 } while (0)
644#else
645# error HC_ARCH_BITS
646#endif
647
648
649/**
650 * Atomically Exchange a value which size might differ
651 * between platforms or compilers, ordered.
652 *
653 * @param pu Pointer to the variable to update.
654 * @param uNew The value to assign to *pu.
655 * @todo This is busted as it's missing the result argument.
656 */
657#define ASMAtomicXchgSize(pu, uNew) \
658 do { \
659 switch (sizeof(*(pu))) { \
660 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
661 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
662 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
663 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
664 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
665 } \
666 } while (0)
667
668/**
669 * Atomically Exchange a value which size might differ
670 * between platforms or compilers, ordered.
671 *
672 * @param pu Pointer to the variable to update.
673 * @param uNew The value to assign to *pu.
674 * @param puRes Where to store the current *pu value.
675 */
676#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
677 do { \
678 switch (sizeof(*(pu))) { \
679 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
680 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
681 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
682 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
683 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
684 } \
685 } while (0)
686
687
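/* Usage sketch (an added illustration, not part of the original header): the
 * *Size variants pick the right fixed-width worker based on sizeof(*pu),
 * which helps with types whose width differs between platforms/compilers.
 * RTHCPHYS (a 64-bit type from iprt/types.h) merely serves as an example.
 * @code
 * static volatile RTHCPHYS g_HCPhysCur;
 *
 * static RTHCPHYS swapCurrentPhysAddr(RTHCPHYS HCPhysNew)
 * {
 *     RTHCPHYS HCPhysOld;
 *     ASMAtomicXchgSizeCorrect(&g_HCPhysCur, HCPhysNew, &HCPhysOld);
 *     return HCPhysOld;
 * }
 * @endcode
 */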
688
689/**
690 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
691 *
692 * @returns true if xchg was done.
693 * @returns false if xchg wasn't done.
694 *
695 * @param pu8 Pointer to the value to update.
696 * @param u8New The new value to assign to *pu8.
697 * @param u8Old The old value to compare *pu8 with.
698 *
699 * @remarks x86: Requires a 486 or later.
700 */
701#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
702DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
703#else
704DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
705{
706 uint8_t u8Ret;
707 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
708 "setz %1\n\t"
709 : "=m" (*pu8),
710 "=qm" (u8Ret),
711 "=a" (u8Old)
712 : "q" (u8New),
713 "2" (u8Old),
714 "m" (*pu8));
715 return (bool)u8Ret;
716}
717#endif
718
719
720/**
721 * Atomically Compare and Exchange a signed 8-bit value, ordered.
722 *
723 * @returns true if xchg was done.
724 * @returns false if xchg wasn't done.
725 *
726 * @param pi8 Pointer to the value to update.
727 * @param i8New The new value to assign to *pi8.
728 * @param i8Old The old value to compare *pi8 with.
729 *
730 * @remarks x86: Requires a 486 or later.
731 */
732DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
733{
734 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
735}
736
737
738/**
739 * Atomically Compare and Exchange a bool value, ordered.
740 *
741 * @returns true if xchg was done.
742 * @returns false if xchg wasn't done.
743 *
744 * @param pf Pointer to the value to update.
745 * @param fNew The new value to assign to *pf.
746 * @param fOld The old value to compare *pf with.
747 *
748 * @remarks x86: Requires a 486 or later.
749 */
750DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
751{
752 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
753}
754
755
756/**
757 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
758 *
759 * @returns true if xchg was done.
760 * @returns false if xchg wasn't done.
761 *
762 * @param pu32 Pointer to the value to update.
763 * @param u32New The new value to assign to *pu32.
764 * @param u32Old The old value to compare *pu32 with.
765 *
766 * @remarks x86: Requires a 486 or later.
767 */
768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
769DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
770#else
771DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
772{
773# if RT_INLINE_ASM_GNU_STYLE
774 uint8_t u8Ret;
775 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
776 "setz %1\n\t"
777 : "=m" (*pu32),
778 "=qm" (u8Ret),
779 "=a" (u32Old)
780 : "r" (u32New),
781 "2" (u32Old),
782 "m" (*pu32));
783 return (bool)u8Ret;
784
785# elif RT_INLINE_ASM_USES_INTRIN
786 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
787
788# else
789 uint32_t u32Ret;
790 __asm
791 {
792# ifdef RT_ARCH_AMD64
793 mov rdx, [pu32]
794# else
795 mov edx, [pu32]
796# endif
797 mov eax, [u32Old]
798 mov ecx, [u32New]
799# ifdef RT_ARCH_AMD64
800 lock cmpxchg [rdx], ecx
801# else
802 lock cmpxchg [edx], ecx
803# endif
804 setz al
805 movzx eax, al
806 mov [u32Ret], eax
807 }
808 return !!u32Ret;
809# endif
810}
811#endif
812
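/* Usage sketch (an added illustration, not part of the original header): the
 * classic compare-and-exchange retry loop, here keeping track of an atomic
 * maximum.  The function name is hypothetical; ASMAtomicReadU32 is declared
 * further down in this header.
 * @code
 * static void atomicUpdateMaxU32(volatile uint32_t RT_FAR *pu32Max, uint32_t u32New)
 * {
 *     uint32_t u32Cur;
 *     do
 *         u32Cur = ASMAtomicReadU32(pu32Max);
 *     while (   u32New > u32Cur
 *            && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
 * }
 * @endcode
 */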
813
814/**
815 * Atomically Compare and Exchange a signed 32-bit value, ordered.
816 *
817 * @returns true if xchg was done.
818 * @returns false if xchg wasn't done.
819 *
820 * @param pi32 Pointer to the value to update.
821 * @param i32New The new value to assign to *pi32.
822 * @param i32Old The old value to compare *pi32 with.
823 *
824 * @remarks x86: Requires a 486 or later.
825 */
826DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
827{
828 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
829}
830
831
832/**
833 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
834 *
835 * @returns true if xchg was done.
836 * @returns false if xchg wasn't done.
837 *
838 * @param pu64 Pointer to the 64-bit variable to update.
839 * @param u64New The 64-bit value to assign to *pu64.
840 * @param u64Old The value to compare with.
841 *
842 * @remarks x86: Requires a Pentium or later.
843 */
844#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
845 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
849{
850# if RT_INLINE_ASM_USES_INTRIN
851 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
852
853# elif defined(RT_ARCH_AMD64)
854# if RT_INLINE_ASM_GNU_STYLE
855 uint8_t u8Ret;
856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
857 "setz %1\n\t"
858 : "=m" (*pu64),
859 "=qm" (u8Ret),
860 "=a" (u64Old)
861 : "r" (u64New),
862 "2" (u64Old),
863 "m" (*pu64));
864 return (bool)u8Ret;
865# else
866 bool fRet;
867 __asm
868 {
869 mov rdx, [pu64]
870 mov rax, [u64Old]
871 mov rcx, [u64New]
872 lock cmpxchg [rdx], rcx
873 setz al
874 mov [fRet], al
875 }
876 return fRet;
877# endif
878# else /* !RT_ARCH_AMD64 */
879 uint32_t u32Ret;
880# if RT_INLINE_ASM_GNU_STYLE
881# if defined(PIC) || defined(__PIC__)
882 uint32_t u32EBX = (uint32_t)u64New;
883 uint32_t u32Spill;
884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
885 "lock; cmpxchg8b (%6)\n\t"
886 "setz %%al\n\t"
887 "movl %4, %%ebx\n\t"
888 "movzbl %%al, %%eax\n\t"
889 : "=a" (u32Ret),
890 "=d" (u32Spill),
891# if RT_GNUC_PREREQ(4, 3)
892 "+m" (*pu64)
893# else
894 "=m" (*pu64)
895# endif
896 : "A" (u64Old),
897 "m" ( u32EBX ),
898 "c" ( (uint32_t)(u64New >> 32) ),
899 "S" (pu64));
900# else /* !PIC */
901 uint32_t u32Spill;
902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
903 "setz %%al\n\t"
904 "movzbl %%al, %%eax\n\t"
905 : "=a" (u32Ret),
906 "=d" (u32Spill),
907 "+m" (*pu64)
908 : "A" (u64Old),
909 "b" ( (uint32_t)u64New ),
910 "c" ( (uint32_t)(u64New >> 32) ));
911# endif
912 return (bool)u32Ret;
913# else
914 __asm
915 {
916 mov ebx, dword ptr [u64New]
917 mov ecx, dword ptr [u64New + 4]
918 mov edi, [pu64]
919 mov eax, dword ptr [u64Old]
920 mov edx, dword ptr [u64Old + 4]
921 lock cmpxchg8b [edi]
922 setz al
923 movzx eax, al
924 mov dword ptr [u32Ret], eax
925 }
926 return !!u32Ret;
927# endif
928# endif /* !RT_ARCH_AMD64 */
929}
930#endif
931
932
933/**
934 * Atomically Compare and exchange a signed 64-bit value, ordered.
935 *
936 * @returns true if xchg was done.
937 * @returns false if xchg wasn't done.
938 *
939 * @param pi64 Pointer to the 64-bit variable to update.
940 * @param i64 The 64-bit value to assign to *pi64.
941 * @param i64Old The value to compare with.
942 *
943 * @remarks x86: Requires a Pentium or later.
944 */
945DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
946{
947 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
948}
949
950
951/**
952 * Atomically Compare and Exchange a pointer value, ordered.
953 *
954 * @returns true if xchg was done.
955 * @returns false if xchg wasn't done.
956 *
957 * @param ppv Pointer to the value to update.
958 * @param pvNew The new value to assign to *ppv.
959 * @param pvOld The old value to compare *ppv with.
960 *
961 * @remarks x86: Requires a 486 or later.
962 */
963DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
964{
965#if ARCH_BITS == 32 || ARCH_BITS == 16
966 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
967#elif ARCH_BITS == 64
968 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
969#else
970# error "ARCH_BITS is bogus"
971#endif
972}
973
974
975/**
976 * Atomically Compare and Exchange a pointer value, ordered.
977 *
978 * @returns true if xchg was done.
979 * @returns false if xchg wasn't done.
980 *
981 * @param ppv Pointer to the value to update.
982 * @param pvNew The new value to assign to *ppv.
983 * @param pvOld The old value to compare *ppv with.
984 *
985 * @remarks This is relatively type safe on GCC platforms.
986 * @remarks x86: Requires a 486 or later.
987 */
988#ifdef __GNUC__
989# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
990 __extension__ \
991 ({\
992 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
993 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
994 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
995 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
996 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
997 fMacroRet; \
998 })
999#else
1000# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1001 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1002#endif
1003
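/* Usage sketch (an added illustration, not part of the original header):
 * pushing onto a lock-free singly linked list with the typed
 * compare-and-exchange.  MYNODE and g_pHead are hypothetical.
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 * static MYNODE * volatile g_pHead;
 *
 * static void listPush(MYNODE *pNode)
 * {
 *     MYNODE *pOldHead;
 *     do
 *     {
 *         pOldHead = g_pHead;
 *         pNode->pNext = pOldHead;
 *     } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pOldHead));
 * }
 * @endcode
 */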
1004
1005/** @def ASMAtomicCmpXchgHandle
1006 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1007 *
1008 * @param ph Pointer to the value to update.
1009 * @param hNew The new value to assign to *ph.
1010 * @param hOld The old value to compare *ph with.
1011 * @param fRc Where to store the result.
1012 *
1013 * @remarks This doesn't currently work for all handles (like RTFILE).
1014 * @remarks x86: Requires a 486 or later.
1015 */
1016#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1017# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1018 do { \
1019 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1020 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1021 } while (0)
1022#elif HC_ARCH_BITS == 64
1023# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1024 do { \
1025 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1026 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1027 } while (0)
1028#else
1029# error HC_ARCH_BITS
1030#endif
1031
1032
1033/** @def ASMAtomicCmpXchgSize
1034 * Atomically Compare and Exchange a value which size might differ
1035 * between platforms or compilers, ordered.
1036 *
1037 * @param pu Pointer to the value to update.
1038 * @param uNew The new value to assign to *pu.
1039 * @param uOld The old value to compare *pu with.
1040 * @param fRc Where to store the result.
1041 *
1042 * @remarks x86: Requires a 486 or later.
1043 */
1044#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1045 do { \
1046 switch (sizeof(*(pu))) { \
1047 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1048 break; \
1049 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1050 break; \
1051 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1052 (fRc) = false; \
1053 break; \
1054 } \
1055 } while (0)
1056
1057
1058/**
1059 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1060 * passes back old value, ordered.
1061 *
1062 * @returns true if xchg was done.
1063 * @returns false if xchg wasn't done.
1064 *
1065 * @param pu32 Pointer to the value to update.
1066 * @param u32New The new value to assign to *pu32.
1067 * @param u32Old The old value to compare *pu32 with.
1068 * @param pu32Old Pointer to store the old value at.
1069 *
1070 * @remarks x86: Requires a 486 or later.
1071 */
1072#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1073DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1074#else
1075DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1076{
1077# if RT_INLINE_ASM_GNU_STYLE
1078 uint8_t u8Ret;
1079 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1080 "setz %1\n\t"
1081 : "=m" (*pu32),
1082 "=qm" (u8Ret),
1083 "=a" (*pu32Old)
1084 : "r" (u32New),
1085 "a" (u32Old),
1086 "m" (*pu32));
1087 return (bool)u8Ret;
1088
1089# elif RT_INLINE_ASM_USES_INTRIN
1090 return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1091
1092# else
1093 uint32_t u32Ret;
1094 __asm
1095 {
1096# ifdef RT_ARCH_AMD64
1097 mov rdx, [pu32]
1098# else
1099 mov edx, [pu32]
1100# endif
1101 mov eax, [u32Old]
1102 mov ecx, [u32New]
1103# ifdef RT_ARCH_AMD64
1104 lock cmpxchg [rdx], ecx
1105 mov rdx, [pu32Old]
1106 mov [rdx], eax
1107# else
1108 lock cmpxchg [edx], ecx
1109 mov edx, [pu32Old]
1110 mov [edx], eax
1111# endif
1112 setz al
1113 movzx eax, al
1114 mov [u32Ret], eax
1115 }
1116 return !!u32Ret;
1117# endif
1118}
1119#endif
1120
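/* Usage sketch (an added illustration, not part of the original header): the
 * Ex variant hands back the value it actually found, so a retry loop does not
 * need a separate re-read.  Here it sets flag bits and reports the previous
 * flags; the names are hypothetical.
 * @code
 * static uint32_t atomicOrReturnOldU32(volatile uint32_t RT_FAR *pu32, uint32_t fOrMask)
 * {
 *     uint32_t u32Old = *pu32;
 *     uint32_t u32Seen;
 *     while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fOrMask, u32Old, &u32Seen))
 *         u32Old = u32Seen;   // somebody else changed it; retry with the value we saw
 *     return u32Old;
 * }
 * @endcode
 */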
1121
1122/**
1123 * Atomically Compare and Exchange a signed 32-bit value, additionally
1124 * passes back old value, ordered.
1125 *
1126 * @returns true if xchg was done.
1127 * @returns false if xchg wasn't done.
1128 *
1129 * @param pi32 Pointer to the value to update.
1130 * @param i32New The new value to assign to *pi32.
1131 * @param i32Old The old value to compare *pi32 with.
1132 * @param pi32Old Pointer to store the old value at.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 */
1136DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1137{
1138 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1139}
1140
1141
1142/**
1143 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1144 * passing back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pu64 Pointer to the 64-bit variable to update.
1150 * @param u64New The 64-bit value to assign to *pu64.
1151 * @param u64Old The value to compare with.
1152 * @param pu64Old Pointer to store the old value at.
1153 *
1154 * @remarks x86: Requires a Pentium or later.
1155 */
1156#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1157 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1158DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1161{
1162# if RT_INLINE_ASM_USES_INTRIN
1163 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1164
1165# elif defined(RT_ARCH_AMD64)
1166# if RT_INLINE_ASM_GNU_STYLE
1167 uint8_t u8Ret;
1168 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1169 "setz %1\n\t"
1170 : "=m" (*pu64),
1171 "=qm" (u8Ret),
1172 "=a" (*pu64Old)
1173 : "r" (u64New),
1174 "a" (u64Old),
1175 "m" (*pu64));
1176 return (bool)u8Ret;
1177# else
1178 bool fRet;
1179 __asm
1180 {
1181 mov rdx, [pu64]
1182 mov rax, [u64Old]
1183 mov rcx, [u64New]
1184 lock cmpxchg [rdx], rcx
1185 mov rdx, [pu64Old]
1186 mov [rdx], rax
1187 setz al
1188 mov [fRet], al
1189 }
1190 return fRet;
1191# endif
1192# else /* !RT_ARCH_AMD64 */
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint64_t u64Ret;
1195# if defined(PIC) || defined(__PIC__)
1196 /* NB: this code uses a memory clobber description, because the clean
1197 * solution with an output value for *pu64 makes gcc run out of registers.
1198 * This will cause suboptimal code, and anyone with a better solution is
1199 * welcome to improve this. */
1200 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1201 "lock; cmpxchg8b %3\n\t"
1202 "xchgl %%ebx, %1\n\t"
1203 : "=A" (u64Ret)
1204 : "DS" ((uint32_t)u64New),
1205 "c" ((uint32_t)(u64New >> 32)),
1206 "m" (*pu64),
1207 "0" (u64Old)
1208 : "memory" );
1209# else /* !PIC */
1210 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1211 : "=A" (u64Ret),
1212 "=m" (*pu64)
1213 : "b" ((uint32_t)u64New),
1214 "c" ((uint32_t)(u64New >> 32)),
1215 "m" (*pu64),
1216 "0" (u64Old));
1217# endif
1218 *pu64Old = u64Ret;
1219 return u64Ret == u64Old;
1220# else
1221 uint32_t u32Ret;
1222 __asm
1223 {
1224 mov ebx, dword ptr [u64New]
1225 mov ecx, dword ptr [u64New + 4]
1226 mov edi, [pu64]
1227 mov eax, dword ptr [u64Old]
1228 mov edx, dword ptr [u64Old + 4]
1229 lock cmpxchg8b [edi]
1230 mov ebx, [pu64Old]
1231 mov [ebx], eax
1232 setz al
1233 movzx eax, al
1234 add ebx, 4
1235 mov [ebx], edx
1236 mov dword ptr [u32Ret], eax
1237 }
1238 return !!u32Ret;
1239# endif
1240# endif /* !RT_ARCH_AMD64 */
1241}
1242#endif
1243
1244
1245/**
1246 * Atomically Compare and exchange a signed 64-bit value, additionally
1247 * passing back old value, ordered.
1248 *
1249 * @returns true if xchg was done.
1250 * @returns false if xchg wasn't done.
1251 *
1252 * @param pi64 Pointer to the 64-bit variable to update.
1253 * @param i64 The 64-bit value to assign to *pi64.
1254 * @param i64Old The value to compare with.
1255 * @param pi64Old Pointer to store the old value at.
1256 *
1257 * @remarks x86: Requires a Pentium or later.
1258 */
1259DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1260{
1261 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1262}
1263
1264/** @def ASMAtomicCmpXchgExHandle
1265 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1266 *
1267 * @param ph Pointer to the value to update.
1268 * @param hNew The new value to assign to *ph.
1269 * @param hOld The old value to compare *ph with.
1270 * @param fRc Where to store the result.
1271 * @param phOldVal Pointer to where to store the old value.
1272 *
1273 * @remarks This doesn't currently work for all handles (like RTFILE).
1274 */
1275#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1279 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1281 } while (0)
1282#elif HC_ARCH_BITS == 64
1283# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1286 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1287 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1288 } while (0)
1289#else
1290# error HC_ARCH_BITS
1291#endif
1292
1293
1294/** @def ASMAtomicCmpXchgExSize
1295 * Atomically Compare and Exchange a value which size might differ
1296 * between platforms or compilers. Additionally passes back old value.
1297 *
1298 * @param pu Pointer to the value to update.
1299 * @param uNew The new value to assign to *pu.
1300 * @param uOld The old value to compare *pu with.
1301 * @param fRc Where to store the result.
1302 * @param puOldVal Pointer to where to store the old value.
1303 *
1304 * @remarks x86: Requires a 486 or later.
1305 */
1306#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1307 do { \
1308 switch (sizeof(*(pu))) { \
1309 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1310 break; \
1311 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1312 break; \
1313 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1314 (fRc) = false; \
1315 *(puOldVal) = 0; \
1316 break; \
1317 } \
1318 } while (0)
1319
1320
1321/**
1322 * Atomically Compare and Exchange a pointer value, additionally
1323 * passing back old value, ordered.
1324 *
1325 * @returns true if xchg was done.
1326 * @returns false if xchg wasn't done.
1327 *
1328 * @param ppv Pointer to the value to update.
1329 * @param pvNew The new value to assign to *ppv.
1330 * @param pvOld The old value to compare *ppv with.
1331 * @param ppvOld Pointer to store the old value at.
1332 *
1333 * @remarks x86: Requires a 486 or later.
1334 */
1335DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1336 void RT_FAR * RT_FAR *ppvOld)
1337{
1338#if ARCH_BITS == 32 || ARCH_BITS == 16
1339 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1340#elif ARCH_BITS == 64
1341 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1342#else
1343# error "ARCH_BITS is bogus"
1344#endif
1345}
1346
1347
1348/**
1349 * Atomically Compare and Exchange a pointer value, additionally
1350 * passing back old value, ordered.
1351 *
1352 * @returns true if xchg was done.
1353 * @returns false if xchg wasn't done.
1354 *
1355 * @param ppv Pointer to the value to update.
1356 * @param pvNew The new value to assign to *ppv.
1357 * @param pvOld The old value to compare *ppv with.
1358 * @param ppvOld Pointer to store the old value at.
1359 *
1360 * @remarks This is relatively type safe on GCC platforms.
1361 * @remarks x86: Requires a 486 or later.
1362 */
1363#ifdef __GNUC__
1364# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1365 __extension__ \
1366 ({\
1367 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1368 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1369 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1370 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1371 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1372 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1373 (void **)ppvOldTypeChecked); \
1374 fMacroRet; \
1375 })
1376#else
1377# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1378 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1379#endif
1380
1381
1382/**
1383 * Virtualization unfriendly serializing instruction, always exits.
1384 */
1385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1386DECLASM(void) ASMSerializeInstructionCpuId(void);
1387#else
1388DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1389{
1390# if RT_INLINE_ASM_GNU_STYLE
1391 RTCCUINTREG xAX = 0;
1392# ifdef RT_ARCH_AMD64
1393 __asm__ __volatile__ ("cpuid"
1394 : "=a" (xAX)
1395 : "0" (xAX)
1396 : "rbx", "rcx", "rdx", "memory");
1397# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1398 __asm__ __volatile__ ("push %%ebx\n\t"
1399 "cpuid\n\t"
1400 "pop %%ebx\n\t"
1401 : "=a" (xAX)
1402 : "0" (xAX)
1403 : "ecx", "edx", "memory");
1404# else
1405 __asm__ __volatile__ ("cpuid"
1406 : "=a" (xAX)
1407 : "0" (xAX)
1408 : "ebx", "ecx", "edx", "memory");
1409# endif
1410
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 int aInfo[4];
1413 _ReadWriteBarrier();
1414 __cpuid(aInfo, 0);
1415
1416# else
1417 __asm
1418 {
1419 push ebx
1420 xor eax, eax
1421 cpuid
1422 pop ebx
1423 }
1424# endif
1425}
1426#endif
1427
1428/**
1429 * Virtualization friendly serializing instruction, though more expensive.
1430 */
1431#if RT_INLINE_ASM_EXTERNAL
1432DECLASM(void) ASMSerializeInstructionIRet(void);
1433#else
1434DECLINLINE(void) ASMSerializeInstructionIRet(void)
1435{
1436# if RT_INLINE_ASM_GNU_STYLE
1437# ifdef RT_ARCH_AMD64
1438 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1439 "subq $128, %%rsp\n\t" /*redzone*/
1440 "mov %%ss, %%eax\n\t"
1441 "pushq %%rax\n\t"
1442 "pushq %%r10\n\t"
1443 "pushfq\n\t"
1444 "movl %%cs, %%eax\n\t"
1445 "pushq %%rax\n\t"
1446 "leaq 1f(%%rip), %%rax\n\t"
1447 "pushq %%rax\n\t"
1448 "iretq\n\t"
1449 "1:\n\t"
1450 ::: "rax", "r10", "memory");
1451# else
1452 __asm__ __volatile__ ("pushfl\n\t"
1453 "pushl %%cs\n\t"
1454 "pushl $1f\n\t"
1455 "iretl\n\t"
1456 "1:\n\t"
1457 ::: "memory");
1458# endif
1459
1460# else
1461 __asm
1462 {
1463 pushfd
1464 push cs
1465 push la_ret
1466 iretd
1467 la_ret:
1468 }
1469# endif
1470}
1471#endif
1472
1473/**
1474 * Virtualization friendlier serializing instruction, may still cause exits.
1475 */
1476#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1477DECLASM(void) ASMSerializeInstructionRdTscp(void);
1478#else
1479DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1480{
1481# if RT_INLINE_ASM_GNU_STYLE
1482 /* rdtscp is not supported by ancient linux build VM of course :-( */
1483# ifdef RT_ARCH_AMD64
1484 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1485 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1486# else
1487 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1488 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1489# endif
1490# else
1491# if RT_INLINE_ASM_USES_INTRIN >= 15
1492 uint32_t uIgnore;
1493 _ReadWriteBarrier();
1494 (void)__rdtscp(&uIgnore);
1495 (void)uIgnore;
1496# else
1497 __asm
1498 {
1499 rdtscp
1500 }
1501# endif
1502# endif
1503}
1504#endif
1505
1506
1507/**
1508 * Serialize Instruction.
1509 */
1510#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1511# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1512#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1513# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1514#elif defined(RT_ARCH_SPARC64)
1515RTDECL(void) ASMSerializeInstruction(void);
1516#else
1517# error "Port me"
1518#endif
1519
1520
1521/**
1522 * Memory fence, waits for any pending writes and reads to complete.
1523 */
1524DECLINLINE(void) ASMMemoryFence(void)
1525{
1526 /** @todo use mfence? check if all cpus we care for support it. */
1527#if ARCH_BITS == 16
1528 uint16_t volatile u16;
1529 ASMAtomicXchgU16(&u16, 0);
1530#else
1531 uint32_t volatile u32;
1532 ASMAtomicXchgU32(&u32, 0);
1533#endif
1534}
1535
1536
1537/**
1538 * Write fence, waits for any pending writes to complete.
1539 */
1540DECLINLINE(void) ASMWriteFence(void)
1541{
1542 /** @todo use sfence? check if all cpus we care for support it. */
1543 ASMMemoryFence();
1544}
1545
1546
1547/**
1548 * Read fence, waits for any pending reads to complete.
1549 */
1550DECLINLINE(void) ASMReadFence(void)
1551{
1552 /** @todo use lfence? check if all cpus we care for support it. */
1553 ASMMemoryFence();
1554}
1555
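/* Usage sketch (an added illustration, not part of the original header): a
 * minimal single-producer / single-consumer hand-off.  The producer publishes
 * the payload before the ready flag, the consumer checks the flag before
 * touching the payload.  The names are hypothetical.
 * @code
 * static uint32_t          g_u32Payload;
 * static volatile uint32_t g_fReady;
 *
 * static void producerPublish(uint32_t u32Value)
 * {
 *     g_u32Payload = u32Value;
 *     ASMWriteFence();            // make sure the payload is visible first...
 *     g_fReady = 1;               // ...and only then the flag
 * }
 *
 * static bool consumerTryFetch(uint32_t *pu32Value)
 * {
 *     if (!g_fReady)
 *         return false;
 *     ASMReadFence();             // don't let the payload read pass the flag check
 *     *pu32Value = g_u32Payload;
 *     return true;
 * }
 * @endcode
 */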
1556
1557/**
1558 * Atomically reads an unsigned 8-bit value, ordered.
1559 *
1560 * @returns Current *pu8 value
1561 * @param pu8 Pointer to the 8-bit variable to read.
1562 */
1563DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1564{
1565 ASMMemoryFence();
1566 return *pu8; /* byte reads are atomic on x86 */
1567}
1568
1569
1570/**
1571 * Atomically reads an unsigned 8-bit value, unordered.
1572 *
1573 * @returns Current *pu8 value
1574 * @param pu8 Pointer to the 8-bit variable to read.
1575 */
1576DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1577{
1578 return *pu8; /* byte reads are atomic on x86 */
1579}
1580
1581
1582/**
1583 * Atomically reads a signed 8-bit value, ordered.
1584 *
1585 * @returns Current *pi8 value
1586 * @param pi8 Pointer to the 8-bit variable to read.
1587 */
1588DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1589{
1590 ASMMemoryFence();
1591 return *pi8; /* byte reads are atomic on x86 */
1592}
1593
1594
1595/**
1596 * Atomically reads a signed 8-bit value, unordered.
1597 *
1598 * @returns Current *pi8 value
1599 * @param pi8 Pointer to the 8-bit variable to read.
1600 */
1601DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1602{
1603 return *pi8; /* byte reads are atomic on x86 */
1604}
1605
1606
1607/**
1608 * Atomically reads an unsigned 16-bit value, ordered.
1609 *
1610 * @returns Current *pu16 value
1611 * @param pu16 Pointer to the 16-bit variable to read.
1612 */
1613DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1614{
1615 ASMMemoryFence();
1616 Assert(!((uintptr_t)pu16 & 1));
1617 return *pu16;
1618}
1619
1620
1621/**
1622 * Atomically reads an unsigned 16-bit value, unordered.
1623 *
1624 * @returns Current *pu16 value
1625 * @param pu16 Pointer to the 16-bit variable to read.
1626 */
1627DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1628{
1629 Assert(!((uintptr_t)pu16 & 1));
1630 return *pu16;
1631}
1632
1633
1634/**
1635 * Atomically reads a signed 16-bit value, ordered.
1636 *
1637 * @returns Current *pi16 value
1638 * @param pi16 Pointer to the 16-bit variable to read.
1639 */
1640DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1641{
1642 ASMMemoryFence();
1643 Assert(!((uintptr_t)pi16 & 1));
1644 return *pi16;
1645}
1646
1647
1648/**
1649 * Atomically reads a signed 16-bit value, unordered.
1650 *
1651 * @returns Current *pi16 value
1652 * @param pi16 Pointer to the 16-bit variable to read.
1653 */
1654DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1655{
1656 Assert(!((uintptr_t)pi16 & 1));
1657 return *pi16;
1658}
1659
1660
1661/**
1662 * Atomically reads an unsigned 32-bit value, ordered.
1663 *
1664 * @returns Current *pu32 value
1665 * @param pu32 Pointer to the 32-bit variable to read.
1666 */
1667DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1668{
1669 ASMMemoryFence();
1670 Assert(!((uintptr_t)pu32 & 3));
1671#if ARCH_BITS == 16
1672 AssertFailed(); /** @todo 16-bit */
1673#endif
1674 return *pu32;
1675}
1676
1677
1678/**
1679 * Atomically reads an unsigned 32-bit value, unordered.
1680 *
1681 * @returns Current *pu32 value
1682 * @param pu32 Pointer to the 32-bit variable to read.
1683 */
1684DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1685{
1686 Assert(!((uintptr_t)pu32 & 3));
1687#if ARCH_BITS == 16
1688 AssertFailed(); /** @todo 16-bit */
1689#endif
1690 return *pu32;
1691}
1692
1693
1694/**
1695 * Atomically reads a signed 32-bit value, ordered.
1696 *
1697 * @returns Current *pi32 value
1698 * @param pi32 Pointer to the 32-bit variable to read.
1699 */
1700DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1701{
1702 ASMMemoryFence();
1703 Assert(!((uintptr_t)pi32 & 3));
1704#if ARCH_BITS == 16
1705 AssertFailed(); /** @todo 16-bit */
1706#endif
1707 return *pi32;
1708}
1709
1710
1711/**
1712 * Atomically reads a signed 32-bit value, unordered.
1713 *
1714 * @returns Current *pi32 value
1715 * @param pi32 Pointer to the 32-bit variable to read.
1716 */
1717DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1718{
1719 Assert(!((uintptr_t)pi32 & 3));
1720#if ARCH_BITS == 16
1721 AssertFailed(); /** @todo 16-bit */
1722#endif
1723 return *pi32;
1724}
1725
1726
1727/**
1728 * Atomically reads an unsigned 64-bit value, ordered.
1729 *
1730 * @returns Current *pu64 value
1731 * @param pu64 Pointer to the 64-bit variable to read.
1732 * The memory pointed to must be writable.
1733 *
1734 * @remarks This may fault if the memory is read-only!
1735 * @remarks x86: Requires a Pentium or later.
1736 */
1737#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1738 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1739DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
1740#else
1741DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
1742{
1743 uint64_t u64;
1744# ifdef RT_ARCH_AMD64
1745 Assert(!((uintptr_t)pu64 & 7));
1746/*# if RT_INLINE_ASM_GNU_STYLE
1747 __asm__ __volatile__( "mfence\n\t"
1748 "movq %1, %0\n\t"
1749 : "=r" (u64)
1750 : "m" (*pu64));
1751# else
1752 __asm
1753 {
1754 mfence
1755 mov rdx, [pu64]
1756 mov rax, [rdx]
1757 mov [u64], rax
1758 }
1759# endif*/
1760 ASMMemoryFence();
1761 u64 = *pu64;
1762# else /* !RT_ARCH_AMD64 */
1763# if RT_INLINE_ASM_GNU_STYLE
1764# if defined(PIC) || defined(__PIC__)
1765 uint32_t u32EBX = 0;
1766 Assert(!((uintptr_t)pu64 & 7));
1767 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1768 "lock; cmpxchg8b (%5)\n\t"
1769 "movl %3, %%ebx\n\t"
1770 : "=A" (u64),
1771# if RT_GNUC_PREREQ(4, 3)
1772 "+m" (*pu64)
1773# else
1774 "=m" (*pu64)
1775# endif
1776 : "0" (0ULL),
1777 "m" (u32EBX),
1778 "c" (0),
1779 "S" (pu64));
1780# else /* !PIC */
1781 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1782 : "=A" (u64),
1783 "+m" (*pu64)
1784 : "0" (0ULL),
1785 "b" (0),
1786 "c" (0));
1787# endif
1788# else
1789 Assert(!((uintptr_t)pu64 & 7));
1790 __asm
1791 {
1792 xor eax, eax
1793 xor edx, edx
1794 mov edi, pu64
1795 xor ecx, ecx
1796 xor ebx, ebx
1797 lock cmpxchg8b [edi]
1798 mov dword ptr [u64], eax
1799 mov dword ptr [u64 + 4], edx
1800 }
1801# endif
1802# endif /* !RT_ARCH_AMD64 */
1803 return u64;
1804}
1805#endif
1806
1807
1808/**
1809 * Atomically reads an unsigned 64-bit value, unordered.
1810 *
1811 * @returns Current *pu64 value
1812 * @param pu64 Pointer to the 64-bit variable to read.
1813 * The memory pointed to must be writable.
1814 *
1815 * @remarks This may fault if the memory is read-only!
1816 * @remarks x86: Requires a Pentium or later.
1817 */
1818#if !defined(RT_ARCH_AMD64) \
1819 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1820 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1821DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
1822#else
1823DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
1824{
1825 uint64_t u64;
1826# ifdef RT_ARCH_AMD64
1827 Assert(!((uintptr_t)pu64 & 7));
1828/*# if RT_INLINE_ASM_GNU_STYLE
1829 Assert(!((uintptr_t)pu64 & 7));
1830 __asm__ __volatile__("movq %1, %0\n\t"
1831 : "=r" (u64)
1832 : "m" (*pu64));
1833# else
1834 __asm
1835 {
1836 mov rdx, [pu64]
1837 mov rax, [rdx]
1838 mov [u64], rax
1839 }
1840# endif */
1841 u64 = *pu64;
1842# else /* !RT_ARCH_AMD64 */
1843# if RT_INLINE_ASM_GNU_STYLE
1844# if defined(PIC) || defined(__PIC__)
1845 uint32_t u32EBX = 0;
1846 uint32_t u32Spill;
1847 Assert(!((uintptr_t)pu64 & 7));
1848 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1849 "xor %%ecx,%%ecx\n\t"
1850 "xor %%edx,%%edx\n\t"
1851 "xchgl %%ebx, %3\n\t"
1852 "lock; cmpxchg8b (%4)\n\t"
1853 "movl %3, %%ebx\n\t"
1854 : "=A" (u64),
1855# if RT_GNUC_PREREQ(4, 3)
1856 "+m" (*pu64),
1857# else
1858 "=m" (*pu64),
1859# endif
1860 "=c" (u32Spill)
1861 : "m" (u32EBX),
1862 "S" (pu64));
1863# else /* !PIC */
1864 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1865 : "=A" (u64),
1866 "+m" (*pu64)
1867 : "0" (0ULL),
1868 "b" (0),
1869 "c" (0));
1870# endif
1871# else
1872 Assert(!((uintptr_t)pu64 & 7));
1873 __asm
1874 {
1875 xor eax, eax
1876 xor edx, edx
1877 mov edi, pu64
1878 xor ecx, ecx
1879 xor ebx, ebx
1880 lock cmpxchg8b [edi]
1881 mov dword ptr [u64], eax
1882 mov dword ptr [u64 + 4], edx
1883 }
1884# endif
1885# endif /* !RT_ARCH_AMD64 */
1886 return u64;
1887}
1888#endif
1889
1890
1891/**
1892 * Atomically reads a signed 64-bit value, ordered.
1893 *
1894 * @returns Current *pi64 value
1895 * @param pi64 Pointer to the 64-bit variable to read.
1896 * The memory pointed to must be writable.
1897 *
1898 * @remarks This may fault if the memory is read-only!
1899 * @remarks x86: Requires a Pentium or later.
1900 */
1901DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1902{
1903 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1904}
1905
1906
1907/**
1908 * Atomically reads a signed 64-bit value, unordered.
1909 *
1910 * @returns Current *pi64 value
1911 * @param pi64 Pointer to the 64-bit variable to read.
1912 * The memory pointed to must be writable.
1913 *
1914 * @remarks This may fault if the memory is read-only!
1915 * @remarks x86: Requires a Pentium or later.
1916 */
1917DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1918{
1919 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1920}
1921
1922
1923/**
1924 * Atomically reads a size_t value, ordered.
1925 *
1926 * @returns Current *pcb value
1927 * @param pcb Pointer to the size_t variable to read.
1928 */
1929DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1930{
1931#if ARCH_BITS == 64
1932 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1933#elif ARCH_BITS == 32
1934 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1935#elif ARCH_BITS == 16
1936 AssertCompileSize(size_t, 2);
1937 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
1938#else
1939# error "Unsupported ARCH_BITS value"
1940#endif
1941}
1942
1943
1944/**
1945 * Atomically reads a size_t value, unordered.
1946 *
1947 * @returns Current *pcb value
1948 * @param pcb Pointer to the size_t variable to read.
1949 */
1950DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
1951{
1952#if ARCH_BITS == 64
1953 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
1954#elif ARCH_BITS == 32
1955 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
1956#elif ARCH_BITS == 16
1957 AssertCompileSize(size_t, 2);
1958 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
1959#else
1960# error "Unsupported ARCH_BITS value"
1961#endif
1962}
1963
1964
1965/**
1966 * Atomically reads a pointer value, ordered.
1967 *
1968 * @returns Current *pv value
1969 * @param ppv Pointer to the pointer variable to read.
1970 *
1971 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1972 * requires less typing (no casts).
1973 */
1974DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
1975{
1976#if ARCH_BITS == 32 || ARCH_BITS == 16
1977 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
1978#elif ARCH_BITS == 64
1979 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
1980#else
1981# error "ARCH_BITS is bogus"
1982#endif
1983}
1984
1985/**
1986 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1987 *
1988 * @returns Current *pv value
1989 * @param ppv Pointer to the pointer variable to read.
1990 * @param Type The type of *ppv, sans volatile.
1991 */
1992#ifdef __GNUC__
1993# define ASMAtomicReadPtrT(ppv, Type) \
1994 __extension__ \
1995 ({\
1996 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1997 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1998 pvTypeChecked; \
1999 })
2000#else
2001# define ASMAtomicReadPtrT(ppv, Type) \
2002 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2003#endif
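
/* Illustrative sketch, not part of the IPRT API (EXAMPLENODE and the globals are
 * hypothetical): ASMAtomicReadPtrT keeps the pointer type intact, so the caller
 * is spared the void-pointer casting a raw ASMAtomicReadPtr call would need:
 */
#if 0 /* hypothetical example only */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    uint32_t            uValue;
} EXAMPLENODE;
static EXAMPLENODE * volatile g_pExampleHead; /* updated by a producer thread */

DECLINLINE(uint32_t) rtAsmExamplePeekHeadValue(void)
{
    EXAMPLENODE *pHead = ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
    return pHead ? pHead->uValue : 0;
}
#endif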
2004
2005
2006/**
2007 * Atomically reads a pointer value, unordered.
2008 *
2009 * @returns Current *pv value
2010 * @param ppv Pointer to the pointer variable to read.
2011 *
2012 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2013 * requires less typing (no casts).
2014 */
2015DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2016{
2017#if ARCH_BITS == 32 || ARCH_BITS == 16
2018 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2019#elif ARCH_BITS == 64
2020 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2021#else
2022# error "ARCH_BITS is bogus"
2023#endif
2024}
2025
2026
2027/**
2028 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2029 *
2030 * @returns Current *pv value
2031 * @param ppv Pointer to the pointer variable to read.
2032 * @param Type The type of *ppv, sans volatile.
2033 */
2034#ifdef __GNUC__
2035# define ASMAtomicUoReadPtrT(ppv, Type) \
2036 __extension__ \
2037 ({\
2038 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2039 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2040 pvTypeChecked; \
2041 })
2042#else
2043# define ASMAtomicUoReadPtrT(ppv, Type) \
2044 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2045#endif
2046
2047
2048/**
2049 * Atomically reads a boolean value, ordered.
2050 *
2051 * @returns Current *pf value
2052 * @param pf Pointer to the boolean variable to read.
2053 */
2054DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2055{
2056 ASMMemoryFence();
2057 return *pf; /* byte reads are atomic on x86 */
2058}
2059
2060
2061/**
2062 * Atomically reads a boolean value, unordered.
2063 *
2064 * @returns Current *pf value
2065 * @param pf Pointer to the boolean variable to read.
2066 */
2067DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2068{
2069 return *pf; /* byte reads are atomic on x86 */
2070}
2071
2072
2073/**
2074 * Atomically read a typical IPRT handle value, ordered.
2075 *
2076 * @param ph Pointer to the handle variable to read.
2077 * @param phRes Where to store the result.
2078 *
2079 * @remarks This doesn't currently work for all handles (like RTFILE).
2080 */
2081#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2082# define ASMAtomicReadHandle(ph, phRes) \
2083 do { \
2084 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2085 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2086 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2087 } while (0)
2088#elif HC_ARCH_BITS == 64
2089# define ASMAtomicReadHandle(ph, phRes) \
2090 do { \
2091 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2092 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2093 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2094 } while (0)
2095#else
2096# error HC_ARCH_BITS
2097#endif
2098
2099
2100/**
2101 * Atomically read a typical IPRT handle value, unordered.
2102 *
2103 * @param ph Pointer to the handle variable to read.
2104 * @param phRes Where to store the result.
2105 *
2106 * @remarks This doesn't currently work for all handles (like RTFILE).
2107 */
2108#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2109# define ASMAtomicUoReadHandle(ph, phRes) \
2110 do { \
2111 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2112 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2113 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2114 } while (0)
2115#elif HC_ARCH_BITS == 64
2116# define ASMAtomicUoReadHandle(ph, phRes) \
2117 do { \
2118 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2119 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2120 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2121 } while (0)
2122#else
2123# error HC_ARCH_BITS
2124#endif
2125
2126
2127/**
2128 * Atomically read a value whose size might differ
2129 * between platforms or compilers, ordered.
2130 *
2131 * @param pu Pointer to the variable to read.
2132 * @param puRes Where to store the result.
2133 */
2134#define ASMAtomicReadSize(pu, puRes) \
2135 do { \
2136 switch (sizeof(*(pu))) { \
2137 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2138 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2139 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2140 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2141 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2142 } \
2143 } while (0)
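
/* Illustrative sketch, not part of the IPRT API (the cookie variable is
 * hypothetical): the sizeof dispatch above is meant for types whose width
 * differs between platforms, e.g. uintptr_t, where hardcoding ASMAtomicReadU32
 * or ASMAtomicReadU64 would not be portable:
 */
#if 0 /* hypothetical example only */
static volatile uintptr_t g_uExampleCookie;

DECLINLINE(uintptr_t) rtAsmExampleReadCookie(void)
{
    uintptr_t uCookie;
    ASMAtomicReadSize(&g_uExampleCookie, &uCookie); /* resolves to the 2, 4 or 8 byte read */
    return uCookie;
}
#endif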
2144
2145
2146/**
2147 * Atomically read a value whose size might differ
2148 * between platforms or compilers, unordered.
2149 *
2150 * @param pu Pointer to the variable to read.
2151 * @param puRes Where to store the result.
2152 */
2153#define ASMAtomicUoReadSize(pu, puRes) \
2154 do { \
2155 switch (sizeof(*(pu))) { \
2156 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2157 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2158 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2159 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2160 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2161 } \
2162 } while (0)
2163
2164
2165/**
2166 * Atomically writes an unsigned 8-bit value, ordered.
2167 *
2168 * @param pu8 Pointer to the 8-bit variable.
2169 * @param u8 The 8-bit value to assign to *pu8.
2170 */
2171DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2172{
2173 ASMAtomicXchgU8(pu8, u8);
2174}
2175
2176
2177/**
2178 * Atomically writes an unsigned 8-bit value, unordered.
2179 *
2180 * @param pu8 Pointer to the 8-bit variable.
2181 * @param u8 The 8-bit value to assign to *pu8.
2182 */
2183DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2184{
2185 *pu8 = u8; /* byte writes are atomic on x86 */
2186}
2187
2188
2189/**
2190 * Atomically writes a signed 8-bit value, ordered.
2191 *
2192 * @param pi8 Pointer to the 8-bit variable to read.
2193 * @param i8 The 8-bit value to assign to *pi8.
2194 */
2195DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2196{
2197 ASMAtomicXchgS8(pi8, i8);
2198}
2199
2200
2201/**
2202 * Atomically writes a signed 8-bit value, unordered.
2203 *
2204 * @param pi8 Pointer to the 8-bit variable to write.
2205 * @param i8 The 8-bit value to assign to *pi8.
2206 */
2207DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2208{
2209 *pi8 = i8; /* byte writes are atomic on x86 */
2210}
2211
2212
2213/**
2214 * Atomically writes an unsigned 16-bit value, ordered.
2215 *
2216 * @param pu16 Pointer to the 16-bit variable to write.
2217 * @param u16 The 16-bit value to assign to *pu16.
2218 */
2219DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2220{
2221 ASMAtomicXchgU16(pu16, u16);
2222}
2223
2224
2225/**
2226 * Atomically writes an unsigned 16-bit value, unordered.
2227 *
2228 * @param pu16 Pointer to the 16-bit variable to write.
2229 * @param u16 The 16-bit value to assign to *pu16.
2230 */
2231DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2232{
2233 Assert(!((uintptr_t)pu16 & 1));
2234 *pu16 = u16;
2235}
2236
2237
2238/**
2239 * Atomically writes a signed 16-bit value, ordered.
2240 *
2241 * @param pi16 Pointer to the 16-bit variable to write.
2242 * @param i16 The 16-bit value to assign to *pi16.
2243 */
2244DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2245{
2246 ASMAtomicXchgS16(pi16, i16);
2247}
2248
2249
2250/**
2251 * Atomically writes a signed 16-bit value, unordered.
2252 *
2253 * @param pi16 Pointer to the 16-bit variable to write.
2254 * @param i16 The 16-bit value to assign to *pi16.
2255 */
2256DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2257{
2258 Assert(!((uintptr_t)pi16 & 1));
2259 *pi16 = i16;
2260}
2261
2262
2263/**
2264 * Atomically writes an unsigned 32-bit value, ordered.
2265 *
2266 * @param pu32 Pointer to the 32-bit variable to write.
2267 * @param u32 The 32-bit value to assign to *pu32.
2268 */
2269DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2270{
2271 ASMAtomicXchgU32(pu32, u32);
2272}
2273
2274
2275/**
2276 * Atomically writes an unsigned 32-bit value, unordered.
2277 *
2278 * @param pu32 Pointer to the 32-bit variable to write.
2279 * @param u32 The 32-bit value to assign to *pu32.
2280 */
2281DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2282{
2283 Assert(!((uintptr_t)pu32 & 3));
2284#if ARCH_BITS >= 32
2285 *pu32 = u32;
2286#else
2287 ASMAtomicXchgU32(pu32, u32);
2288#endif
2289}
2290
2291
2292/**
2293 * Atomically writes a signed 32-bit value, ordered.
2294 *
2295 * @param pi32 Pointer to the 32-bit variable to write.
2296 * @param i32 The 32-bit value to assign to *pi32.
2297 */
2298DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2299{
2300 ASMAtomicXchgS32(pi32, i32);
2301}
2302
2303
2304/**
2305 * Atomically writes a signed 32-bit value, unordered.
2306 *
2307 * @param pi32 Pointer to the 32-bit variable to write.
2308 * @param i32 The 32-bit value to assign to *pi32.
2309 */
2310DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2311{
2312 Assert(!((uintptr_t)pi32 & 3));
2313#if ARCH_BITS >= 32
2314 *pi32 = i32;
2315#else
2316 ASMAtomicXchgS32(pi32, i32);
2317#endif
2318}
2319
2320
2321/**
2322 * Atomically writes an unsigned 64-bit value, ordered.
2323 *
2324 * @param pu64 Pointer to the 64-bit variable to write.
2325 * @param u64 The 64-bit value to assign to *pu64.
2326 */
2327DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2328{
2329 ASMAtomicXchgU64(pu64, u64);
2330}
2331
2332
2333/**
2334 * Atomically writes an unsigned 64-bit value, unordered.
2335 *
2336 * @param pu64 Pointer to the 64-bit variable to write.
2337 * @param u64 The 64-bit value to assign to *pu64.
2338 */
2339DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2340{
2341 Assert(!((uintptr_t)pu64 & 7));
2342#if ARCH_BITS == 64
2343 *pu64 = u64;
2344#else
2345 ASMAtomicXchgU64(pu64, u64);
2346#endif
2347}
2348
2349
2350/**
2351 * Atomically writes a signed 64-bit value, ordered.
2352 *
2353 * @param pi64 Pointer to the 64-bit variable to write.
2354 * @param i64 The 64-bit value to assign to *pi64.
2355 */
2356DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2357{
2358 ASMAtomicXchgS64(pi64, i64);
2359}
2360
2361
2362/**
2363 * Atomically writes a signed 64-bit value, unordered.
2364 *
2365 * @param pi64 Pointer to the 64-bit variable to write.
2366 * @param i64 The 64-bit value to assign to *pi64.
2367 */
2368DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2369{
2370 Assert(!((uintptr_t)pi64 & 7));
2371#if ARCH_BITS == 64
2372 *pi64 = i64;
2373#else
2374 ASMAtomicXchgS64(pi64, i64);
2375#endif
2376}
2377
2378
2379/**
2380 * Atomically writes a size_t value, ordered.
2381 *
2382 * @returns nothing.
2383 * @param pcb Pointer to the size_t variable to write.
2384 * @param cb The value to assign to *pcb.
2385 */
2386DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2387{
2388#if ARCH_BITS == 64
2389 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2390#elif ARCH_BITS == 32
2391 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2392#elif ARCH_BITS == 16
2393 AssertCompileSize(size_t, 2);
2394 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2395#else
2396# error "Unsupported ARCH_BITS value"
2397#endif
2398}
2399
2400
2401/**
2402 * Atomically writes a boolean value, ordered.
2403 *
2404 * @param pf Pointer to the boolean variable to write.
2405 * @param f The boolean value to assign to *pf.
2406 */
2407DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2408{
2409 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2410}
2411
2412
2413/**
2414 * Atomically writes a boolean value, unordered.
2415 *
2416 * @param pf Pointer to the boolean variable to write.
2417 * @param f The boolean value to assign to *pf.
2418 */
2419DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2420{
2421 *pf = f; /* byte writes are atomic on x86 */
2422}
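
/* Illustrative sketch, not part of the IPRT API (flag and helpers are
 * hypothetical): a minimal shutdown handshake built on the boolean helpers,
 * set once with the ordered writer and polled by the worker thread:
 */
#if 0 /* hypothetical example only */
static volatile bool g_fExampleShutdown;

DECLINLINE(void) rtAsmExampleSignalShutdown(void)
{
    ASMAtomicWriteBool(&g_fExampleShutdown, true);
}

DECLINLINE(bool) rtAsmExampleShouldQuit(void)
{
    return ASMAtomicReadBool(&g_fExampleShutdown);
}
#endif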
2423
2424
2425/**
2426 * Atomically writes a pointer value, ordered.
2427 *
2428 * @param ppv Pointer to the pointer variable to write.
2429 * @param pv The pointer value to assign to *ppv.
2430 */
2431DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2432{
2433#if ARCH_BITS == 32 || ARCH_BITS == 16
2434 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2435#elif ARCH_BITS == 64
2436 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2437#else
2438# error "ARCH_BITS is bogus"
2439#endif
2440}
2441
2442
2443/**
2444 * Atomically writes a pointer value, ordered.
2445 *
2446 * @param ppv Pointer to the pointer variable to write.
2447 * @param pv The pointer value to assign to *ppv. If NULL use
2448 * ASMAtomicWriteNullPtr or you'll land in trouble.
2449 *
2450 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2451 * NULL.
2452 */
2453#ifdef __GNUC__
2454# define ASMAtomicWritePtr(ppv, pv) \
2455 do \
2456 { \
2457 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2458 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2459 \
2460 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2461 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2462 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2463 \
2464 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2465 } while (0)
2466#else
2467# define ASMAtomicWritePtr(ppv, pv) \
2468 do \
2469 { \
2470 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2471 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2472 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2473 \
2474 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2475 } while (0)
2476#endif
2477
2478
2479/**
2480 * Atomically sets a pointer to NULL, ordered.
2481 *
2482 * @param ppv Pointer to the pointer variable that should be set to NULL.
2483 *
2484 * @remarks This is relatively type safe on GCC platforms.
2485 */
2486#ifdef __GNUC__
2487# define ASMAtomicWriteNullPtr(ppv) \
2488 do \
2489 { \
2490 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2491 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2492 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2493 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2494 } while (0)
2495#else
2496# define ASMAtomicWriteNullPtr(ppv) \
2497 do \
2498 { \
2499 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2500 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2501 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2502 } while (0)
2503#endif
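
/* Illustrative sketch, not part of the IPRT API (EXAMPLECFG and the globals are
 * hypothetical): publishing a fully initialized object through a shared pointer.
 * ASMAtomicWritePtr is an ordered write (an atomic XCHG underneath), so the
 * stores initializing the object are visible before the pointer itself;
 * ASMAtomicWriteNullPtr retires it again:
 */
#if 0 /* hypothetical example only */
typedef struct EXAMPLECFG { uint32_t cMsTimeout; } EXAMPLECFG;
static EXAMPLECFG * volatile g_pExampleCfg;

DECLINLINE(void) rtAsmExamplePublishCfg(EXAMPLECFG *pCfg)
{
    pCfg->cMsTimeout = 5000;                    /* initialize first... */
    ASMAtomicWritePtr(&g_pExampleCfg, pCfg);    /* ...then publish. */
}

DECLINLINE(void) rtAsmExampleRetireCfg(void)
{
    ASMAtomicWriteNullPtr(&g_pExampleCfg);
}
#endif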
2504
2505
2506/**
2507 * Atomically writes a pointer value, unordered.
2508 *
2510 * @param ppv Pointer to the pointer variable.
2511 * @param pv The pointer value to assign to *ppv. If NULL use
2512 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2513 *
2514 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2515 * NULL.
2516 */
2517#ifdef __GNUC__
2518# define ASMAtomicUoWritePtr(ppv, pv) \
2519 do \
2520 { \
2521 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2522 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2523 \
2524 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2525 AssertCompile(sizeof(pv) == sizeof(void *)); \
2526 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2527 \
2528 *(ppvTypeChecked) = pvTypeChecked; \
2529 } while (0)
2530#else
2531# define ASMAtomicUoWritePtr(ppv, pv) \
2532 do \
2533 { \
2534 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2535 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2536 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2537 *(ppv) = pv; \
2538 } while (0)
2539#endif
2540
2541
2542/**
2543 * Atomically sets a pointer to NULL, unordered.
2544 *
2545 * @param ppv Pointer to the pointer variable that should be set to NULL.
2546 *
2547 * @remarks This is relatively type safe on GCC platforms.
2548 */
2549#ifdef __GNUC__
2550# define ASMAtomicUoWriteNullPtr(ppv) \
2551 do \
2552 { \
2553 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2554 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2555 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2556 *(ppvTypeChecked) = NULL; \
2557 } while (0)
2558#else
2559# define ASMAtomicUoWriteNullPtr(ppv) \
2560 do \
2561 { \
2562 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2563 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2564 *(ppv) = NULL; \
2565 } while (0)
2566#endif
2567
2568
2569/**
2570 * Atomically write a typical IPRT handle value, ordered.
2571 *
2572 * @param ph Pointer to the variable to update.
2573 * @param hNew The value to assign to *ph.
2574 *
2575 * @remarks This doesn't currently work for all handles (like RTFILE).
2576 */
2577#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2578# define ASMAtomicWriteHandle(ph, hNew) \
2579 do { \
2580 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2581 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2582 } while (0)
2583#elif HC_ARCH_BITS == 64
2584# define ASMAtomicWriteHandle(ph, hNew) \
2585 do { \
2586 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2587 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2588 } while (0)
2589#else
2590# error HC_ARCH_BITS
2591#endif
2592
2593
2594/**
2595 * Atomically write a typical IPRT handle value, unordered.
2596 *
2597 * @param ph Pointer to the variable to update.
2598 * @param hNew The value to assign to *ph.
2599 *
2600 * @remarks This doesn't currently work for all handles (like RTFILE).
2601 */
2602#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2603# define ASMAtomicUoWriteHandle(ph, hNew) \
2604 do { \
2605 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2606 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2607 } while (0)
2608#elif HC_ARCH_BITS == 64
2609# define ASMAtomicUoWriteHandle(ph, hNew) \
2610 do { \
2611 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2612 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2613 } while (0)
2614#else
2615# error HC_ARCH_BITS
2616#endif
2617
2618
2619/**
2620 * Atomically write a value whose size might differ
2621 * between platforms or compilers, ordered.
2622 *
2623 * @param pu Pointer to the variable to update.
2624 * @param uNew The value to assign to *pu.
2625 */
2626#define ASMAtomicWriteSize(pu, uNew) \
2627 do { \
2628 switch (sizeof(*(pu))) { \
2629 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2630 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2631 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2632 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2633 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2634 } \
2635 } while (0)
2636
2637/**
2638 * Atomically write a value whose size might differ
2639 * between platforms or compilers, unordered.
2640 *
2641 * @param pu Pointer to the variable to update.
2642 * @param uNew The value to assign to *pu.
2643 */
2644#define ASMAtomicUoWriteSize(pu, uNew) \
2645 do { \
2646 switch (sizeof(*(pu))) { \
2647 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2648 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2649 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2650 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2651 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2652 } \
2653 } while (0)
2654
2655
2656
2657/**
2658 * Atomically exchanges and adds to a 16-bit value, ordered.
2659 *
2660 * @returns The old value.
2661 * @param pu16 Pointer to the value.
2662 * @param u16 Number to add.
2663 *
2664 * @remarks Currently not implemented, just to make 16-bit code happy.
2665 * @remarks x86: Requires a 486 or later.
2666 */
2667DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2668
2669
2670/**
2671 * Atomically exchanges and adds to a 32-bit value, ordered.
2672 *
2673 * @returns The old value.
2674 * @param pu32 Pointer to the value.
2675 * @param u32 Number to add.
2676 *
2677 * @remarks x86: Requires a 486 or later.
2678 */
2679#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2680DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2681#else
2682DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2683{
2684# if RT_INLINE_ASM_USES_INTRIN
2685 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2686 return u32;
2687
2688# elif RT_INLINE_ASM_GNU_STYLE
2689 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2690 : "=r" (u32),
2691 "=m" (*pu32)
2692 : "0" (u32),
2693 "m" (*pu32)
2694 : "memory");
2695 return u32;
2696# else
2697 __asm
2698 {
2699 mov eax, [u32]
2700# ifdef RT_ARCH_AMD64
2701 mov rdx, [pu32]
2702 lock xadd [rdx], eax
2703# else
2704 mov edx, [pu32]
2705 lock xadd [edx], eax
2706# endif
2707 mov [u32], eax
2708 }
2709 return u32;
2710# endif
2711}
2712#endif
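
/* Illustrative sketch, not part of the IPRT API (names are hypothetical):
 * because the old value is returned, ASMAtomicAddU32 can hand out unique slots
 * to concurrent writers of a shared ring buffer:
 */
#if 0 /* hypothetical example only */
static volatile uint32_t g_idxExampleNext;

DECLINLINE(uint32_t) rtAsmExampleClaimSlot(uint32_t cSlots)
{
    return ASMAtomicAddU32(&g_idxExampleNext, 1) % cSlots;
}
#endif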
2713
2714
2715/**
2716 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2717 *
2718 * @returns The old value.
2719 * @param pi32 Pointer to the value.
2720 * @param i32 Number to add.
2721 *
2722 * @remarks x86: Requires a 486 or later.
2723 */
2724DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2725{
2726 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2727}
2728
2729
2730/**
2731 * Atomically exchanges and adds to a 64-bit value, ordered.
2732 *
2733 * @returns The old value.
2734 * @param pu64 Pointer to the value.
2735 * @param u64 Number to add.
2736 *
2737 * @remarks x86: Requires a Pentium or later.
2738 */
2739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2740DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2741#else
2742DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2743{
2744# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2745 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2746 return u64;
2747
2748# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2749 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2750 : "=r" (u64),
2751 "=m" (*pu64)
2752 : "0" (u64),
2753 "m" (*pu64)
2754 : "memory");
2755 return u64;
2756# else
2757 uint64_t u64Old;
2758 for (;;)
2759 {
2760 uint64_t u64New;
2761 u64Old = ASMAtomicUoReadU64(pu64);
2762 u64New = u64Old + u64;
2763 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2764 break;
2765 ASMNopPause();
2766 }
2767 return u64Old;
2768# endif
2769}
2770#endif
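
/* Illustrative sketch, not part of the IPRT API (the helper is hypothetical):
 * the fallback loop above, used when no 64-bit XADD is available, is the
 * general compare-exchange retry pattern.  The same pattern covers
 * read-modify-write operations this header has no dedicated helper for, e.g.
 * maintaining a 64-bit maximum:
 */
#if 0 /* hypothetical example only */
DECLINLINE(void) rtAsmExampleUpdateMaxU64(uint64_t volatile *pu64Max, uint64_t u64New)
{
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64Max);
        if (u64New <= u64Old)
            break;                                  /* nothing to update */
        if (ASMAtomicCmpXchgU64(pu64Max, u64New, u64Old))
            break;                                  /* we won the race */
        ASMNopPause();                              /* lost the race, retry */
    }
}
#endif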
2771
2772
2773/**
2774 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2775 *
2776 * @returns The old value.
2777 * @param pi64 Pointer to the value.
2778 * @param i64 Number to add.
2779 *
2780 * @remarks x86: Requires a Pentium or later.
2781 */
2782DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2783{
2784 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2785}
2786
2787
2788/**
2789 * Atomically exchanges and adds to a size_t value, ordered.
2790 *
2791 * @returns The old value.
2792 * @param pcb Pointer to the size_t value.
2793 * @param cb Number to add.
2794 */
2795DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2796{
2797#if ARCH_BITS == 64
2798 AssertCompileSize(size_t, 8);
2799 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2800#elif ARCH_BITS == 32
2801 AssertCompileSize(size_t, 4);
2802 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2803#elif ARCH_BITS == 16
2804 AssertCompileSize(size_t, 2);
2805 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2806#else
2807# error "Unsupported ARCH_BITS value"
2808#endif
2809}
2810
2811
2812/**
2813 * Atomically exchanges and adds a value whose size might differ between
2814 * platforms or compilers, ordered.
2815 *
2816 * @param pu Pointer to the variable to update.
2817 * @param uNew The value to add to *pu.
2818 * @param puOld Where to store the old value.
2819 */
2820#define ASMAtomicAddSize(pu, uNew, puOld) \
2821 do { \
2822 switch (sizeof(*(pu))) { \
2823 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2824 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2825 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2826 } \
2827 } while (0)
2828
2829
2830
2831/**
2832 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2833 *
2834 * @returns The old value.
2835 * @param pu16 Pointer to the value.
2836 * @param u16 Number to subtract.
2837 *
2838 * @remarks x86: Requires a 486 or later.
2839 */
2840DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2841{
2842 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2843}
2844
2845
2846/**
2847 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2848 *
2849 * @returns The old value.
2850 * @param pi16 Pointer to the value.
2851 * @param i16 Number to subtract.
2852 *
2853 * @remarks x86: Requires a 486 or later.
2854 */
2855DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2856{
2857 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2858}
2859
2860
2861/**
2862 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2863 *
2864 * @returns The old value.
2865 * @param pu32 Pointer to the value.
2866 * @param u32 Number to subtract.
2867 *
2868 * @remarks x86: Requires a 486 or later.
2869 */
2870DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2871{
2872 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2873}
2874
2875
2876/**
2877 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2878 *
2879 * @returns The old value.
2880 * @param pi32 Pointer to the value.
2881 * @param i32 Number to subtract.
2882 *
2883 * @remarks x86: Requires a 486 or later.
2884 */
2885DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2886{
2887 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2888}
2889
2890
2891/**
2892 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2893 *
2894 * @returns The old value.
2895 * @param pu64 Pointer to the value.
2896 * @param u64 Number to subtract.
2897 *
2898 * @remarks x86: Requires a Pentium or later.
2899 */
2900DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2901{
2902 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2903}
2904
2905
2906/**
2907 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2908 *
2909 * @returns The old value.
2910 * @param pi64 Pointer to the value.
2911 * @param i64 Number to subtract.
2912 *
2913 * @remarks x86: Requires a Pentium or later.
2914 */
2915DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2916{
2917 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2918}
2919
2920
2921/**
2922 * Atomically exchanges and subtracts from a size_t value, ordered.
2923 *
2924 * @returns The old value.
2925 * @param pcb Pointer to the size_t value.
2926 * @param cb Number to subtract.
2927 *
2928 * @remarks x86: Requires a 486 or later.
2929 */
2930DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2931{
2932#if ARCH_BITS == 64
2933 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2934#elif ARCH_BITS == 32
2935 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2936#elif ARCH_BITS == 16
2937 AssertCompileSize(size_t, 2);
2938 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
2939#else
2940# error "Unsupported ARCH_BITS value"
2941#endif
2942}
2943
2944
2945/**
2946 * Atomically exchanges and subtracts a value whose size might differ between
2947 * platforms or compilers, ordered.
2948 *
2949 * @param pu Pointer to the variable to update.
2950 * @param uNew The value to subtract from *pu.
2951 * @param puOld Where to store the old value.
2952 *
2953 * @remarks x86: Requires a 486 or later.
2954 */
2955#define ASMAtomicSubSize(pu, uNew, puOld) \
2956 do { \
2957 switch (sizeof(*(pu))) { \
2958 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2959 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2960 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2961 } \
2962 } while (0)
2963
2964
2965
2966/**
2967 * Atomically increment a 16-bit value, ordered.
2968 *
2969 * @returns The new value.
2970 * @param pu16 Pointer to the value to increment.
2971 * @remarks Not implemented. Just to make 16-bit code happy.
2972 *
2973 * @remarks x86: Requires a 486 or later.
2974 */
2975DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
2976
2977
2978/**
2979 * Atomically increment a 32-bit value, ordered.
2980 *
2981 * @returns The new value.
2982 * @param pu32 Pointer to the value to increment.
2983 *
2984 * @remarks x86: Requires a 486 or later.
2985 */
2986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2987DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
2988#else
2989DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
2990{
2991 uint32_t u32;
2992# if RT_INLINE_ASM_USES_INTRIN
2993 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
2994 return u32;
2995
2996# elif RT_INLINE_ASM_GNU_STYLE
2997 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2998 : "=r" (u32),
2999 "=m" (*pu32)
3000 : "0" (1),
3001 "m" (*pu32)
3002 : "memory");
3003 return u32+1;
3004# else
3005 __asm
3006 {
3007 mov eax, 1
3008# ifdef RT_ARCH_AMD64
3009 mov rdx, [pu32]
3010 lock xadd [rdx], eax
3011# else
3012 mov edx, [pu32]
3013 lock xadd [edx], eax
3014# endif
3015 mov u32, eax
3016 }
3017 return u32+1;
3018# endif
3019}
3020#endif
3021
3022
3023/**
3024 * Atomically increment a signed 32-bit value, ordered.
3025 *
3026 * @returns The new value.
3027 * @param pi32 Pointer to the value to increment.
3028 *
3029 * @remarks x86: Requires a 486 or later.
3030 */
3031DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3032{
3033 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3034}
3035
3036
3037/**
3038 * Atomically increment a 64-bit value, ordered.
3039 *
3040 * @returns The new value.
3041 * @param pu64 Pointer to the value to increment.
3042 *
3043 * @remarks x86: Requires a Pentium or later.
3044 */
3045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3046DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3047#else
3048DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3049{
3050# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3051 uint64_t u64;
3052 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3053 return u64;
3054
3055# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3056 uint64_t u64;
3057 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3058 : "=r" (u64),
3059 "=m" (*pu64)
3060 : "0" (1),
3061 "m" (*pu64)
3062 : "memory");
3063 return u64 + 1;
3064# else
3065 return ASMAtomicAddU64(pu64, 1) + 1;
3066# endif
3067}
3068#endif
3069
3070
3071/**
3072 * Atomically increment a signed 64-bit value, ordered.
3073 *
3074 * @returns The new value.
3075 * @param pi64 Pointer to the value to increment.
3076 *
3077 * @remarks x86: Requires a Pentium or later.
3078 */
3079DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3080{
3081 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3082}
3083
3084
3085/**
3086 * Atomically increment a size_t value, ordered.
3087 *
3088 * @returns The new value.
3089 * @param pcb Pointer to the value to increment.
3090 *
3091 * @remarks x86: Requires a 486 or later.
3092 */
3093DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3094{
3095#if ARCH_BITS == 64
3096 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3097#elif ARCH_BITS == 32
3098 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3099#elif ARCH_BITS == 16
3100 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3101#else
3102# error "Unsupported ARCH_BITS value"
3103#endif
3104}
3105
3106
3107
3108/**
3109 * Atomically decrement an unsigned 16-bit value, ordered.
3110 *
3111 * @returns The new value.
3112 * @param pu16 Pointer to the value to decrement.
3113 * @remarks Not implemented. Just to make 16-bit code happy.
3114 *
3115 * @remarks x86: Requires a 486 or later.
3116 */
3117DECLASM(uint16_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3118
3119
3120/**
3121 * Atomically decrement an unsigned 32-bit value, ordered.
3122 *
3123 * @returns The new value.
3124 * @param pu32 Pointer to the value to decrement.
3125 *
3126 * @remarks x86: Requires a 486 or later.
3127 */
3128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3129DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3130#else
3131DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3132{
3133 uint32_t u32;
3134# if RT_INLINE_ASM_USES_INTRIN
3135 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3136 return u32;
3137
3138# elif RT_INLINE_ASM_GNU_STYLE
3139 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3140 : "=r" (u32),
3141 "=m" (*pu32)
3142 : "0" (-1),
3143 "m" (*pu32)
3144 : "memory");
3145 return u32-1;
3146# else
3147 __asm
3148 {
3149 mov eax, -1
3150# ifdef RT_ARCH_AMD64
3151 mov rdx, [pu32]
3152 lock xadd [rdx], eax
3153# else
3154 mov edx, [pu32]
3155 lock xadd [edx], eax
3156# endif
3157 mov u32, eax
3158 }
3159 return u32-1;
3160# endif
3161}
3162#endif
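
/* Illustrative sketch, not part of the IPRT API (EXAMPLEOBJ is hypothetical):
 * the classic use of the increment/decrement pair is reference counting, where
 * the value returned by ASMAtomicDecU32 decides who destroys the object:
 */
#if 0 /* hypothetical example only */
typedef struct EXAMPLEOBJ { volatile uint32_t cRefs; /* +payload */ } EXAMPLEOBJ;

DECLINLINE(void) rtAsmExampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(bool) rtAsmExampleRelease(EXAMPLEOBJ *pObj)
{
    /* Returns true when the caller held the last reference and must free pObj. */
    return ASMAtomicDecU32(&pObj->cRefs) == 0;
}
#endif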
3163
3164
3165/**
3166 * Atomically decrement a signed 32-bit value, ordered.
3167 *
3168 * @returns The new value.
3169 * @param pi32 Pointer to the value to decrement.
3170 *
3171 * @remarks x86: Requires a 486 or later.
3172 */
3173DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3174{
3175 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3176}
3177
3178
3179/**
3180 * Atomically decrement an unsigned 64-bit value, ordered.
3181 *
3182 * @returns The new value.
3183 * @param pu64 Pointer to the value to decrement.
3184 *
3185 * @remarks x86: Requires a Pentium or later.
3186 */
3187#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3188DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3189#else
3190DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3191{
3192# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3193 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3194 return u64;
3195
3196# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3197 uint64_t u64;
3198 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3199 : "=r" (u64),
3200 "=m" (*pu64)
3201 : "0" (~(uint64_t)0),
3202 "m" (*pu64)
3203 : "memory");
3204 return u64-1;
3205# else
3206 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3207# endif
3208}
3209#endif
3210
3211
3212/**
3213 * Atomically decrement a signed 64-bit value, ordered.
3214 *
3215 * @returns The new value.
3216 * @param pi64 Pointer to the value to decrement.
3217 *
3218 * @remarks x86: Requires a Pentium or later.
3219 */
3220DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3221{
3222 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3223}
3224
3225
3226/**
3227 * Atomically decrement a size_t value, ordered.
3228 *
3229 * @returns The new value.
3230 * @param pcb Pointer to the value to decrement.
3231 *
3232 * @remarks x86: Requires a 486 or later.
3233 */
3234DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3235{
3236#if ARCH_BITS == 64
3237 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3238#elif ARCH_BITS == 32
3239 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3240#elif ARCH_BITS == 16
3241 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3242#else
3243# error "Unsupported ARCH_BITS value"
3244#endif
3245}
3246
3247
3248/**
3249 * Atomically Or an unsigned 32-bit value, ordered.
3250 *
3251 * @param pu32 Pointer to the variable to OR u32 with.
3252 * @param u32 The value to OR *pu32 with.
3253 *
3254 * @remarks x86: Requires a 386 or later.
3255 */
3256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3257DECLASM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3258#else
3259DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3260{
3261# if RT_INLINE_ASM_USES_INTRIN
3262 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3263
3264# elif RT_INLINE_ASM_GNU_STYLE
3265 __asm__ __volatile__("lock; orl %1, %0\n\t"
3266 : "=m" (*pu32)
3267 : "ir" (u32),
3268 "m" (*pu32));
3269# else
3270 __asm
3271 {
3272 mov eax, [u32]
3273# ifdef RT_ARCH_AMD64
3274 mov rdx, [pu32]
3275 lock or [rdx], eax
3276# else
3277 mov edx, [pu32]
3278 lock or [edx], eax
3279# endif
3280 }
3281# endif
3282}
3283#endif
3284
3285
3286/**
3287 * Atomically Or a signed 32-bit value, ordered.
3288 *
3289 * @param pi32 Pointer to the variable to OR i32 with.
3290 * @param i32 The value to OR *pi32 with.
3291 *
3292 * @remarks x86: Requires a 386 or later.
3293 */
3294DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3295{
3296 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3297}
3298
3299
3300/**
3301 * Atomically Or an unsigned 64-bit value, ordered.
3302 *
3303 * @param pu64 Pointer to the variable to OR u64 with.
3304 * @param u64 The value to OR *pu64 with.
3305 *
3306 * @remarks x86: Requires a Pentium or later.
3307 */
3308#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3309DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3310#else
3311DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3312{
3313# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3314 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3315
3316# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3317 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3318 : "=m" (*pu64)
3319 : "r" (u64),
3320 "m" (*pu64));
3321# else
3322 for (;;)
3323 {
3324 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3325 uint64_t u64New = u64Old | u64;
3326 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3327 break;
3328 ASMNopPause();
3329 }
3330# endif
3331}
3332#endif
3333
3334
3335/**
3336 * Atomically Or a signed 64-bit value, ordered.
3337 *
3338 * @param pi64 Pointer to the variable to OR i64 with.
3339 * @param i64 The value to OR *pi64 with.
3340 *
3341 * @remarks x86: Requires a Pentium or later.
3342 */
3343DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3344{
3345 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3346}
3347
3348
3349/**
3350 * Atomically And an unsigned 32-bit value, ordered.
3351 *
3352 * @param pu32 Pointer to the variable to AND u32 with.
3353 * @param u32 The value to AND *pu32 with.
3354 *
3355 * @remarks x86: Requires a 386 or later.
3356 */
3357#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3358DECLASM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3359#else
3360DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3361{
3362# if RT_INLINE_ASM_USES_INTRIN
3363 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3364
3365# elif RT_INLINE_ASM_GNU_STYLE
3366 __asm__ __volatile__("lock; andl %1, %0\n\t"
3367 : "=m" (*pu32)
3368 : "ir" (u32),
3369 "m" (*pu32));
3370# else
3371 __asm
3372 {
3373 mov eax, [u32]
3374# ifdef RT_ARCH_AMD64
3375 mov rdx, [pu32]
3376 lock and [rdx], eax
3377# else
3378 mov edx, [pu32]
3379 lock and [edx], eax
3380# endif
3381 }
3382# endif
3383}
3384#endif
3385
3386
3387/**
3388 * Atomically And a signed 32-bit value, ordered.
3389 *
3390 * @param pi32 Pointer to the variable to AND i32 with.
3391 * @param i32 The value to AND *pi32 with.
3392 *
3393 * @remarks x86: Requires a 386 or later.
3394 */
3395DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3396{
3397 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3398}
3399
3400
3401/**
3402 * Atomically And an unsigned 64-bit value, ordered.
3403 *
3404 * @param pu64 Pointer to the variable to AND u64 with.
3405 * @param u64 The value to AND *pu64 with.
3406 *
3407 * @remarks x86: Requires a Pentium or later.
3408 */
3409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3410DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3411#else
3412DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3413{
3414# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3415 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3416
3417# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3418 __asm__ __volatile__("lock; andq %1, %0\n\t"
3419 : "=m" (*pu64)
3420 : "r" (u64),
3421 "m" (*pu64));
3422# else
3423 for (;;)
3424 {
3425 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3426 uint64_t u64New = u64Old & u64;
3427 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3428 break;
3429 ASMNopPause();
3430 }
3431# endif
3432}
3433#endif
3434
3435
3436/**
3437 * Atomically And a signed 64-bit value, ordered.
3438 *
3439 * @param pi64 Pointer to the variable to AND i64 with.
3440 * @param i64 The value to AND *pi64 with.
3441 *
3442 * @remarks x86: Requires a Pentium or later.
3443 */
3444DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3445{
3446 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3447}
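
/* Illustrative sketch, not part of the IPRT API (the flag bit and helpers are
 * hypothetical): the ordered OR/AND pair gives lock-free set and clear of
 * individual bits in a shared flags word:
 */
#if 0 /* hypothetical example only */
#define EXAMPLE_F_BUSY  UINT32_C(0x00000001)
static volatile uint32_t g_fExampleFlags;

DECLINLINE(void) rtAsmExampleSetBusy(void)
{
    ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_F_BUSY);
}

DECLINLINE(void) rtAsmExampleClearBusy(void)
{
    ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_F_BUSY);
}
#endif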
3448
3449
3450/**
3451 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3452 *
3453 * @param pu32 Pointer to the variable to OR u32 with.
3454 * @param u32 The value to OR *pu32 with.
3455 *
3456 * @remarks x86: Requires a 386 or later.
3457 */
3458#if RT_INLINE_ASM_EXTERNAL
3459DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3460#else
3461DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3462{
3463# if RT_INLINE_ASM_GNU_STYLE
3464 __asm__ __volatile__("orl %1, %0\n\t"
3465 : "=m" (*pu32)
3466 : "ir" (u32),
3467 "m" (*pu32));
3468# else
3469 __asm
3470 {
3471 mov eax, [u32]
3472# ifdef RT_ARCH_AMD64
3473 mov rdx, [pu32]
3474 or [rdx], eax
3475# else
3476 mov edx, [pu32]
3477 or [edx], eax
3478# endif
3479 }
3480# endif
3481}
3482#endif
3483
3484
3485/**
3486 * Atomically OR a signed 32-bit value, unordered.
3487 *
3488 * @param pi32 Pointer to the variable to OR i32 with.
3489 * @param i32 The value to OR *pi32 with.
3490 *
3491 * @remarks x86: Requires a 386 or later.
3492 */
3493DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3494{
3495 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3496}
3497
3498
3499/**
3500 * Atomically OR an unsigned 64-bit value, unordered.
3501 *
3502 * @param pu64 Pointer to the variable to OR u64 with.
3503 * @param u64 The value to OR *pu64 with.
3504 *
3505 * @remarks x86: Requires a Pentium or later.
3506 */
3507#if RT_INLINE_ASM_EXTERNAL
3508DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3509#else
3510DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3511{
3512# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3513 __asm__ __volatile__("orq %1, %q0\n\t"
3514 : "=m" (*pu64)
3515 : "r" (u64),
3516 "m" (*pu64));
3517# else
3518 for (;;)
3519 {
3520 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3521 uint64_t u64New = u64Old | u64;
3522 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3523 break;
3524 ASMNopPause();
3525 }
3526# endif
3527}
3528#endif
3529
3530
3531/**
3532 * Atomically Or a signed 64-bit value, unordered.
3533 *
3534 * @param pi64 Pointer to the variable to OR i64 with.
3535 * @param i64 The value to OR *pi64 with.
3536 *
3537 * @remarks x86: Requires a Pentium or later.
3538 */
3539DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3540{
3541 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3542}
3543
3544
3545/**
3546 * Atomically And an unsigned 32-bit value, unordered.
3547 *
3548 * @param pu32 Pointer to the variable to AND u32 with.
3549 * @param u32 The value to AND *pu32 with.
3550 *
3551 * @remarks x86: Requires a 386 or later.
3552 */
3553#if RT_INLINE_ASM_EXTERNAL
3554DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3555#else
3556DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3557{
3558# if RT_INLINE_ASM_GNU_STYLE
3559 __asm__ __volatile__("andl %1, %0\n\t"
3560 : "=m" (*pu32)
3561 : "ir" (u32),
3562 "m" (*pu32));
3563# else
3564 __asm
3565 {
3566 mov eax, [u32]
3567# ifdef RT_ARCH_AMD64
3568 mov rdx, [pu32]
3569 and [rdx], eax
3570# else
3571 mov edx, [pu32]
3572 and [edx], eax
3573# endif
3574 }
3575# endif
3576}
3577#endif
3578
3579
3580/**
3581 * Atomically And a signed 32-bit value, unordered.
3582 *
3583 * @param pi32 Pointer to the variable to AND i32 with.
3584 * @param i32 The value to AND *pi32 with.
3585 *
3586 * @remarks x86: Requires a 386 or later.
3587 */
3588DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3589{
3590 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3591}
3592
3593
3594/**
3595 * Atomically And an unsigned 64-bit value, unordered.
3596 *
3597 * @param pu64 Pointer to the variable to AND u64 with.
3598 * @param u64 The value to AND *pu64 with.
3599 *
3600 * @remarks x86: Requires a Pentium or later.
3601 */
3602#if RT_INLINE_ASM_EXTERNAL
3603DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3604#else
3605DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3606{
3607# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3608 __asm__ __volatile__("andq %1, %0\n\t"
3609 : "=m" (*pu64)
3610 : "r" (u64),
3611 "m" (*pu64));
3612# else
3613 for (;;)
3614 {
3615 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3616 uint64_t u64New = u64Old & u64;
3617 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3618 break;
3619 ASMNopPause();
3620 }
3621# endif
3622}
3623#endif
3624
3625
3626/**
3627 * Atomically And a signed 64-bit value, unordered.
3628 *
3629 * @param pi64 Pointer to the variable to AND i64 with.
3630 * @param i64 The value to AND *pi64 with.
3631 *
3632 * @remarks x86: Requires a Pentium or later.
3633 */
3634DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3635{
3636 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3637}
3638
3639
3640/**
3641 * Atomically increment an unsigned 32-bit value, unordered.
3642 *
3643 * @returns the new value.
3644 * @param pu32 Pointer to the variable to increment.
3645 *
3646 * @remarks x86: Requires a 486 or later.
3647 */
3648#if RT_INLINE_ASM_EXTERNAL
3649DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3650#else
3651DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3652{
3653 uint32_t u32;
3654# if RT_INLINE_ASM_GNU_STYLE
3655 __asm__ __volatile__("xaddl %0, %1\n\t"
3656 : "=r" (u32),
3657 "=m" (*pu32)
3658 : "0" (1),
3659 "m" (*pu32)
3660 : "memory");
3661 return u32 + 1;
3662# else
3663 __asm
3664 {
3665 mov eax, 1
3666# ifdef RT_ARCH_AMD64
3667 mov rdx, [pu32]
3668 xadd [rdx], eax
3669# else
3670 mov edx, [pu32]
3671 xadd [edx], eax
3672# endif
3673 mov u32, eax
3674 }
3675 return u32 + 1;
3676# endif
3677}
3678#endif
3679
3680
3681/**
3682 * Atomically decrement an unsigned 32-bit value, unordered.
3683 *
3684 * @returns the new value.
3685 * @param pu32 Pointer to the variable to decrement.
3686 *
3687 * @remarks x86: Requires a 486 or later.
3688 */
3689#if RT_INLINE_ASM_EXTERNAL
3690DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3691#else
3692DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3693{
3694 uint32_t u32;
3695# if RT_INLINE_ASM_GNU_STYLE
3696 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3697 : "=r" (u32),
3698 "=m" (*pu32)
3699 : "0" (-1),
3700 "m" (*pu32)
3701 : "memory");
3702 return u32 - 1;
3703# else
3704 __asm
3705 {
3706 mov eax, -1
3707# ifdef RT_ARCH_AMD64
3708 mov rdx, [pu32]
3709 xadd [rdx], eax
3710# else
3711 mov edx, [pu32]
3712 xadd [edx], eax
3713# endif
3714 mov u32, eax
3715 }
3716 return u32 - 1;
3717# endif
3718}
3719#endif
3720
3721
3722/** @def RT_ASM_PAGE_SIZE
3723 * We try to avoid dragging in iprt/param.h here.
3724 * @internal
3725 */
3726#if defined(RT_ARCH_SPARC64)
3727# define RT_ASM_PAGE_SIZE 0x2000
3728# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3729# if PAGE_SIZE != 0x2000
3730# error "PAGE_SIZE is not 0x2000!"
3731# endif
3732# endif
3733#else
3734# define RT_ASM_PAGE_SIZE 0x1000
3735# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3736# if PAGE_SIZE != 0x1000
3737# error "PAGE_SIZE is not 0x1000!"
3738# endif
3739# endif
3740#endif
3741
3742/**
3743 * Zeros a 4K memory page.
3744 *
3745 * @param pv Pointer to the memory block. This must be page aligned.
3746 */
3747#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3748DECLASM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3749# else
3750DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3751{
3752# if RT_INLINE_ASM_USES_INTRIN
3753# ifdef RT_ARCH_AMD64
3754 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3755# else
3756 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3757# endif
3758
3759# elif RT_INLINE_ASM_GNU_STYLE
3760 RTCCUINTREG uDummy;
3761# ifdef RT_ARCH_AMD64
3762 __asm__ __volatile__("rep stosq"
3763 : "=D" (pv),
3764 "=c" (uDummy)
3765 : "0" (pv),
3766 "c" (RT_ASM_PAGE_SIZE >> 3),
3767 "a" (0)
3768 : "memory");
3769# else
3770 __asm__ __volatile__("rep stosl"
3771 : "=D" (pv),
3772 "=c" (uDummy)
3773 : "0" (pv),
3774 "c" (RT_ASM_PAGE_SIZE >> 2),
3775 "a" (0)
3776 : "memory");
3777# endif
3778# else
3779 __asm
3780 {
3781# ifdef RT_ARCH_AMD64
3782 xor rax, rax
3783 mov ecx, 0200h
3784 mov rdi, [pv]
3785 rep stosq
3786# else
3787 xor eax, eax
3788 mov ecx, 0400h
3789 mov edi, [pv]
3790 rep stosd
3791# endif
3792 }
3793# endif
3794}
3795# endif
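

/*
 * Usage sketch (illustrative only): the caller must supply a page aligned
 * block of RT_ASM_PAGE_SIZE bytes.  The GCC style alignment attribute and the
 * buffer name are assumptions made for the example; real code would normally
 * use a page from the host memory allocator.
 */
#if 0 /* usage sketch, not compiled */
static uint8_t g_abExamplePage[RT_ASM_PAGE_SIZE] __attribute__((__aligned__(RT_ASM_PAGE_SIZE)));

static void exampleResetPage(void)
{
    ASMMemZeroPage(&g_abExamplePage[0]);
    Assert(ASMMemIsZeroPage(&g_abExamplePage[0]));
}
#endif /* usage sketch */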
3796
3797
3798/**
3799 * Zeros a memory block with a 32-bit aligned size.
3800 *
3801 * @param pv Pointer to the memory block.
3802 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3803 */
3804#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3805DECLASM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3806#else
3807DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3808{
3809# if RT_INLINE_ASM_USES_INTRIN
3810# ifdef RT_ARCH_AMD64
3811 if (!(cb & 7))
3812 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3813 else
3814# endif
3815 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3816
3817# elif RT_INLINE_ASM_GNU_STYLE
3818 __asm__ __volatile__("rep stosl"
3819 : "=D" (pv),
3820 "=c" (cb)
3821 : "0" (pv),
3822 "1" (cb >> 2),
3823 "a" (0)
3824 : "memory");
3825# else
3826 __asm
3827 {
3828 xor eax, eax
3829# ifdef RT_ARCH_AMD64
3830 mov rcx, [cb]
3831 shr rcx, 2
3832 mov rdi, [pv]
3833# else
3834 mov ecx, [cb]
3835 shr ecx, 2
3836 mov edi, [pv]
3837# endif
3838 rep stosd
3839 }
3840# endif
3841}
3842#endif
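

/*
 * Usage sketch (illustrative only): ASMMemZero32 requires the byte count to be
 * a multiple of four, which makes it a good fit for structures built from
 * 32-bit and 64-bit fields.  The structure below is invented for the example.
 */
#if 0 /* usage sketch, not compiled */
typedef struct EXAMPLESTATS
{
    uint32_t cReads;
    uint32_t cWrites;
    uint64_t cbTransferred;
} EXAMPLESTATS;

static void exampleResetStats(EXAMPLESTATS *pStats)
{
    AssertCompile(!(sizeof(EXAMPLESTATS) & 3)); /* the size must be 32-bit aligned */
    ASMMemZero32(pStats, sizeof(*pStats));
}
#endif /* usage sketch */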
3843
3844
3845/**
3846 * Fills a memory block of 32-bit aligned size with a 32-bit value.
3847 *
3848 * @param pv Pointer to the memory block.
3849 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3850 * @param u32 The value to fill with.
3851 */
3852#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3853DECLASM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3854#else
3855DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3856{
3857# if RT_INLINE_ASM_USES_INTRIN
3858# ifdef RT_ARCH_AMD64
3859 if (!(cb & 7))
3860 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3861 else
3862# endif
3863 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3864
3865# elif RT_INLINE_ASM_GNU_STYLE
3866 __asm__ __volatile__("rep stosl"
3867 : "=D" (pv),
3868 "=c" (cb)
3869 : "0" (pv),
3870 "1" (cb >> 2),
3871 "a" (u32)
3872 : "memory");
3873# else
3874 __asm
3875 {
3876# ifdef RT_ARCH_AMD64
3877 mov rcx, [cb]
3878 shr rcx, 2
3879 mov rdi, [pv]
3880# else
3881 mov ecx, [cb]
3882 shr ecx, 2
3883 mov edi, [pv]
3884# endif
3885 mov eax, [u32]
3886 rep stosd
3887 }
3888# endif
3889}
3890#endif
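

/*
 * Usage sketch (illustrative only): filling a table with a poison pattern and
 * verifying the fill with ASMMemFirstMismatchingU32.  The pattern and the
 * helper name are made up for the example.
 */
#if 0 /* usage sketch, not compiled */
static void examplePoisonTable(uint32_t *pau32Table, size_t cEntries)
{
    size_t const cb = cEntries * sizeof(uint32_t);      /* already 32-bit aligned */
    ASMMemFill32(pau32Table, cb, UINT32_C(0xdeadbeef));
    Assert(!ASMMemFirstMismatchingU32(pau32Table, cb, UINT32_C(0xdeadbeef)));
}
#endif /* usage sketch */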
3891
3892
3893/**
3894 * Checks if a memory block is all zeros.
3895 *
3896 * @returns Pointer to the first non-zero byte.
3897 * @returns NULL if all zero.
3898 *
3899 * @param pv Pointer to the memory block.
3900 * @param cb Number of bytes in the block.
3901 *
3902 * @todo Fix name, it is a predicate function but it's not returning boolean!
3903 */
3904#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3905 && !defined(RT_ARCH_SPARC64) \
3906 && !defined(RT_ARCH_SPARC)
3907DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3908#else
3909DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3910{
3911 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3912 for (; cb; cb--, pb++)
3913 if (RT_LIKELY(*pb == 0))
3914 { /* likely */ }
3915 else
3916 return (void RT_FAR *)pb;
3917 return NULL;
3918}
3919#endif
3920
3921
3922/**
3923 * Checks if a memory block is all zeros.
3924 *
3925 * @returns true if zero, false if not.
3926 *
3927 * @param pv Pointer to the memory block.
3928 * @param cb Number of bytes in the block.
3929 *
3930 * @sa ASMMemFirstNonZero
3931 */
3932DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3933{
3934 return ASMMemFirstNonZero(pv, cb) == NULL;
3935}
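

/*
 * Usage sketch (illustrative only): ASMMemFirstNonZero returns a pointer to
 * the offending byte, which makes it easy to report the offset of the first
 * stray value when validating a reserved area.  The helper name is made up
 * for the example.
 */
#if 0 /* usage sketch, not compiled */
static ptrdiff_t exampleOffsetOfFirstNonZero(const void *pvReserved, size_t cbReserved)
{
    const uint8_t *pbBad = (const uint8_t *)ASMMemFirstNonZero(pvReserved, cbReserved);
    if (!pbBad)
        return -1;                                  /* all zero */
    return pbBad - (const uint8_t *)pvReserved;     /* byte offset of the first non-zero byte */
}
#endif /* usage sketch */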
3936
3937
3938/**
3939 * Checks if a memory page is all zeros.
3940 *
3941 * @returns true if the page is all zeros, false if not.
3942 *
3943 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
3944 * boundary.
3945 */
3946DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
3947{
3948# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3949 union { RTCCUINTREG r; bool f; } uAX;
3950 RTCCUINTREG xCX, xDI;
3951 Assert(!((uintptr_t)pvPage & 15));
3952 __asm__ __volatile__("repe; "
3953# ifdef RT_ARCH_AMD64
3954 "scasq\n\t"
3955# else
3956 "scasl\n\t"
3957# endif
3958 "setnc %%al\n\t"
3959 : "=&c" (xCX),
3960 "=&D" (xDI),
3961 "=&a" (uAX.r)
3962 : "mr" (pvPage),
3963# ifdef RT_ARCH_AMD64
3964 "0" (RT_ASM_PAGE_SIZE/8),
3965# else
3966 "0" (RT_ASM_PAGE_SIZE/4),
3967# endif
3968 "1" (pvPage),
3969 "2" (0));
3970 return uAX.f;
3971# else
3972 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
3973 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3974 Assert(!((uintptr_t)pvPage & 15));
3975 for (;;)
3976 {
3977 if (puPtr[0]) return false;
3978 if (puPtr[4]) return false;
3979
3980 if (puPtr[2]) return false;
3981 if (puPtr[6]) return false;
3982
3983 if (puPtr[1]) return false;
3984 if (puPtr[5]) return false;
3985
3986 if (puPtr[3]) return false;
3987 if (puPtr[7]) return false;
3988
3989 if (!--cLeft)
3990 return true;
3991 puPtr += 8;
3992 }
3993# endif
3994}
3995
3996
3997/**
3998 * Checks if a memory block is filled with the specified byte, returning the
3999 * first mismatch.
4000 *
4001 * This is sort of an inverted memchr.
4002 *
4003 * @returns Pointer to the byte which doesn't equal u8.
4004 * @returns NULL if all equal to u8.
4005 *
4006 * @param pv Pointer to the memory block.
4007 * @param cb Number of bytes in the block.
4008 * @param u8 The value it's supposed to be filled with.
4009 *
4010 * @remarks No alignment requirements.
4011 */
4012#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4013 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4014 && !defined(RT_ARCH_SPARC64) \
4015 && !defined(RT_ARCH_SPARC)
4016DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4017#else
4018DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4019{
4020 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4021 for (; cb; cb--, pb++)
4022 if (RT_LIKELY(*pb == u8))
4023 { /* likely */ }
4024 else
4025 return (void *)pb;
4026 return NULL;
4027}
4028#endif
4029
4030
4031/**
4032 * Checks if a memory block is filled with the specified byte.
4033 *
4034 * @returns true if all matching, false if not.
4035 *
4036 * @param pv Pointer to the memory block.
4037 * @param cb Number of bytes in the block.
4038 * @param u8 The value it's supposed to be filled with.
4039 *
4040 * @remarks No alignment requirements.
4041 */
4042DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4043{
4044 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4045}
4046
4047
4048/**
4049 * Checks if a memory block is filled with the specified 32-bit value.
4050 *
4051 * This is a sort of inverted memchr.
4052 *
4053 * @returns Pointer to the first value which doesn't equal u32.
4054 * @returns NULL if all equal to u32.
4055 *
4056 * @param pv Pointer to the memory block.
4057 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4058 * @param u32 The value it's supposed to be filled with.
4059 */
4060DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4061{
4062/** @todo rewrite this in inline assembly? */
4063 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4064 for (; cb; cb -= 4, pu32++)
4065 if (RT_LIKELY(*pu32 == u32))
4066 { /* likely */ }
4067 else
4068 return (uint32_t RT_FAR *)pu32;
4069 return NULL;
4070}
4071
4072
4073/**
4074 * Probes a byte pointer for read access.
4075 *
4076 * While the function will fault if the byte is not read accessible, the
4077 * idea is to do the probing in a safe place, e.g. before acquiring locks
4078 * and the like.
4079 *
4080 * Also, this function guarantees that an eager compiler is not going
4081 * to optimize the probing away.
4082 *
4083 * @param pvByte Pointer to the byte.
4084 */
4085#if RT_INLINE_ASM_EXTERNAL
4086DECLASM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4087#else
4088DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4089{
4090 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4091 uint8_t u8;
4092# if RT_INLINE_ASM_GNU_STYLE
4093 __asm__ __volatile__("movb (%1), %0\n\t"
4094 : "=r" (u8)
4095 : "r" (pvByte));
4096# else
4097 __asm
4098 {
4099# ifdef RT_ARCH_AMD64
4100 mov rax, [pvByte]
4101 mov al, [rax]
4102# else
4103 mov eax, [pvByte]
4104 mov al, [eax]
4105# endif
4106 mov [u8], al
4107 }
4108# endif
4109 return u8;
4110}
4111#endif
4112
4113/**
4114 * Probes a buffer for read access page by page.
4115 *
4116 * While the function will fault if the buffer is not fully read
4117 * accessible, the idea is to do the probing in a safe place, e.g. before
4118 * acquiring locks and the like.
4119 *
4120 * Also, this function guarantees that an eager compiler is not going
4121 * to optimize the probing away.
4122 *
4123 * @param pvBuf Pointer to the buffer.
4124 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4125 */
4126DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4127{
4128 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4129 /* the first byte */
4130 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4131 ASMProbeReadByte(pu8);
4132
4133 /* the pages in between. */
4134 while (cbBuf > RT_ASM_PAGE_SIZE)
4135 {
4136 ASMProbeReadByte(pu8);
4137 cbBuf -= RT_ASM_PAGE_SIZE;
4138 pu8 += RT_ASM_PAGE_SIZE;
4139 }
4140
4141 /* the last byte */
4142 ASMProbeReadByte(pu8 + cbBuf - 1);
4143}
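

/*
 * Usage sketch (illustrative only): probing a caller supplied buffer before
 * taking a lock, so that a potential page fault is raised while no locks are
 * held.  The lock helpers named below are placeholders, not real IPRT API;
 * memcpy is the standard <string.h> function.
 */
#if 0 /* usage sketch, not compiled */
static void exampleCopyInUnderLock(void *pvDst, const void *pvSrc, size_t cb)
{
    ASMProbeReadBuffer(pvSrc, cb);      /* fault here, if at all, before the lock is taken */
    examplePlaceholderLockAcquire();
    memcpy(pvDst, pvSrc, cb);
    examplePlaceholderLockRelease();
}
#endif /* usage sketch */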
4144
4145
4146
4147/** @defgroup grp_inline_bits Bit Operations
4148 * @{
4149 */
4150
4151
4152/**
4153 * Sets a bit in a bitmap.
4154 *
4155 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4156 * @param iBit The bit to set.
4157 *
4158 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4159 * However, doing so will yield better performance as well as avoiding
4160 * traps accessing the last bits in the bitmap.
4161 */
4162#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4163DECLASM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4164#else
4165DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4166{
4167# if RT_INLINE_ASM_USES_INTRIN
4168 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4169
4170# elif RT_INLINE_ASM_GNU_STYLE
4171 __asm__ __volatile__("btsl %1, %0"
4172 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4173 : "Ir" (iBit),
4174 "m" (*(volatile long RT_FAR *)pvBitmap)
4175 : "memory");
4176# else
4177 __asm
4178 {
4179# ifdef RT_ARCH_AMD64
4180 mov rax, [pvBitmap]
4181 mov edx, [iBit]
4182 bts [rax], edx
4183# else
4184 mov eax, [pvBitmap]
4185 mov edx, [iBit]
4186 bts [eax], edx
4187# endif
4188 }
4189# endif
4190}
4191#endif
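

/*
 * Usage sketch (illustrative only): the bitmap routines operate on an array of
 * 32-bit words, bit 0 being the least significant bit of the first word.  The
 * slot bitmap below is invented for the example.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t g_bmExampleSlots[256 / 32];     /* 256 slots, 32-bit aligned storage */

static void exampleMarkSlotUsed(uint32_t iSlot)
{
    Assert(iSlot < 256);
    ASMBitSet(&g_bmExampleSlots[0], (int32_t)iSlot);
    Assert(ASMBitTest(&g_bmExampleSlots[0], (int32_t)iSlot));
}
#endif /* usage sketch */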
4192
4193
4194/**
4195 * Atomically sets a bit in a bitmap, ordered.
4196 *
4197 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4198 * the memory access isn't atomic!
4199 * @param iBit The bit to set.
4200 *
4201 * @remarks x86: Requires a 386 or later.
4202 */
4203#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4204DECLASM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4205#else
4206DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4207{
4208 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4209# if RT_INLINE_ASM_USES_INTRIN
4210 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4211# elif RT_INLINE_ASM_GNU_STYLE
4212 __asm__ __volatile__("lock; btsl %1, %0"
4213 : "=m" (*(volatile long *)pvBitmap)
4214 : "Ir" (iBit),
4215 "m" (*(volatile long *)pvBitmap)
4216 : "memory");
4217# else
4218 __asm
4219 {
4220# ifdef RT_ARCH_AMD64
4221 mov rax, [pvBitmap]
4222 mov edx, [iBit]
4223 lock bts [rax], edx
4224# else
4225 mov eax, [pvBitmap]
4226 mov edx, [iBit]
4227 lock bts [eax], edx
4228# endif
4229 }
4230# endif
4231}
4232#endif
4233
4234
4235/**
4236 * Clears a bit in a bitmap.
4237 *
4238 * @param pvBitmap Pointer to the bitmap.
4239 * @param iBit The bit to clear.
4240 *
4241 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4242 * However, doing so will yield better performance as well as avoiding
4243 * traps accessing the last bits in the bitmap.
4244 */
4245#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4246DECLASM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4247#else
4248DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4249{
4250# if RT_INLINE_ASM_USES_INTRIN
4251 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4252
4253# elif RT_INLINE_ASM_GNU_STYLE
4254 __asm__ __volatile__("btrl %1, %0"
4255 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4256 : "Ir" (iBit),
4257 "m" (*(volatile long RT_FAR *)pvBitmap)
4258 : "memory");
4259# else
4260 __asm
4261 {
4262# ifdef RT_ARCH_AMD64
4263 mov rax, [pvBitmap]
4264 mov edx, [iBit]
4265 btr [rax], edx
4266# else
4267 mov eax, [pvBitmap]
4268 mov edx, [iBit]
4269 btr [eax], edx
4270# endif
4271 }
4272# endif
4273}
4274#endif
4275
4276
4277/**
4278 * Atomically clears a bit in a bitmap, ordered.
4279 *
4280 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4281 * the memory access isn't atomic!
4282 * @param iBit The bit to clear.
4283 *
4284 * @remarks No memory barrier, take care on SMP.
4285 * @remarks x86: Requires a 386 or later.
4286 */
4287#if RT_INLINE_ASM_EXTERNAL
4288DECLASM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4289#else
4290DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4291{
4292 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4293# if RT_INLINE_ASM_GNU_STYLE
4294 __asm__ __volatile__("lock; btrl %1, %0"
4295 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4296 : "Ir" (iBit),
4297 "m" (*(volatile long RT_FAR *)pvBitmap)
4298 : "memory");
4299# else
4300 __asm
4301 {
4302# ifdef RT_ARCH_AMD64
4303 mov rax, [pvBitmap]
4304 mov edx, [iBit]
4305 lock btr [rax], edx
4306# else
4307 mov eax, [pvBitmap]
4308 mov edx, [iBit]
4309 lock btr [eax], edx
4310# endif
4311 }
4312# endif
4313}
4314#endif
4315
4316
4317/**
4318 * Toggles a bit in a bitmap.
4319 *
4320 * @param pvBitmap Pointer to the bitmap.
4321 * @param iBit The bit to toggle.
4322 *
4323 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4324 * However, doing so will yield better performance as well as avoiding
4325 * traps accessing the last bits in the bitmap.
4326 */
4327#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4328DECLASM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4329#else
4330DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4331{
4332# if RT_INLINE_ASM_USES_INTRIN
4333 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4334# elif RT_INLINE_ASM_GNU_STYLE
4335 __asm__ __volatile__("btcl %1, %0"
4336 : "=m" (*(volatile long *)pvBitmap)
4337 : "Ir" (iBit),
4338 "m" (*(volatile long *)pvBitmap)
4339 : "memory");
4340# else
4341 __asm
4342 {
4343# ifdef RT_ARCH_AMD64
4344 mov rax, [pvBitmap]
4345 mov edx, [iBit]
4346 btc [rax], edx
4347# else
4348 mov eax, [pvBitmap]
4349 mov edx, [iBit]
4350 btc [eax], edx
4351# endif
4352 }
4353# endif
4354}
4355#endif
4356
4357
4358/**
4359 * Atomically toggles a bit in a bitmap, ordered.
4360 *
4361 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4362 * the memory access isn't atomic!
4363 * @param iBit The bit to toggle.
4364 *
4365 * @remarks x86: Requires a 386 or later.
4366 */
4367#if RT_INLINE_ASM_EXTERNAL
4368DECLASM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4369#else
4370DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4371{
4372 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4373# if RT_INLINE_ASM_GNU_STYLE
4374 __asm__ __volatile__("lock; btcl %1, %0"
4375 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4376 : "Ir" (iBit),
4377 "m" (*(volatile long RT_FAR *)pvBitmap)
4378 : "memory");
4379# else
4380 __asm
4381 {
4382# ifdef RT_ARCH_AMD64
4383 mov rax, [pvBitmap]
4384 mov edx, [iBit]
4385 lock btc [rax], edx
4386# else
4387 mov eax, [pvBitmap]
4388 mov edx, [iBit]
4389 lock btc [eax], edx
4390# endif
4391 }
4392# endif
4393}
4394#endif
4395
4396
4397/**
4398 * Tests and sets a bit in a bitmap.
4399 *
4400 * @returns true if the bit was set.
4401 * @returns false if the bit was clear.
4402 *
4403 * @param pvBitmap Pointer to the bitmap.
4404 * @param iBit The bit to test and set.
4405 *
4406 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4407 * However, doing so will yield better performance as well as avoiding
4408 * traps accessing the last bits in the bitmap.
4409 */
4410#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4411DECLASM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4412#else
4413DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4414{
4415 union { bool f; uint32_t u32; uint8_t u8; } rc;
4416# if RT_INLINE_ASM_USES_INTRIN
4417 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4418
4419# elif RT_INLINE_ASM_GNU_STYLE
4420 __asm__ __volatile__("btsl %2, %1\n\t"
4421 "setc %b0\n\t"
4422 "andl $1, %0\n\t"
4423 : "=q" (rc.u32),
4424 "=m" (*(volatile long RT_FAR *)pvBitmap)
4425 : "Ir" (iBit),
4426 "m" (*(volatile long RT_FAR *)pvBitmap)
4427 : "memory");
4428# else
4429 __asm
4430 {
4431 mov edx, [iBit]
4432# ifdef RT_ARCH_AMD64
4433 mov rax, [pvBitmap]
4434 bts [rax], edx
4435# else
4436 mov eax, [pvBitmap]
4437 bts [eax], edx
4438# endif
4439 setc al
4440 and eax, 1
4441 mov [rc.u32], eax
4442 }
4443# endif
4444 return rc.f;
4445}
4446#endif
4447
4448
4449/**
4450 * Atomically tests and sets a bit in a bitmap, ordered.
4451 *
4452 * @returns true if the bit was set.
4453 * @returns false if the bit was clear.
4454 *
4455 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4456 * the memory access isn't atomic!
4457 * @param iBit The bit to test and set.
4458 *
4459 * @remarks x86: Requires a 386 or later.
4460 */
4461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4462DECLASM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4463#else
4464DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4465{
4466 union { bool f; uint32_t u32; uint8_t u8; } rc;
4467 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4468# if RT_INLINE_ASM_USES_INTRIN
4469 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4470# elif RT_INLINE_ASM_GNU_STYLE
4471 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4472 "setc %b0\n\t"
4473 "andl $1, %0\n\t"
4474 : "=q" (rc.u32),
4475 "=m" (*(volatile long RT_FAR *)pvBitmap)
4476 : "Ir" (iBit),
4477 "m" (*(volatile long RT_FAR *)pvBitmap)
4478 : "memory");
4479# else
4480 __asm
4481 {
4482 mov edx, [iBit]
4483# ifdef RT_ARCH_AMD64
4484 mov rax, [pvBitmap]
4485 lock bts [rax], edx
4486# else
4487 mov eax, [pvBitmap]
4488 lock bts [eax], edx
4489# endif
4490 setc al
4491 and eax, 1
4492 mov [rc.u32], eax
4493 }
4494# endif
4495 return rc.f;
4496}
4497#endif
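

/*
 * Usage sketch (illustrative only): a shared bitmap can be used as a set of
 * try-locks; ASMAtomicBitTestAndSet returns the previous bit value, so a
 * false return means the caller just claimed the slot.  The bitmap is made up
 * for the example.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t volatile g_bmExampleBusy[128 / 32];

static bool exampleTryClaimSlot(uint32_t iSlot)
{
    return !ASMAtomicBitTestAndSet(&g_bmExampleBusy[0], (int32_t)iSlot);
}

static void exampleReleaseSlot(uint32_t iSlot)
{
    Assert(ASMBitTest(&g_bmExampleBusy[0], (int32_t)iSlot));
    ASMAtomicBitClear(&g_bmExampleBusy[0], (int32_t)iSlot);
}
#endif /* usage sketch */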
4498
4499
4500/**
4501 * Tests and clears a bit in a bitmap.
4502 *
4503 * @returns true if the bit was set.
4504 * @returns false if the bit was clear.
4505 *
4506 * @param pvBitmap Pointer to the bitmap.
4507 * @param iBit The bit to test and clear.
4508 *
4509 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4510 * However, doing so will yield better performance as well as avoiding
4511 * traps accessing the last bits in the bitmap.
4512 */
4513#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4514DECLASM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4515#else
4516DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4517{
4518 union { bool f; uint32_t u32; uint8_t u8; } rc;
4519# if RT_INLINE_ASM_USES_INTRIN
4520 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4521
4522# elif RT_INLINE_ASM_GNU_STYLE
4523 __asm__ __volatile__("btrl %2, %1\n\t"
4524 "setc %b0\n\t"
4525 "andl $1, %0\n\t"
4526 : "=q" (rc.u32),
4527 "=m" (*(volatile long RT_FAR *)pvBitmap)
4528 : "Ir" (iBit),
4529 "m" (*(volatile long RT_FAR *)pvBitmap)
4530 : "memory");
4531# else
4532 __asm
4533 {
4534 mov edx, [iBit]
4535# ifdef RT_ARCH_AMD64
4536 mov rax, [pvBitmap]
4537 btr [rax], edx
4538# else
4539 mov eax, [pvBitmap]
4540 btr [eax], edx
4541# endif
4542 setc al
4543 and eax, 1
4544 mov [rc.u32], eax
4545 }
4546# endif
4547 return rc.f;
4548}
4549#endif
4550
4551
4552/**
4553 * Atomically tests and clears a bit in a bitmap, ordered.
4554 *
4555 * @returns true if the bit was set.
4556 * @returns false if the bit was clear.
4557 *
4558 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4559 * the memory access isn't atomic!
4560 * @param iBit The bit to test and clear.
4561 *
4562 * @remarks No memory barrier, take care on SMP.
4563 * @remarks x86: Requires a 386 or later.
4564 */
4565#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4566DECLASM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4567#else
4568DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4569{
4570 union { bool f; uint32_t u32; uint8_t u8; } rc;
4571 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4572# if RT_INLINE_ASM_USES_INTRIN
4573 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4574
4575# elif RT_INLINE_ASM_GNU_STYLE
4576 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4577 "setc %b0\n\t"
4578 "andl $1, %0\n\t"
4579 : "=q" (rc.u32),
4580 "=m" (*(volatile long RT_FAR *)pvBitmap)
4581 : "Ir" (iBit),
4582 "m" (*(volatile long RT_FAR *)pvBitmap)
4583 : "memory");
4584# else
4585 __asm
4586 {
4587 mov edx, [iBit]
4588# ifdef RT_ARCH_AMD64
4589 mov rax, [pvBitmap]
4590 lock btr [rax], edx
4591# else
4592 mov eax, [pvBitmap]
4593 lock btr [eax], edx
4594# endif
4595 setc al
4596 and eax, 1
4597 mov [rc.u32], eax
4598 }
4599# endif
4600 return rc.f;
4601}
4602#endif
4603
4604
4605/**
4606 * Tests and toggles a bit in a bitmap.
4607 *
4608 * @returns true if the bit was set.
4609 * @returns false if the bit was clear.
4610 *
4611 * @param pvBitmap Pointer to the bitmap.
4612 * @param iBit The bit to test and toggle.
4613 *
4614 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4615 * However, doing so will yield better performance as well as avoiding
4616 * traps accessing the last bits in the bitmap.
4617 */
4618#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4619DECLASM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4620#else
4621DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4622{
4623 union { bool f; uint32_t u32; uint8_t u8; } rc;
4624# if RT_INLINE_ASM_USES_INTRIN
4625 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4626
4627# elif RT_INLINE_ASM_GNU_STYLE
4628 __asm__ __volatile__("btcl %2, %1\n\t"
4629 "setc %b0\n\t"
4630 "andl $1, %0\n\t"
4631 : "=q" (rc.u32),
4632 "=m" (*(volatile long RT_FAR *)pvBitmap)
4633 : "Ir" (iBit),
4634 "m" (*(volatile long RT_FAR *)pvBitmap)
4635 : "memory");
4636# else
4637 __asm
4638 {
4639 mov edx, [iBit]
4640# ifdef RT_ARCH_AMD64
4641 mov rax, [pvBitmap]
4642 btc [rax], edx
4643# else
4644 mov eax, [pvBitmap]
4645 btc [eax], edx
4646# endif
4647 setc al
4648 and eax, 1
4649 mov [rc.u32], eax
4650 }
4651# endif
4652 return rc.f;
4653}
4654#endif
4655
4656
4657/**
4658 * Atomically tests and toggles a bit in a bitmap, ordered.
4659 *
4660 * @returns true if the bit was set.
4661 * @returns false if the bit was clear.
4662 *
4663 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4664 * the memory access isn't atomic!
4665 * @param iBit The bit to test and toggle.
4666 *
4667 * @remarks x86: Requires a 386 or later.
4668 */
4669#if RT_INLINE_ASM_EXTERNAL
4670DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4671#else
4672DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4673{
4674 union { bool f; uint32_t u32; uint8_t u8; } rc;
4675 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4676# if RT_INLINE_ASM_GNU_STYLE
4677 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4678 "setc %b0\n\t"
4679 "andl $1, %0\n\t"
4680 : "=q" (rc.u32),
4681 "=m" (*(volatile long RT_FAR *)pvBitmap)
4682 : "Ir" (iBit),
4683 "m" (*(volatile long RT_FAR *)pvBitmap)
4684 : "memory");
4685# else
4686 __asm
4687 {
4688 mov edx, [iBit]
4689# ifdef RT_ARCH_AMD64
4690 mov rax, [pvBitmap]
4691 lock btc [rax], edx
4692# else
4693 mov eax, [pvBitmap]
4694 lock btc [eax], edx
4695# endif
4696 setc al
4697 and eax, 1
4698 mov [rc.u32], eax
4699 }
4700# endif
4701 return rc.f;
4702}
4703#endif
4704
4705
4706/**
4707 * Tests if a bit in a bitmap is set.
4708 *
4709 * @returns true if the bit is set.
4710 * @returns false if the bit is clear.
4711 *
4712 * @param pvBitmap Pointer to the bitmap.
4713 * @param iBit The bit to test.
4714 *
4715 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4716 * However, doing so will yield better performance as well as avoiding
4717 * traps accessing the last bits in the bitmap.
4718 */
4719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4720DECLASM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4721#else
4722DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4723{
4724 union { bool f; uint32_t u32; uint8_t u8; } rc;
4725# if RT_INLINE_ASM_USES_INTRIN
4726 rc.u32 = _bittest((long *)pvBitmap, iBit);
4727# elif RT_INLINE_ASM_GNU_STYLE
4728
4729 __asm__ __volatile__("btl %2, %1\n\t"
4730 "setc %b0\n\t"
4731 "andl $1, %0\n\t"
4732 : "=q" (rc.u32)
4733 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4734 "Ir" (iBit)
4735 : "memory");
4736# else
4737 __asm
4738 {
4739 mov edx, [iBit]
4740# ifdef RT_ARCH_AMD64
4741 mov rax, [pvBitmap]
4742 bt [rax], edx
4743# else
4744 mov eax, [pvBitmap]
4745 bt [eax], edx
4746# endif
4747 setc al
4748 and eax, 1
4749 mov [rc.u32], eax
4750 }
4751# endif
4752 return rc.f;
4753}
4754#endif
4755
4756
4757/**
4758 * Clears a bit range within a bitmap.
4759 *
4760 * @param pvBitmap Pointer to the bitmap.
4761 * @param iBitStart The first bit to clear.
4762 * @param iBitEnd The first bit not to clear.
4763 */
4764DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4765{
4766 if (iBitStart < iBitEnd)
4767 {
4768 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4769 int32_t iStart = iBitStart & ~31;
4770 int32_t iEnd = iBitEnd & ~31;
4771 if (iStart == iEnd)
4772 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4773 else
4774 {
4775 /* bits in first dword. */
4776 if (iBitStart & 31)
4777 {
4778 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4779 pu32++;
4780 iBitStart = iStart + 32;
4781 }
4782
4783 /* whole dword. */
4784 if (iBitStart != iEnd)
4785 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4786
4787 /* bits in last dword. */
4788 if (iBitEnd & 31)
4789 {
4790 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4791 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4792 }
4793 }
4794 }
4795}
4796
4797
4798/**
4799 * Sets a bit range within a bitmap.
4800 *
4801 * @param pvBitmap Pointer to the bitmap.
4802 * @param iBitStart The first bit to set.
4803 * @param iBitEnd The first bit not to set.
4804 */
4805DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4806{
4807 if (iBitStart < iBitEnd)
4808 {
4809 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4810 int32_t iStart = iBitStart & ~31;
4811 int32_t iEnd = iBitEnd & ~31;
4812 if (iStart == iEnd)
4813 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4814 else
4815 {
4816 /* bits in first dword. */
4817 if (iBitStart & 31)
4818 {
4819 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4820 pu32++;
4821 iBitStart = iStart + 32;
4822 }
4823
4824 /* whole dword. */
4825 if (iBitStart != iEnd)
4826 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4827
4828 /* bits in last dword. */
4829 if (iBitEnd & 31)
4830 {
4831 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4832 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4833 }
4834 }
4835 }
4836}
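

/*
 * Usage sketch (illustrative only): iBitEnd is exclusive, so the calls below
 * touch bits 8 through 15 only and leave bit 16 alone.  The bitmap parameter
 * is assumed to cover at least 32 bits.
 */
#if 0 /* usage sketch, not compiled */
static void exampleToggleSecondByte(uint32_t *pau32Bitmap)
{
    ASMBitSetRange(pau32Bitmap, 8, 16);     /* sets bits 8..15 */
    ASMBitClearRange(pau32Bitmap, 8, 16);   /* clears them again */
}
#endif /* usage sketch */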
4837
4838
4839/**
4840 * Finds the first clear bit in a bitmap.
4841 *
4842 * @returns Index of the first zero bit.
4843 * @returns -1 if no clear bit was found.
4844 * @param pvBitmap Pointer to the bitmap.
4845 * @param cBits The number of bits in the bitmap. Multiple of 32.
4846 */
4847#if RT_INLINE_ASM_EXTERNAL
4848DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4849#else
4850DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4851{
4852 if (cBits)
4853 {
4854 int32_t iBit;
4855# if RT_INLINE_ASM_GNU_STYLE
4856 RTCCUINTREG uEAX, uECX, uEDI;
4857 cBits = RT_ALIGN_32(cBits, 32);
4858 __asm__ __volatile__("repe; scasl\n\t"
4859 "je 1f\n\t"
4860# ifdef RT_ARCH_AMD64
4861 "lea -4(%%rdi), %%rdi\n\t"
4862 "xorl (%%rdi), %%eax\n\t"
4863 "subq %5, %%rdi\n\t"
4864# else
4865 "lea -4(%%edi), %%edi\n\t"
4866 "xorl (%%edi), %%eax\n\t"
4867 "subl %5, %%edi\n\t"
4868# endif
4869 "shll $3, %%edi\n\t"
4870 "bsfl %%eax, %%edx\n\t"
4871 "addl %%edi, %%edx\n\t"
4872 "1:\t\n"
4873 : "=d" (iBit),
4874 "=&c" (uECX),
4875 "=&D" (uEDI),
4876 "=&a" (uEAX)
4877 : "0" (0xffffffff),
4878 "mr" (pvBitmap),
4879 "1" (cBits >> 5),
4880 "2" (pvBitmap),
4881 "3" (0xffffffff));
4882# else
4883 cBits = RT_ALIGN_32(cBits, 32);
4884 __asm
4885 {
4886# ifdef RT_ARCH_AMD64
4887 mov rdi, [pvBitmap]
4888 mov rbx, rdi
4889# else
4890 mov edi, [pvBitmap]
4891 mov ebx, edi
4892# endif
4893 mov edx, 0ffffffffh
4894 mov eax, edx
4895 mov ecx, [cBits]
4896 shr ecx, 5
4897 repe scasd
4898 je done
4899
4900# ifdef RT_ARCH_AMD64
4901 lea rdi, [rdi - 4]
4902 xor eax, [rdi]
4903 sub rdi, rbx
4904# else
4905 lea edi, [edi - 4]
4906 xor eax, [edi]
4907 sub edi, ebx
4908# endif
4909 shl edi, 3
4910 bsf edx, eax
4911 add edx, edi
4912 done:
4913 mov [iBit], edx
4914 }
4915# endif
4916 return iBit;
4917 }
4918 return -1;
4919}
4920#endif
4921
4922
4923/**
4924 * Finds the next clear bit in a bitmap.
4925 *
4926 * @returns Index of the next clear bit.
4927 * @returns -1 if no clear bit was found.
4928 * @param pvBitmap Pointer to the bitmap.
4929 * @param cBits The number of bits in the bitmap. Multiple of 32.
4930 * @param iBitPrev The bit returned from the last search.
4931 * The search will start at iBitPrev + 1.
4932 */
4933#if RT_INLINE_ASM_EXTERNAL
4934DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4935#else
4936DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4937{
4938 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
4939 int iBit = ++iBitPrev & 31;
4940 if (iBit)
4941 {
4942 /*
4943 * Inspect the 32-bit word containing the unaligned bit.
4944 */
4945 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4946
4947# if RT_INLINE_ASM_USES_INTRIN
4948 unsigned long ulBit = 0;
4949 if (_BitScanForward(&ulBit, u32))
4950 return ulBit + iBitPrev;
4951# else
4952# if RT_INLINE_ASM_GNU_STYLE
4953 __asm__ __volatile__("bsf %1, %0\n\t"
4954 "jnz 1f\n\t"
4955 "movl $-1, %0\n\t"
4956 "1:\n\t"
4957 : "=r" (iBit)
4958 : "r" (u32));
4959# else
4960 __asm
4961 {
4962 mov edx, [u32]
4963 bsf eax, edx
4964 jnz done
4965 mov eax, 0ffffffffh
4966 done:
4967 mov [iBit], eax
4968 }
4969# endif
4970 if (iBit >= 0)
4971 return iBit + iBitPrev;
4972# endif
4973
4974 /*
4975 * Skip ahead and see if there is anything left to search.
4976 */
4977 iBitPrev |= 31;
4978 iBitPrev++;
4979 if (cBits <= (uint32_t)iBitPrev)
4980 return -1;
4981 }
4982
4983 /*
4984 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4985 */
4986 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4987 if (iBit >= 0)
4988 iBit += iBitPrev;
4989 return iBit;
4990}
4991#endif
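

/*
 * Usage sketch (illustrative only): walking all clear (free) bits in a bitmap
 * by combining ASMBitFirstClear with ASMBitNextClear.  cBits must be a
 * multiple of 32; the callback name is made up for the example.
 */
#if 0 /* usage sketch, not compiled */
static void exampleVisitFreeSlots(const uint32_t *pau32Bitmap, uint32_t cBits)
{
    int32_t iBit = ASMBitFirstClear(pau32Bitmap, cBits);
    while (iBit >= 0)
    {
        exampleVisitFreeSlot((uint32_t)iBit);                       /* placeholder callback */
        iBit = ASMBitNextClear(pau32Bitmap, cBits, (uint32_t)iBit);
    }
}
#endif /* usage sketch */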
4992
4993
4994/**
4995 * Finds the first set bit in a bitmap.
4996 *
4997 * @returns Index of the first set bit.
4998 * @returns -1 if no set bit was found.
4999 * @param pvBitmap Pointer to the bitmap.
5000 * @param cBits The number of bits in the bitmap. Multiple of 32.
5001 */
5002#if RT_INLINE_ASM_EXTERNAL
5003DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5004#else
5005DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5006{
5007 if (cBits)
5008 {
5009 int32_t iBit;
5010# if RT_INLINE_ASM_GNU_STYLE
5011 RTCCUINTREG uEAX, uECX, uEDI;
5012 cBits = RT_ALIGN_32(cBits, 32);
5013 __asm__ __volatile__("repe; scasl\n\t"
5014 "je 1f\n\t"
5015# ifdef RT_ARCH_AMD64
5016 "lea -4(%%rdi), %%rdi\n\t"
5017 "movl (%%rdi), %%eax\n\t"
5018 "subq %5, %%rdi\n\t"
5019# else
5020 "lea -4(%%edi), %%edi\n\t"
5021 "movl (%%edi), %%eax\n\t"
5022 "subl %5, %%edi\n\t"
5023# endif
5024 "shll $3, %%edi\n\t"
5025 "bsfl %%eax, %%edx\n\t"
5026 "addl %%edi, %%edx\n\t"
5027 "1:\t\n"
5028 : "=d" (iBit),
5029 "=&c" (uECX),
5030 "=&D" (uEDI),
5031 "=&a" (uEAX)
5032 : "0" (0xffffffff),
5033 "mr" (pvBitmap),
5034 "1" (cBits >> 5),
5035 "2" (pvBitmap),
5036 "3" (0));
5037# else
5038 cBits = RT_ALIGN_32(cBits, 32);
5039 __asm
5040 {
5041# ifdef RT_ARCH_AMD64
5042 mov rdi, [pvBitmap]
5043 mov rbx, rdi
5044# else
5045 mov edi, [pvBitmap]
5046 mov ebx, edi
5047# endif
5048 mov edx, 0ffffffffh
5049 xor eax, eax
5050 mov ecx, [cBits]
5051 shr ecx, 5
5052 repe scasd
5053 je done
5054# ifdef RT_ARCH_AMD64
5055 lea rdi, [rdi - 4]
5056 mov eax, [rdi]
5057 sub rdi, rbx
5058# else
5059 lea edi, [edi - 4]
5060 mov eax, [edi]
5061 sub edi, ebx
5062# endif
5063 shl edi, 3
5064 bsf edx, eax
5065 add edx, edi
5066 done:
5067 mov [iBit], edx
5068 }
5069# endif
5070 return iBit;
5071 }
5072 return -1;
5073}
5074#endif
5075
5076
5077/**
5078 * Finds the next set bit in a bitmap.
5079 *
5080 * @returns Index of the next set bit.
5081 * @returns -1 if no set bit was found.
5082 * @param pvBitmap Pointer to the bitmap.
5083 * @param cBits The number of bits in the bitmap. Multiple of 32.
5084 * @param iBitPrev The bit returned from the last search.
5085 * The search will start at iBitPrev + 1.
5086 */
5087#if RT_INLINE_ASM_EXTERNAL
5088DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5089#else
5090DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5091{
5092 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5093 int iBit = ++iBitPrev & 31;
5094 if (iBit)
5095 {
5096 /*
5097 * Inspect the 32-bit word containing the unaligned bit.
5098 */
5099 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5100
5101# if RT_INLINE_ASM_USES_INTRIN
5102 unsigned long ulBit = 0;
5103 if (_BitScanForward(&ulBit, u32))
5104 return ulBit + iBitPrev;
5105# else
5106# if RT_INLINE_ASM_GNU_STYLE
5107 __asm__ __volatile__("bsf %1, %0\n\t"
5108 "jnz 1f\n\t"
5109 "movl $-1, %0\n\t"
5110 "1:\n\t"
5111 : "=r" (iBit)
5112 : "r" (u32));
5113# else
5114 __asm
5115 {
5116 mov edx, [u32]
5117 bsf eax, edx
5118 jnz done
5119 mov eax, 0ffffffffh
5120 done:
5121 mov [iBit], eax
5122 }
5123# endif
5124 if (iBit >= 0)
5125 return iBit + iBitPrev;
5126# endif
5127
5128 /*
5129 * Skip ahead and see if there is anything left to search.
5130 */
5131 iBitPrev |= 31;
5132 iBitPrev++;
5133 if (cBits <= (uint32_t)iBitPrev)
5134 return -1;
5135 }
5136
5137 /*
5138 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5139 */
5140 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5141 if (iBit >= 0)
5142 iBit += iBitPrev;
5143 return iBit;
5144}
5145#endif
5146
5147
5148/**
5149 * Finds the first bit which is set in the given 32-bit integer.
5150 * Bits are numbered from 1 (least significant) to 32.
5151 *
5152 * @returns index [1..32] of the first set bit.
5153 * @returns 0 if all bits are cleared.
5154 * @param u32 Integer to search for set bits.
5155 * @remarks Similar to ffs() in BSD.
5156 */
5157#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5158DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5159#else
5160DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5161{
5162# if RT_INLINE_ASM_USES_INTRIN
5163 unsigned long iBit;
5164 if (_BitScanForward(&iBit, u32))
5165 iBit++;
5166 else
5167 iBit = 0;
5168# elif RT_INLINE_ASM_GNU_STYLE
5169 uint32_t iBit;
5170 __asm__ __volatile__("bsf %1, %0\n\t"
5171 "jnz 1f\n\t"
5172 "xorl %0, %0\n\t"
5173 "jmp 2f\n"
5174 "1:\n\t"
5175 "incl %0\n"
5176 "2:\n\t"
5177 : "=r" (iBit)
5178 : "rm" (u32));
5179# else
5180 uint32_t iBit;
5181 _asm
5182 {
5183 bsf eax, [u32]
5184 jnz found
5185 xor eax, eax
5186 jmp done
5187 found:
5188 inc eax
5189 done:
5190 mov [iBit], eax
5191 }
5192# endif
5193 return iBit;
5194}
5195#endif
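

/*
 * Usage sketch (illustrative only): like ffs(), the return value is 1-based,
 * so subtract one before using it as a shift count.  Here it is used to turn
 * an address mask into the alignment it implies; the helper name is made up
 * for the example.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t exampleAlignmentFromMask(uint32_t fMask)
{
    unsigned iBit = ASMBitFirstSetU32(fMask);
    return iBit ? UINT32_C(1) << (iBit - 1) : 0;    /* 0 when no bit is set at all */
}
#endif /* usage sketch */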
5196
5197
5198/**
5199 * Finds the first bit which is set in the given 32-bit integer.
5200 * Bits are numbered from 1 (least significant) to 32.
5201 *
5202 * @returns index [1..32] of the first set bit.
5203 * @returns 0 if all bits are cleared.
5204 * @param i32 Integer to search for set bits.
5205 * @remark Similar to ffs() in BSD.
5206 */
5207DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5208{
5209 return ASMBitFirstSetU32((uint32_t)i32);
5210}
5211
5212
5213/**
5214 * Finds the first bit which is set in the given 64-bit integer.
5215 *
5216 * Bits are numbered from 1 (least significant) to 64.
5217 *
5218 * @returns index [1..64] of the first set bit.
5219 * @returns 0 if all bits are cleared.
5220 * @param u64 Integer to search for set bits.
5221 * @remarks Similar to ffs() in BSD.
5222 */
5223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5224DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5225#else
5226DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5227{
5228# if RT_INLINE_ASM_USES_INTRIN
5229 unsigned long iBit;
5230# if ARCH_BITS == 64
5231 if (_BitScanForward64(&iBit, u64))
5232 iBit++;
5233 else
5234 iBit = 0;
5235# else
5236 if (_BitScanForward(&iBit, (uint32_t)u64))
5237 iBit++;
5238 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5239 iBit += 33;
5240 else
5241 iBit = 0;
5242# endif
5243# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5244 uint64_t iBit;
5245 __asm__ __volatile__("bsfq %1, %0\n\t"
5246 "jnz 1f\n\t"
5247 "xorl %k0, %k0\n\t"
5248 "jmp 2f\n"
5249 "1:\n\t"
5250 "incl %k0\n"
5251 "2:\n\t"
5252 : "=r" (iBit)
5253 : "rm" (u64));
5254# else
5255 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5256 if (!iBit)
5257 {
5258 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5259 if (iBit)
5260 iBit += 32;
5261 }
5262# endif
5263 return (unsigned)iBit;
5264}
5265#endif
5266
5267
5268/**
5269 * Finds the first bit which is set in the given 16-bit integer.
5270 *
5271 * Bits are numbered from 1 (least significant) to 16.
5272 *
5273 * @returns index [1..16] of the first set bit.
5274 * @returns 0 if all bits are cleared.
5275 * @param u16 Integer to search for set bits.
5276 * @remarks For 16-bit bs3kit code.
5277 */
5278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5279DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5280#else
5281DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5282{
5283 return ASMBitFirstSetU32((uint32_t)u16);
5284}
5285#endif
5286
5287
5288/**
5289 * Finds the last bit which is set in the given 32-bit integer.
5290 * Bits are numbered from 1 (least significant) to 32.
5291 *
5292 * @returns index [1..32] of the last set bit.
5293 * @returns 0 if all bits are cleared.
5294 * @param u32 Integer to search for set bits.
5295 * @remark Similar to fls() in BSD.
5296 */
5297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5298DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5299#else
5300DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5301{
5302# if RT_INLINE_ASM_USES_INTRIN
5303 unsigned long iBit;
5304 if (_BitScanReverse(&iBit, u32))
5305 iBit++;
5306 else
5307 iBit = 0;
5308# elif RT_INLINE_ASM_GNU_STYLE
5309 uint32_t iBit;
5310 __asm__ __volatile__("bsrl %1, %0\n\t"
5311 "jnz 1f\n\t"
5312 "xorl %0, %0\n\t"
5313 "jmp 2f\n"
5314 "1:\n\t"
5315 "incl %0\n"
5316 "2:\n\t"
5317 : "=r" (iBit)
5318 : "rm" (u32));
5319# else
5320 uint32_t iBit;
5321 _asm
5322 {
5323 bsr eax, [u32]
5324 jnz found
5325 xor eax, eax
5326 jmp done
5327 found:
5328 inc eax
5329 done:
5330 mov [iBit], eax
5331 }
5332# endif
5333 return iBit;
5334}
5335#endif
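

/*
 * Usage sketch (illustrative only): because the index is 1-based,
 * ASMBitLastSetU32(u32) - 1 equals floor(log2(u32)) for non-zero input, which
 * is handy for rounding a size up to the next power of two.  The helper name
 * is made up for the example.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t exampleRoundUpToPowerOfTwo(uint32_t u32)
{
    unsigned iLast = ASMBitLastSetU32(u32);                 /* 0 for a zero input */
    if (!iLast || u32 == UINT32_C(1) << (iLast - 1))
        return u32;                                         /* zero or already a power of two */
    return UINT32_C(1) << iLast;                            /* overflows for inputs above 2^31 */
}
#endif /* usage sketch */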
5336
5337
5338/**
5339 * Finds the last bit which is set in the given 32-bit integer.
5340 * Bits are numbered from 1 (least significant) to 32.
5341 *
5342 * @returns index [1..32] of the last set bit.
5343 * @returns 0 if all bits are cleared.
5344 * @param i32 Integer to search for set bits.
5345 * @remark Similar to fls() in BSD.
5346 */
5347DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5348{
5349 return ASMBitLastSetU32((uint32_t)i32);
5350}
5351
5352
5353/**
5354 * Finds the last bit which is set in the given 64-bit integer.
5355 *
5356 * Bits are numbered from 1 (least significant) to 64.
5357 *
5358 * @returns index [1..64] of the last set bit.
5359 * @returns 0 if all bits are cleared.
5360 * @param u64 Integer to search for set bits.
5361 * @remark Similar to fls() in BSD.
5362 */
5363#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5364DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5365#else
5366DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5367{
5368# if RT_INLINE_ASM_USES_INTRIN
5369 unsigned long iBit;
5370# if ARCH_BITS == 64
5371 if (_BitScanReverse64(&iBit, u64))
5372 iBit++;
5373 else
5374 iBit = 0;
5375# else
5376 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5377 iBit += 33;
5378 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5379 iBit++;
5380 else
5381 iBit = 0;
5382# endif
5383# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5384 uint64_t iBit;
5385 __asm__ __volatile__("bsrq %1, %0\n\t"
5386 "jnz 1f\n\t"
5387 "xorl %k0, %k0\n\t"
5388 "jmp 2f\n"
5389 "1:\n\t"
5390 "incl %k0\n"
5391 "2:\n\t"
5392 : "=r" (iBit)
5393 : "rm" (u64));
5394# else
5395 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5396 if (iBit)
5397 iBit += 32;
5398 else
5399 iBit = ASMBitLastSetU32((uint32_t)u64);
5400# endif
5401 return (unsigned)iBit;
5402}
5403#endif
5404
5405
5406/**
5407 * Finds the last bit which is set in the given 16-bit integer.
5408 *
5409 * Bits are numbered from 1 (least significant) to 16.
5410 *
5411 * @returns index [1..16] of the last set bit.
5412 * @returns 0 if all bits are cleared.
5413 * @param u16 Integer to search for set bits.
5414 * @remarks For 16-bit bs3kit code.
5415 */
5416#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5417DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5418#else
5419DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5420{
5421 return ASMBitLastSetU32((uint32_t)u16);
5422}
5423#endif
5424
5425
5426/**
5427 * Reverse the byte order of the given 16-bit integer.
5428 *
5429 * @returns The 16-bit integer with the byte order reversed.
5430 * @param u16 16-bit integer value.
5431 */
5432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5433DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5434#else
5435DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5436{
5437# if RT_INLINE_ASM_USES_INTRIN
5438 u16 = _byteswap_ushort(u16);
5439# elif RT_INLINE_ASM_GNU_STYLE
5440 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5441# else
5442 _asm
5443 {
5444 mov ax, [u16]
5445 ror ax, 8
5446 mov [u16], ax
5447 }
5448# endif
5449 return u16;
5450}
5451#endif
5452
5453
5454/**
5455 * Reverse the byte order of the given 32-bit integer.
5456 *
5457 * @returns The 32-bit integer with the byte order reversed.
5458 * @param u32 32-bit integer value.
5459 */
5460#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5461DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5462#else
5463DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5464{
5465# if RT_INLINE_ASM_USES_INTRIN
5466 u32 = _byteswap_ulong(u32);
5467# elif RT_INLINE_ASM_GNU_STYLE
5468 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5469# else
5470 _asm
5471 {
5472 mov eax, [u32]
5473 bswap eax
5474 mov [u32], eax
5475 }
5476# endif
5477 return u32;
5478}
5479#endif
5480
5481
5482/**
5483 * Reverse the byte order of the given 64-bit integer.
5484 *
5485 * @returns The 64-bit integer with the byte order reversed.
5486 * @param u64 64-bit integer value.
5487 */
5488DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5489{
5490#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5491 u64 = _byteswap_uint64(u64);
5492#else
5493 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5494 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5495#endif
5496 return u64;
5497}
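

/*
 * Usage sketch (illustrative only): writing a value in big endian (network)
 * byte order from a little endian host.  On a big endian host the swap would
 * have to be skipped; the helper name is made up for the example.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t exampleHostToBigEndianU32(uint32_t u32Host)
{
    /* Assumes a little endian host; a big endian host would return the value unchanged. */
    return ASMByteSwapU32(u32Host);
}
#endif /* usage sketch */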
5498
5499
5500/**
5501 * Rotate 32-bit unsigned value to the left by @a cShift.
5502 *
5503 * @returns Rotated value.
5504 * @param u32 The value to rotate.
5505 * @param cShift How many bits to rotate by.
5506 */
5507#ifdef __WATCOMC__
5508DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5509#else
5510DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5511{
5512# if RT_INLINE_ASM_USES_INTRIN
5513 return _rotl(u32, cShift);
5514# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5515 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5516 return u32;
5517# else
5518 cShift &= 31;
5519 return (u32 << cShift) | (u32 >> (32 - cShift));
5520# endif
5521}
5522#endif
5523
5524
5525/**
5526 * Rotate 32-bit unsigned value to the right by @a cShift.
5527 *
5528 * @returns Rotated value.
5529 * @param u32 The value to rotate.
5530 * @param cShift How many bits to rotate by.
5531 */
5532#ifdef __WATCOMC__
5533DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5534#else
5535DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5536{
5537# if RT_INLINE_ASM_USES_INTRIN
5538 return _rotr(u32, cShift);
5539# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5540 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5541 return u32;
5542# else
5543 cShift &= 31;
5544 return (u32 >> cShift) | (u32 << (32 - cShift));
5545# endif
5546}
5547#endif
5548
5549
5550/**
5551 * Rotate 64-bit unsigned value to the left by @a cShift.
5552 *
5553 * @returns Rotated value.
5554 * @param u64 The value to rotate.
5555 * @param cShift How many bits to rotate by.
5556 */
5557DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5558{
5559#if RT_INLINE_ASM_USES_INTRIN
5560 return _rotl64(u64, cShift);
5561#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5562 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5563 return u64;
5564#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5565 uint32_t uSpill;
5566 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5567 "jz 1f\n\t"
5568 "xchgl %%eax, %%edx\n\t"
5569 "1:\n\t"
5570 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5571 "jz 2f\n\t"
5572 "movl %%edx, %2\n\t" /* save the hi value in %2. */
5573 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5574 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5575 "2:\n\t" /* } */
5576 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5577 : "0" (u64),
5578 "1" (cShift));
5579 return u64;
5580#else
5581 cShift &= 63;
5582 return (u64 << cShift) | (u64 >> (64 - cShift));
5583#endif
5584}
5585
5586
5587/**
5588 * Rotate 64-bit unsigned value to the right by @a cShift.
5589 *
5590 * @returns Rotated value.
5591 * @param u64 The value to rotate.
5592 * @param cShift How many bits to rotate by.
5593 */
5594DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5595{
5596#if RT_INLINE_ASM_USES_INTRIN
5597 return _rotr64(u64, cShift);
5598#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5599 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5600 return u64;
5601#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5602 uint32_t uSpill;
5603 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5604 "jz 1f\n\t"
5605 "xchgl %%eax, %%edx\n\t"
5606 "1:\n\t"
5607 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5608 "jz 2f\n\t"
5609 "movl %%edx, %2\n\t" /* save the hi value in %2. */
5610 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5611 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5612 "2:\n\t" /* } */
5613 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5614 : "0" (u64),
5615 "1" (cShift));
5616 return u64;
5617#else
5618 cShift &= 63;
5619 return (u64 >> cShift) | (u64 << (64 - cShift));
5620#endif
5621}
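

/*
 * Usage sketch (illustrative only): bits that fall off one end re-enter at the
 * other, so rotating left and then right by the same count restores the
 * original value.
 */
#if 0 /* usage sketch, not compiled */
static void exampleRotateRoundTrip(void)
{
    uint64_t const u64 = UINT64_C(0x8000000000000001);
    Assert(ASMRotateLeftU64(u64, 1) == UINT64_C(3));
    Assert(ASMRotateRightU64(ASMRotateLeftU64(u64, 13), 13) == u64);
}
#endif /* usage sketch */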
5622
5623/** @} */
5624
5625
5626/** @} */
5627
5628#endif
5629