VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 97778

Last change on this file since 97778 was 96407, checked in by vboxsync, 2 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 259.8 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
7 *
8 * This file is part of VirtualBox base platform packages, as
9 * available from https://www.virtualbox.org.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation, in version 3 of the
14 * License.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses>.
23 *
24 * The contents of this file may alternatively be used under the terms
25 * of the Common Development and Distribution License Version 1.0
26 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
27 * in the VirtualBox distribution, in which case the provisions of the
28 * CDDL are applicable instead of those of the GPL.
29 *
30 * You may elect to license modified versions of this file under the
31 * terms and conditions of either the GPL or the CDDL or both.
32 *
33 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
34 */
35
36#ifndef IPRT_INCLUDED_asm_h
37#define IPRT_INCLUDED_asm_h
38#ifndef RT_WITHOUT_PRAGMA_ONCE
39# pragma once
40#endif
41
42#include <iprt/cdefs.h>
43#include <iprt/types.h>
44#include <iprt/assert.h>
45/** @def RT_INLINE_ASM_USES_INTRIN
46 * Defined as 1 if we're using a _MSC_VER 1400.
47 * Otherwise defined as 0.
48 */
49
50/* Solaris 10 header ugliness */
51#ifdef u
52# undef u
53#endif
54
55#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
56/* Emit the intrinsics at all optimization levels. */
57# include <iprt/sanitized/intrin.h>
58# pragma intrinsic(_ReadWriteBarrier)
59# pragma intrinsic(__cpuid)
60# pragma intrinsic(__stosd)
61# pragma intrinsic(__stosw)
62# pragma intrinsic(__stosb)
63# pragma intrinsic(_BitScanForward)
64# pragma intrinsic(_BitScanReverse)
65# pragma intrinsic(_bittest)
66# pragma intrinsic(_bittestandset)
67# pragma intrinsic(_bittestandreset)
68# pragma intrinsic(_bittestandcomplement)
69# pragma intrinsic(_byteswap_ushort)
70# pragma intrinsic(_byteswap_ulong)
71# pragma intrinsic(_interlockedbittestandset)
72# pragma intrinsic(_interlockedbittestandreset)
73# pragma intrinsic(_InterlockedAnd)
74# pragma intrinsic(_InterlockedOr)
75# pragma intrinsic(_InterlockedXor)
76# pragma intrinsic(_InterlockedIncrement)
77# pragma intrinsic(_InterlockedDecrement)
78# pragma intrinsic(_InterlockedExchange)
79# pragma intrinsic(_InterlockedExchangeAdd)
80# pragma intrinsic(_InterlockedCompareExchange)
81# pragma intrinsic(_InterlockedCompareExchange8)
82# pragma intrinsic(_InterlockedCompareExchange16)
83# pragma intrinsic(_InterlockedCompareExchange64)
84# pragma intrinsic(_rotl)
85# pragma intrinsic(_rotr)
86# pragma intrinsic(_rotl64)
87# pragma intrinsic(_rotr64)
88# ifdef RT_ARCH_AMD64
89# pragma intrinsic(__stosq)
90# pragma intrinsic(_byteswap_uint64)
91# pragma intrinsic(_InterlockedCompareExchange128)
92# pragma intrinsic(_InterlockedExchange64)
93# pragma intrinsic(_InterlockedExchangeAdd64)
94# pragma intrinsic(_InterlockedAnd64)
95# pragma intrinsic(_InterlockedOr64)
96# pragma intrinsic(_InterlockedIncrement64)
97# pragma intrinsic(_InterlockedDecrement64)
98# endif
99#endif
100
101/*
102 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
103 */
104#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
105# include "asm-watcom-x86-16.h"
106#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
107# include "asm-watcom-x86-32.h"
108#endif
109
110
111/** @defgroup grp_rt_asm ASM - Assembly Routines
112 * @ingroup grp_rt
113 *
114 * @remarks The difference between ordered and unordered atomic operations is
115 * that the former will complete outstanding reads and writes before
116 * continuing while the latter doesn't make any promises about the
117 * order. Ordered operations don't, it seems, make any 100% promise
118 * with respect to whether the operation will complete before any
119 * subsequent memory access. (please, correct if wrong.)
120 *
121 * ASMAtomicSomething operations are all ordered, while
122 * ASMAtomicUoSomething are unordered (note the Uo).
123 *
124 * Please note that ordered operations do not necessarily imply a
125 * compiler (memory) barrier. The user has to use the
126 * ASMCompilerBarrier() macro when that is deemed necessary.
127 *
128 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
129 * to reorder or even optimize assembler instructions away. For
130 * instance, in the following code the second rdmsr instruction is
131 * optimized away because gcc treats that instruction as deterministic:
132 *
133 * @code
134 * static inline uint64_t rdmsr_low(int idx)
135 * {
136 * uint32_t low;
137 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
138 * }
139 * ...
140 * uint32_t msr1 = rdmsr_low(1);
141 * foo(msr1);
142 * msr1 = rdmsr_low(1);
143 * bar(msr1);
144 * @endcode
145 *
146 * The input parameter of rdmsr_low is the same for both calls and
147 * therefore gcc will use the result of the first call as input
148 * parameter for bar() as well. For rdmsr this is not acceptable as
149 * this instruction is _not_ deterministic. This applies to reading
150 * machine status information in general.
151 *
152 * @{
153 */
154
155
156/** @def RT_INLINE_ASM_GCC_4_3_X_X86
157 * Used to work around some 4.3.x register allocation issues in this version of
158 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
159 * definitely not for 5.x */
160#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
161# define RT_INLINE_ASM_GCC_4_3_X_X86 1
162#else
163# define RT_INLINE_ASM_GCC_4_3_X_X86 0
164#endif
165
166/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * Defined as 1 when inline cmpxchg8b code must not be used together with
 * position independent code, otherwise 0.  The surrounding \#ifndef lets a
 * build pre-define the value.
 *
167 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
168 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
169 * mode, x86.
170 *
171 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
172 * when in PIC mode on x86.
 *
 * When this is non-zero ASMAtomicXchgU64 is only declared here instead of
 * being defined inline (see the condition guarding that function).
173 */
174#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
175# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
176# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
177# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
178# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
/* PIC build for 32-bit x86 with either an affected GCC or on Darwin. */
179# elif ( (defined(PIC) || defined(__PIC__)) \
180 && defined(RT_ARCH_X86) \
181 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
182 || defined(RT_OS_DARWIN)) )
183# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
184# else
185# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
186# endif
187#endif
188
189
/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
 * Temporary variant of RT_INLINE_ASM_EXTERNAL that is always 0 on ARM
 * (RT_ARCH_ARM64 / RT_ARCH_ARM32) and otherwise 1 exactly when
 * RT_INLINE_ASM_EXTERNAL is non-zero. */
#if !RT_INLINE_ASM_EXTERNAL || defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
#else
# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
#endif
197
198/*
199 * ARM is great fun.
200 */
201#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
202
203# define RTASM_ARM_NO_BARRIER
204# ifdef RT_ARCH_ARM64
205# define RTASM_ARM_NO_BARRIER_IN_REG
206# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
207# define RTASM_ARM_DSB_SY "dsb sy\n\t"
208# define RTASM_ARM_DSB_SY_IN_REG
209# define RTASM_ARM_DSB_SY_COMMA_IN_REG
210# define RTASM_ARM_DMB_SY "dmb sy\n\t"
211# define RTASM_ARM_DMB_SY_IN_REG
212# define RTASM_ARM_DMB_SY_COMMA_IN_REG
213# define RTASM_ARM_DMB_ST "dmb st\n\t"
214# define RTASM_ARM_DMB_ST_IN_REG
215# define RTASM_ARM_DMB_ST_COMMA_IN_REG
216# define RTASM_ARM_DMB_LD "dmb ld\n\t"
217# define RTASM_ARM_DMB_LD_IN_REG
218# define RTASM_ARM_DMB_LD_COMMA_IN_REG
219# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
/* AArch64, 32-bit operand, result is the NEW value.
 * Declares rcSpill and u32NewRet, then emits a retry loop on *a_pu32Mem:
 * ldaxr loads into uNew, the modify64 instruction text operates on it, stlxr
 * stores uNew back, and cbnz jumps back to the label while the store status
 * in rc is non-zero.  The barrier text selected by barrier_type follows the
 * label and is thus part of the loop.  modify32 is not used by this variant.
 * The expansion ends without a semicolon; the user supplies it. */
220# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
221 uint32_t rcSpill; \
222 uint32_t u32NewRet; \
223 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
224 RTASM_ARM_##barrier_type /* before label? */ \
225 "ldaxr %w[uNew], %[pMem]\n\t" \
226 modify64 \
227 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
228 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
229 : [pMem] "+Q" (*a_pu32Mem) \
230 , [uNew] "=&r" (u32NewRet) \
231 , [rc] "=&r" (rcSpill) \
232 : in_reg \
233 : "cc")
/* AArch64, 32-bit operand, result is the OLD value.
 * Declares rcSpill, u32OldRet and u32NewSpill, then emits a retry loop on
 * *a_pu32Mem: ldaxr loads into uOld, the modify64 instruction text runs
 * (presumably deriving uNew from uOld - supplied by the user of the macro),
 * stlxr stores uNew, and cbnz retries while the store status in rc is
 * non-zero.  u32OldRet keeps the loaded value.  modify32 is not used by this
 * variant.  The expansion ends without a semicolon. */
234# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
235 uint32_t rcSpill; \
236 uint32_t u32OldRet; \
237 uint32_t u32NewSpill; \
238 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
239 RTASM_ARM_##barrier_type /* before label? */ \
240 "ldaxr %w[uOld], %[pMem]\n\t" \
241 modify64 \
242 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
243 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
244 : [pMem] "+Q" (*a_pu32Mem) \
245 , [uOld] "=&r" (u32OldRet) \
246 , [uNew] "=&r" (u32NewSpill) \
247 , [rc] "=&r" (rcSpill) \
248 : in_reg \
249 : "cc")
/* AArch64, 64-bit operand, result is the NEW value.
 * Same loop shape as the 32-bit RET_NEW macro, but u64NewRet is 64 bits wide
 * and the data operand is used without the %w modifier; only the store status
 * (rc) uses %w.  modify32 is not used by this variant.  The expansion ends
 * without a semicolon. */
250# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
251 uint32_t rcSpill; \
252 uint64_t u64NewRet; \
253 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
254 RTASM_ARM_##barrier_type /* before label? */ \
255 "ldaxr %[uNew], %[pMem]\n\t" \
256 modify64 \
257 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
258 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
259 : [pMem] "+Q" (*a_pu64Mem) \
260 , [uNew] "=&r" (u64NewRet) \
261 , [rc] "=&r" (rcSpill) \
262 : in_reg \
263 : "cc")
/* AArch64, 64-bit operand, result is the OLD value.
 * Same loop shape as the 32-bit RET_OLD macro, but u64OldRet and u64NewSpill
 * are 64 bits wide and the data operands are used without the %w modifier.
 * u64OldRet keeps the loaded value; uNew is what gets stored.  modify32 is
 * not used by this variant.  The expansion ends without a semicolon. */
264# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
265 uint32_t rcSpill; \
266 uint64_t u64OldRet; \
267 uint64_t u64NewSpill; \
268 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
269 RTASM_ARM_##barrier_type /* before label? */ \
270 "ldaxr %[uOld], %[pMem]\n\t" \
271 modify64 \
272 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
273 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
274 : [pMem] "+Q" (*a_pu64Mem) \
275 , [uOld] "=&r" (u64OldRet) \
276 , [uNew] "=&r" (u64NewSpill) \
277 , [rc] "=&r" (rcSpill) \
278 : in_reg \
279 : "cc")
280
281# else /* RT_ARCH_ARM32 */
282# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
283# if RT_ARCH_ARM32 >= 7
284# warning armv7
285# define RTASM_ARM_NO_BARRIER_IN_REG
286# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
287# define RTASM_ARM_DSB_SY "dsb sy\n\t"
288# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
289# define RTASM_ARM_DMB_SY "dmb sy\n\t"
290# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
291# define RTASM_ARM_DMB_ST "dmb st\n\t"
292# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
293# define RTASM_ARM_DMB_LD "dmb ld\n\t"
294# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
295
296# elif RT_ARCH_ARM32 >= 6
297# warning armv6
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
301# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
302# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
306# elif RT_ARCH_ARM32 >= 4
307# warning armv5 or older
308# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
309# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
310# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
311# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
312# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
313# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
314# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
315# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
316# else
317# error "huh? Odd RT_ARCH_ARM32 value!"
318# endif
319# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
320# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
321# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
322# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
/* ARM32, 32-bit operand, result is the NEW value.
 * Declares rcSpill and u32NewRet, then emits a retry loop on *a_pu32Mem:
 * ldrex loads into uNew, the modify32 instruction text operates on it, strex
 * stores uNew back, and cmp/bne retries while the store status in rc is
 * non-zero.  The input operand list starts with the barrier's _IN_REG
 * operand followed by in_reg.  modify64 is not used by this variant.  The
 * expansion ends without a semicolon.
 * NOTE(review): the memory operand uses "+m" here whereas the ASMAtomicXchgU*
 * functions in this file use "+Q" for their exclusive accesses - confirm
 * which constraint is intended. */
323# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
324 uint32_t rcSpill; \
325 uint32_t u32NewRet; \
326 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
327 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
328 "ldrex %[uNew], %[pMem]\n\t" \
329 modify32 \
330 "strex %[rc], %[uNew], %[pMem]\n\t" \
331 "cmp %[rc], #0\n\t" \
332 "bne .Ltry_again_" #name "_%=\n\t" \
333 : [pMem] "+m" (*a_pu32Mem) \
334 , [uNew] "=&r" (u32NewRet) \
335 , [rc] "=&r" (rcSpill) \
336 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
337 , in_reg \
338 : "cc")
/* ARM32, 32-bit operand, result is the OLD value.
 * Declares rcSpill, u32OldRet and u32NewSpill, then emits a retry loop on
 * *a_pu32Mem: ldrex loads into uOld, the modify32 instruction text runs
 * (presumably deriving uNew from uOld), strex stores uNew, and cmp/bne
 * retries while the store status in rc is non-zero.  u32OldRet keeps the
 * loaded value.  modify64 is not used by this variant.  The expansion ends
 * without a semicolon. */
339# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
340 uint32_t rcSpill; \
341 uint32_t u32OldRet; \
342 uint32_t u32NewSpill; \
343 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
344 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
345 "ldrex %[uOld], %[pMem]\n\t" \
346 modify32 \
347 "strex %[rc], %[uNew], %[pMem]\n\t" \
348 "cmp %[rc], #0\n\t" \
349 "bne .Ltry_again_" #name "_%=\n\t" \
350 : [pMem] "+m" (*a_pu32Mem) \
351 , [uOld] "=&r" (u32OldRet) \
352 , [uNew] "=&r" (u32NewSpill) \
353 , [rc] "=&r" (rcSpill) \
354 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
355 , in_reg \
356 : "cc")
/* ARM32, 64-bit operand, result is the NEW value.
 * Same loop shape as the ARM32 32-bit RET_NEW macro, but uses ldrexd/strexd
 * on a 64-bit u64NewRet; each data operand is named twice, plain and with the
 * %H modifier (presumably the second register of the pair - confirm against
 * the GCC ARM operand modifier documentation).  modify64 is not used by this
 * variant.  The expansion ends without a semicolon. */
357# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
358 uint32_t rcSpill; \
359 uint64_t u64NewRet; \
360 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
361 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
362 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
363 modify32 \
364 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
365 "cmp %[rc], #0\n\t" \
366 "bne .Ltry_again_" #name "_%=\n\t" \
367 : [pMem] "+m" (*a_pu64Mem), \
368 [uNew] "=&r" (u64NewRet), \
369 [rc] "=&r" (rcSpill) \
370 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
371 , in_reg \
372 : "cc")
/* ARM32, 64-bit operand, result is the OLD value.
 * Same loop shape as the ARM32 32-bit RET_OLD macro, but uses ldrexd/strexd
 * on 64-bit u64OldRet / u64NewSpill, naming each data operand plain and with
 * the %H modifier.  u64OldRet keeps the loaded value; uNew is what gets
 * stored.  modify64 is not used by this variant.  The expansion ends without
 * a semicolon. */
373# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
374 uint32_t rcSpill; \
375 uint64_t u64OldRet; \
376 uint64_t u64NewSpill; \
377 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
378 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
379 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
380 modify32 \
381 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
382 "cmp %[rc], #0\n\t" \
383 "bne .Ltry_again_" #name "_%=\n\t" \
384 : [pMem] "+m" (*a_pu64Mem), \
385 [uOld] "=&r" (u64OldRet), \
386 [uNew] "=&r" (u64NewSpill), \
387 [rc] "=&r" (rcSpill) \
388 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
389 , in_reg \
390 : "cc")
391# endif /* RT_ARCH_ARM32 */
392#endif
393
394
395/** @def ASMReturnAddress
396 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps onto the compiler's own facility: the _ReturnAddress() intrinsic for
 * Visual C++ and __builtin_return_address(0) for GCC-compatible compilers
 * (also used when Doxygen is running).  Any other compiler is rejected with
 * an \#error.
397 */
398#ifdef _MSC_VER
/* Declare the intrinsic ourselves, with C linkage when compiled as C++. */
399# ifdef __cplusplus
400extern "C"
401# endif
402void * _ReturnAddress(void);
403# pragma intrinsic(_ReturnAddress)
404# define ASMReturnAddress() _ReturnAddress()
405#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
406# define ASMReturnAddress() __builtin_return_address(0)
407#elif defined(__WATCOMC__)
/* No Watcom equivalent: expands to a call to a function that is not declared
   here, so any use presumably fails at build time - TODO confirm. */
408# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
409#else
410# error "Unsupported compiler."
411#endif
412
413
414/**
415 * Compiler memory barrier.
416 *
417 * Ensure that the compiler does not use any cached (register/tmp stack) memory
418 * values or any outstanding writes when returning from this function.
419 *
420 * This function must be used if non-volatile data is modified by a
421 * device or the VMM. Typical cases are port access, MMIO access,
422 * trapping instruction, etc.
 *
 * Implemented as a macro for GNU-style inline assembly and for the MSC
 * intrinsics, as an external function declaration for Watcom, and as an
 * inline function with an empty __asm block otherwise.
423 */
424#if RT_INLINE_ASM_GNU_STYLE
/* Empty asm statement with a "memory" clobber. */
425# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
426#elif RT_INLINE_ASM_USES_INTRIN
427# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
428#elif defined(__WATCOMC__)
/* Only declared here; the definition is not in this part of the file. */
429void ASMCompilerBarrier(void);
430#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
431DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
432{
433 __asm
434 {
435 }
436}
437#endif
438
439
440/** @def ASMBreakpoint
441 * Debugger Breakpoint.
 *
 * Plain alias that expands to RT_BREAKPOINT().
 *
442 * @deprecated Use RT_BREAKPOINT instead.
443 * @internal
444 */
445#define ASMBreakpoint() RT_BREAKPOINT()
446
447
448/**
449 * Spinloop hint for platforms that have these, empty function on the other
450 * platforms.
451 *
452 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
453 * spin locks.
 *
 * ARM32 & ARM64: Issues a YIELD instruction.
 *
 * @remarks Only declared here (external implementation) on x86/AMD64 when
 *          RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero; defined inline
 *          otherwise.
454 */
455#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
456RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
457#else
458DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
459{
460# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
461# if RT_INLINE_ASM_GNU_STYLE
    /* The instruction is emitted as raw bytes (f3 90) rather than by mnemonic. */
462 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
463# else
    /* Same two bytes, emitted via the MSC-style _emit directive. */
464 __asm {
465 _emit 0f3h
466 _emit 090h
467 }
468# endif
469
470# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
471 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
472
473# else
474 /* dummy */
475# endif
476}
477#endif
478
479
480/**
481 * Atomically Exchange an unsigned 8-bit value, ordered.
482 *
483 * @returns The value *pu8 held before the exchange.
484 * @param pu8 Pointer to the 8-bit variable to update.
485 * @param u8 The 8-bit value to assign to *pu8.
 *
 * @remarks Only declared here (external implementation) when
 *          RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero; otherwise defined
 *          inline for x86/AMD64 and ARM32/ARM64.
486 */
487#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
488RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
489#else
490DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
491{
492# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
493# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 1 is tied to the input u8 (constraint "1"): the register carries
       the new value in and, after the swap with *pu8, the old value out. */
494 __asm__ __volatile__("xchgb %0, %1\n\t"
495 : "=m" (*pu8)
496 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
497 : "1" (u8)
498 , "m" (*pu8));
499# else
    /* MSC-style inline assembly: swap via AL and write the old value back to u8. */
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 mov rdx, [pu8]
504 mov al, [u8]
505 xchg [rdx], al
506 mov [u8], al
507# else
508 mov edx, [pu8]
509 mov al, [u8]
510 xchg [edx], al
511 mov [u8], al
512# endif
513 }
514# endif
515 return u8;
516
517# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
    /* Exclusive load/store retry loop: rc receives the store status and the
       loop repeats while it is non-zero.  The RTASM_ARM_DMB_SY barrier text
       follows the label, so it is executed on every iteration. */
518 uint32_t uOld;
519 uint32_t rcSpill;
520 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
521 RTASM_ARM_DMB_SY
522# if defined(RT_ARCH_ARM64)
523 "ldaxrb %w[uOld], %[pMem]\n\t"
524 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
525 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
526# else
527 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
528 "strexb %[rc], %[uNew], %[pMem]\n\t"
529 "cmp %[rc], #0\n\t"
530 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
531# endif
532 : [pMem] "+Q" (*pu8)
533 , [uOld] "=&r" (uOld)
534 , [rc] "=&r" (rcSpill)
535 : [uNew] "r" ((uint32_t)u8)
536 RTASM_ARM_DMB_SY_COMMA_IN_REG
537 : "cc");
538 return (uint8_t)uOld;
539
540# else
541# error "Port me"
542# endif
543}
544#endif
545
546
547/**
548 * Atomically Exchange a signed 8-bit value, ordered.
549 *
550 * @returns Current *pu8 value
551 * @param pi8 Pointer to the 8-bit variable to update.
552 * @param i8 The 8-bit value to assign to *pi8.
553 */
554DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
555{
556 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
557}
558
559
560/**
561 * Atomically Exchange a bool value, ordered.
562 *
563 * @returns Current *pf value
564 * @param pf Pointer to the 8-bit variable to update.
565 * @param f The 8-bit value to assign to *pi8.
566 */
567DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
568{
569#ifdef _MSC_VER
570 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
571#else
572 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
573#endif
574}
575
576
577/**
578 * Atomically Exchange an unsigned 16-bit value, ordered.
579 *
580 * @returns The value *pu16 held before the exchange.
581 * @param pu16 Pointer to the 16-bit variable to update.
582 * @param u16 The 16-bit value to assign to *pu16.
 *
 * @remarks Only declared here (external implementation) when
 *          RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero; otherwise defined
 *          inline for x86/AMD64 and ARM32/ARM64.
583 */
584#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
585RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
586#else
587DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
588{
589# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
590# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 1 is tied to the input u16 (constraint "1"): the register
       carries the new value in and the old *pu16 value out. */
591 __asm__ __volatile__("xchgw %0, %1\n\t"
592 : "=m" (*pu16)
593 , "=r" (u16)
594 : "1" (u16)
595 , "m" (*pu16));
596# else
    /* MSC-style inline assembly: swap via AX and write the old value back to u16. */
597 __asm
598 {
599# ifdef RT_ARCH_AMD64
600 mov rdx, [pu16]
601 mov ax, [u16]
602 xchg [rdx], ax
603 mov [u16], ax
604# else
605 mov edx, [pu16]
606 mov ax, [u16]
607 xchg [edx], ax
608 mov [u16], ax
609# endif
610 }
611# endif
612 return u16;
613
614# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
    /* Exclusive load/store retry loop: rc receives the store status and the
       loop repeats while it is non-zero.  The RTASM_ARM_DMB_SY barrier text
       follows the label, so it is executed on every iteration. */
615 uint32_t uOld;
616 uint32_t rcSpill;
617 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
618 RTASM_ARM_DMB_SY
619# if defined(RT_ARCH_ARM64)
620 "ldaxrh %w[uOld], %[pMem]\n\t"
621 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
622 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
623# else
624 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
625 "strexh %[rc], %[uNew], %[pMem]\n\t"
626 "cmp %[rc], #0\n\t"
627 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
628# endif
629 : [pMem] "+Q" (*pu16)
630 , [uOld] "=&r" (uOld)
631 , [rc] "=&r" (rcSpill)
632 : [uNew] "r" ((uint32_t)u16)
633 RTASM_ARM_DMB_SY_COMMA_IN_REG
634 : "cc");
635 return (uint16_t)uOld;
636
637# else
638# error "Port me"
639# endif
640}
641#endif
642
643
644/**
645 * Atomically Exchange a signed 16-bit value, ordered.
646 *
647 * @returns Current *pu16 value
648 * @param pi16 Pointer to the 16-bit variable to update.
649 * @param i16 The 16-bit value to assign to *pi16.
650 */
651DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
652{
653 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
654}
655
656
657/**
658 * Atomically Exchange an unsigned 32-bit value, ordered.
659 *
660 * @returns The value *pu32 held before the exchange.
661 * @param pu32 Pointer to the 32-bit variable to update.
662 * @param u32 The 32-bit value to assign to *pu32.
663 *
664 * @remarks Does not work on 286 and earlier.
 * @remarks Only declared here (external implementation) when
 *          RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero and the MSC intrinsics
 *          are not in use; otherwise defined inline.
665 */
666#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
667RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
668#else
669DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
670{
671# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
672# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 1 is tied to the input u32 (constraint "1"): the register
       carries the new value in and the old *pu32 value out. */
673 __asm__ __volatile__("xchgl %0, %1\n\t"
674 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
675 , "=r" (u32)
676 : "1" (u32)
677 , "m" (*pu32));
678
679# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic takes a plain long pointer, hence the cast. */
680 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
681
682# else
    /* MSC-style inline assembly: swap via EAX and write the old value back to u32. */
683 __asm
684 {
685# ifdef RT_ARCH_AMD64
686 mov rdx, [pu32]
687 mov eax, u32
688 xchg [rdx], eax
689 mov [u32], eax
690# else
691 mov edx, [pu32]
692 mov eax, u32
693 xchg [edx], eax
694 mov [u32], eax
695# endif
696 }
697# endif
698 return u32;
699
700# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
    /* Exclusive load/store retry loop: rc receives the store status and the
       loop repeats while it is non-zero.  The RTASM_ARM_DMB_SY barrier text
       follows the label, so it is executed on every iteration. */
701 uint32_t uOld;
702 uint32_t rcSpill;
703 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
704 RTASM_ARM_DMB_SY
705# if defined(RT_ARCH_ARM64)
706 "ldaxr %w[uOld], %[pMem]\n\t"
707 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
708 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
709# else
710 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
711 "strex %[rc], %[uNew], %[pMem]\n\t"
712 "cmp %[rc], #0\n\t"
713 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
714# endif
715 : [pMem] "+Q" (*pu32)
716 , [uOld] "=&r" (uOld)
717 , [rc] "=&r" (rcSpill)
718 : [uNew] "r" (u32)
719 RTASM_ARM_DMB_SY_COMMA_IN_REG
720 : "cc");
721 return uOld;
722
723# else
724# error "Port me"
725# endif
726}
727#endif
728
729
730/**
731 * Atomically Exchange a signed 32-bit value, ordered.
732 *
733 * @returns Current *pu32 value
734 * @param pi32 Pointer to the 32-bit variable to update.
735 * @param i32 The 32-bit value to assign to *pi32.
736 */
737DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
738{
739 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
740}
741
742
743/**
744 * Atomically Exchange an unsigned 64-bit value, ordered.
745 *
746 * @returns The value *pu64 held before the exchange.
747 * @param pu64 Pointer to the 64-bit variable to update.
748 * @param u64 The 64-bit value to assign to *pu64.
749 *
750 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
 * @remarks Only declared here (external implementation) when
 *          RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero without MSC intrinsics,
 *          or when RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC is non-zero;
 *          otherwise defined inline.
751 */
752#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
753 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
754RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
755#else
756DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
757{
758# if defined(RT_ARCH_AMD64)
759# if RT_INLINE_ASM_USES_INTRIN
760 return _InterlockedExchange64((__int64 *)pu64, u64);
761
762# elif RT_INLINE_ASM_GNU_STYLE
    /* Operand 1 is tied to the input u64 (constraint "1"): the register
       carries the new value in and the old *pu64 value out. */
763 __asm__ __volatile__("xchgq %0, %1\n\t"
764 : "=m" (*pu64)
765 , "=r" (u64)
766 : "1" (u64)
767 , "m" (*pu64));
768 return u64;
769# else
770 __asm
771 {
772 mov rdx, [pu64]
773 mov rax, [u64]
774 xchg [rdx], rax
775 mov [u64], rax
776 }
777 return u64;
778# endif
779
780# elif defined(RT_ARCH_X86)
    /* 32-bit x86: the exchange is done with a lock cmpxchg8b loop that repeats
       (jnz) until the compare-and-exchange succeeds.  The new value is passed
       in ebx (low half) and ecx (high half); the old value comes back in the
       "A" operand. */
781# if RT_INLINE_ASM_GNU_STYLE
782# if defined(PIC) || defined(__PIC__)
    /* PIC build: ebx is not named as an operand.  The low half is parked in
       u32EBX, swapped into ebx around the loop, and the saved ebx value is
       moved back from u32EBX afterwards (presumably because ebx is reserved
       in PIC code - TODO confirm). */
783 uint32_t u32EBX = (uint32_t)u64;
784 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
785 "xchgl %%ebx, %3\n\t"
786 "1:\n\t"
787 "lock; cmpxchg8b (%5)\n\t"
788 "jnz 1b\n\t"
789 "movl %3, %%ebx\n\t"
790 /*"xchgl %%esi, %5\n\t"*/
791 : "=A" (u64)
792 , "=m" (*pu64)
793 : "0" (*pu64)
794 , "m" ( u32EBX )
795 , "c" ( (uint32_t)(u64 >> 32) )
796 , "S" (pu64)
797 : "cc");
798# else /* !PIC */
799 __asm__ __volatile__("1:\n\t"
800 "lock; cmpxchg8b %1\n\t"
801 "jnz 1b\n\t"
802 : "=A" (u64)
803 , "=m" (*pu64)
804 : "0" (*pu64)
805 , "b" ( (uint32_t)u64 )
806 , "c" ( (uint32_t)(u64 >> 32) )
807 : "cc");
808# endif
809# else
    /* MSC-style variant of the same loop: new value in ecx:ebx, the initially
       read *pu64 in edx:eax, result written back to u64. */
810 __asm
811 {
812 mov ebx, dword ptr [u64]
813 mov ecx, dword ptr [u64 + 4]
814 mov edi, pu64
815 mov eax, dword ptr [edi]
816 mov edx, dword ptr [edi + 4]
817 retry:
818 lock cmpxchg8b [edi]
819 jnz retry
820 mov dword ptr [u64], eax
821 mov dword ptr [u64 + 4], edx
822 }
823# endif
824 return u64;
825
826# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
    /* Exclusive load/store retry loop: rc receives the store status and the
       loop repeats while it is non-zero.  The RTASM_ARM_DMB_SY barrier text
       follows the label, so it is executed on every iteration. */
827 uint32_t rcSpill;
828 uint64_t uOld;
829 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
830 RTASM_ARM_DMB_SY
831# if defined(RT_ARCH_ARM64)
832 "ldaxr %[uOld], %[pMem]\n\t"
833 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
834 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
835# else
836 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
837 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
838 "cmp %[rc], #0\n\t"
839 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
840# endif
841 : [pMem] "+Q" (*pu64)
842 , [uOld] "=&r" (uOld)
843 , [rc] "=&r" (rcSpill)
844 : [uNew] "r" (u64)
845 RTASM_ARM_DMB_SY_COMMA_IN_REG
846 : "cc");
847 return uOld;
848
849# else
850# error "Port me"
851# endif
852}
853#endif
854
855
856/**
857 * Atomically Exchange an signed 64-bit value, ordered.
858 *
859 * @returns Current *pi64 value
860 * @param pi64 Pointer to the 64-bit variable to update.
861 * @param i64 The 64-bit value to assign to *pi64.
862 */
863DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
864{
865 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
866}
867
868
869/**
870 * Atomically Exchange a size_t value, ordered.
871 *
872 * @returns Current *ppv value
873 * @param puDst Pointer to the size_t variable to update.
874 * @param uNew The new value to assign to *puDst.
875 */
876DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
877{
878#if ARCH_BITS == 16
879 AssertCompile(sizeof(size_t) == 2);
880 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
881#elif ARCH_BITS == 32
882 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
883#elif ARCH_BITS == 64
884 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
885#else
886# error "ARCH_BITS is bogus"
887#endif
888}
889
890
891/**
892 * Atomically Exchange a pointer value, ordered.
893 *
894 * @returns Current *ppv value
895 * @param ppv Pointer to the pointer variable to update.
896 * @param pv The pointer value to assign to *ppv.
897 */
898DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
899{
900#if ARCH_BITS == 32 || ARCH_BITS == 16
901 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
902#elif ARCH_BITS == 64
903 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
904#else
905# error "ARCH_BITS is bogus"
906#endif
907}
908
909
910/**
911 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
912 *
913 * @returns Current *ppv value
914 * @param ppv Pointer to the pointer variable to update.
915 * @param pv The pointer value to assign to *ppv.
916 * @param Type The type of *ppv, sans volatile.
 *
 * @remarks The GNU C variant is a statement expression that first assigns
 *          @a ppv and @a pv to typed temporaries, so mismatches against
 *          @a Type are seen by the compiler, and yields the result as
 *          @a Type.  The fallback variant only casts.
917 */
918#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
919# define ASMAtomicXchgPtrT(ppv, pv, Type) \
920 __extension__ \
921 ({\
922 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
923 Type const pvTypeChecked = (pv); \
924 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
925 pvTypeCheckedRet; \
926 })
927#else
928# define ASMAtomicXchgPtrT(ppv, pv, Type) \
929 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
930#endif
931
932
933/**
934 * Atomically Exchange a raw-mode context pointer value, ordered.
935 *
936 * @returns Current *ppv value
937 * @param ppvRC Pointer to the pointer variable to update.
938 * @param pvRC The pointer value to assign to *ppv.
939 */
940DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
941{
942 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
943}
944
945
946/**
947 * Atomically Exchange a ring-0 pointer value, ordered.
948 *
949 * @returns Current *ppv value
950 * @param ppvR0 Pointer to the pointer variable to update.
951 * @param pvR0 The pointer value to assign to *ppv.
952 */
953DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
954{
955#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
956 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
957#elif R0_ARCH_BITS == 64
958 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
959#else
960# error "R0_ARCH_BITS is bogus"
961#endif
962}
963
964
965/**
966 * Atomically Exchange a ring-3 pointer value, ordered.
967 *
968 * @returns Current *ppv value
969 * @param ppvR3 Pointer to the pointer variable to update.
970 * @param pvR3 The pointer value to assign to *ppv.
971 */
972DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
973{
974#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
975 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
976#elif R3_ARCH_BITS == 64
977 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
978#else
979# error "R3_ARCH_BITS is bogus"
980#endif
981}
982
983
/** @def ASMAtomicXchgHandle
 * Atomically Exchange a typical IPRT handle value, ordered.
 *
 * The handle is exchanged as a 32-bit or 64-bit unsigned integer, selected at
 * compile time from HC_ARCH_BITS / ARCH_BITS.  Both *ph and *phRes must have
 * exactly that size; this is checked with AssertCompile.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   phRes       Where to store the previous *ph value.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1010
1011
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and
 * discards the previous value.  Any other size triggers AssertMsgFailed.
 *
 * @param   pu          Pointer to the variable to update.
 * @param   uNew        The value to assign to *pu.
 * @todo This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
1030
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and stores
 * the previous value through puRes using that same width.  Any other size
 * triggers AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu          Pointer to the variable to update.
 * @param   uNew        The value to assign to *pu.
 * @param   puRes       Where to store the previous *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro in the message; sizeof yields size_t, so cast it for %d. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
1049
1050
1051
/**
 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu8         Pointer to the value to update.
 * @param   u8New       The new value to assign to *pu8.
 * @param   u8Old       The old value to compare *pu8 with.
 *
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWriteU8
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
/* Prototype only; no inline body is provided in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /* u8Old goes in and comes out via the accumulator ("2" ties the input to
       the "=a" output); setz turns ZF into the boolean result. */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu8)
                         , "=qm" (u8Ret)
                         , "=a" (u8Old)
                         : "q" (u8New)
                         , "2" (u8Old)
                         , "m" (*pu8)
                         : "cc");
    return (bool)u8Ret;

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive load / conditional exclusive store, retried while the store
       status (rc) is non-zero.  fXchg starts out as 0 (tied input below) and is
       only set to 1 after a successful store. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32Spill;
    uint32_t rcSpill;
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
                         RTASM_ARM_DMB_SY /* NOTE(review): defined elsewhere; presumably a barrier instruction - confirm. */
#  if defined(RT_ARCH_ARM64)
                         "ldaxrb %w[uOld], %[pMem]\n\t"
                         "cmp %w[uOld], %w[uCmp]\n\t"
                         "bne 1f\n\t"   /* stop here if not equal */
                         "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
                         "mov %w[fXchg], #1\n\t"
#  else
                         "ldrexb %[uOld], %[pMem]\n\t"
                         "teq %[uOld], %[uCmp]\n\t"
                         "strexbeq %[rc], %[uNew], %[pMem]\n\t"
                         "bne 1f\n\t"   /* stop here if not equal */
                         "cmp %[rc], #0\n\t"
                         "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
                         "mov %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem] "+Q" (*pu8)
                         , [uOld] "=&r" (u32Spill)
                         , [rc] "=&r" (rcSpill)
                         , [fXchg] "=&r" (fXchg.u)
                         : [uCmp] "r" ((uint32_t)u8Old) /* widened: compared as a 32-bit register */
                         , [uNew] "r" ((uint32_t)u8New)
                         , "[fXchg]" (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
1122
1123
1124/**
1125 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1126 *
1127 * @returns true if xchg was done.
1128 * @returns false if xchg wasn't done.
1129 *
1130 * @param pi8 Pointer to the value to update.
1131 * @param i8New The new value to assigned to *pi8.
1132 * @param i8Old The old value to *pi8 compare with.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 * @todo Rename ASMAtomicCmpWriteS8
1136 */
1137DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1138{
1139 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1140}
1141
1142
1143/**
1144 * Atomically Compare and Exchange a bool value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pf Pointer to the value to update.
1150 * @param fNew The new value to assigned to *pf.
1151 * @param fOld The old value to *pf compare with.
1152 *
1153 * @remarks x86: Requires a 486 or later.
1154 * @todo Rename ASMAtomicCmpWriteBool
1155 */
1156DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1157{
1158 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1159}
1160
1161
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 *
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWriteU32
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
/* Prototype only; no inline body is provided in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* u32Old goes in and comes out via the accumulator ("2" ties the input to
       the "=a" output); setz turns ZF into the boolean result. */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32)
                         , "=qm" (u8Ret)
                         , "=a" (u32Old)
                         : "r" (u32New)
                         , "2" (u32Old)
                         , "m" (*pu32)
                         : "cc");
    return (bool)u8Ret;

#  elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is compared with u32Old to tell whether
       the exchange took place. */
    return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;

#  else
    /* Microsoft-style inline assembly: eax = expected, ecx = new value. */
    uint32_t u32Ret;
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#   else
        mov     edx, [pu32]
#   endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#   ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
#   else
        lock cmpxchg [edx], ecx
#   endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive load / conditional exclusive store, retried while the store
       status (rc) is non-zero.  fXchg starts out as 0 (tied input below) and is
       only set to 1 after a successful store. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32Spill;
    uint32_t rcSpill;
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
                         RTASM_ARM_DMB_SY /* NOTE(review): defined elsewhere; presumably a barrier instruction - confirm. */
#  if defined(RT_ARCH_ARM64)
                         "ldaxr %w[uOld], %[pMem]\n\t"
                         "cmp %w[uOld], %w[uCmp]\n\t"
                         "bne 1f\n\t"   /* stop here if not equal */
                         "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
                         "mov %w[fXchg], #1\n\t"
#  else
                         "ldrex %[uOld], %[pMem]\n\t"
                         "teq %[uOld], %[uCmp]\n\t"
                         "strexeq %[rc], %[uNew], %[pMem]\n\t"
                         "bne 1f\n\t"   /* stop here if not equal */
                         "cmp %[rc], #0\n\t"
                         "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
                         "mov %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem] "+Q" (*pu32)
                         , [uOld] "=&r" (u32Spill)
                         , [rc] "=&r" (rcSpill)
                         , [fXchg] "=&r" (fXchg.u)
                         : [uCmp] "r" (u32Old)
                         , [uNew] "r" (u32New)
                         , "[fXchg]" (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
1259
1260
1261/**
1262 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1263 *
1264 * @returns true if xchg was done.
1265 * @returns false if xchg wasn't done.
1266 *
1267 * @param pi32 Pointer to the value to update.
1268 * @param i32New The new value to assigned to *pi32.
1269 * @param i32Old The old value to *pi32 compare with.
1270 *
1271 * @remarks x86: Requires a 486 or later.
1272 * @todo Rename ASMAtomicCmpWriteS32
1273 */
1274DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1275{
1276 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1277}
1278
1279
1280/**
1281 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1282 *
1283 * @returns true if xchg was done.
1284 * @returns false if xchg wasn't done.
1285 *
1286 * @param pu64 Pointer to the 64-bit variable to update.
1287 * @param u64New The 64-bit value to assign to *pu64.
1288 * @param u64Old The value to compare with.
1289 *
1290 * @remarks x86: Requires a Pentium or later.
1291 * @todo Rename ASMAtomicCmpWriteU64
1292 */
1293#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1294 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1295RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1296#else
1297DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1298{
1299# if RT_INLINE_ASM_USES_INTRIN
1300 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1301
1302# elif defined(RT_ARCH_AMD64)
1303# if RT_INLINE_ASM_GNU_STYLE
1304 uint8_t u8Ret;
1305 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1306 "setz %1\n\t"
1307 : "=m" (*pu64)
1308 , "=qm" (u8Ret)
1309 , "=a" (u64Old)
1310 : "r" (u64New)
1311 , "2" (u64Old)
1312 , "m" (*pu64)
1313 : "cc");
1314 return (bool)u8Ret;
1315# else
1316 bool fRet;
1317 __asm
1318 {
1319 mov rdx, [pu32]
1320 mov rax, [u64Old]
1321 mov rcx, [u64New]
1322 lock cmpxchg [rdx], rcx
1323 setz al
1324 mov [fRet], al
1325 }
1326 return fRet;
1327# endif
1328
1329# elif defined(RT_ARCH_X86)
1330 uint32_t u32Ret;
1331# if RT_INLINE_ASM_GNU_STYLE
1332# if defined(PIC) || defined(__PIC__)
1333 uint32_t u32EBX = (uint32_t)u64New;
1334 uint32_t u32Spill;
1335 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1336 "lock; cmpxchg8b (%6)\n\t"
1337 "setz %%al\n\t"
1338 "movl %4, %%ebx\n\t"
1339 "movzbl %%al, %%eax\n\t"
1340 : "=a" (u32Ret)
1341 , "=d" (u32Spill)
1342# if RT_GNUC_PREREQ(4, 3)
1343 , "+m" (*pu64)
1344# else
1345 , "=m" (*pu64)
1346# endif
1347 : "A" (u64Old)
1348 , "m" ( u32EBX )
1349 , "c" ( (uint32_t)(u64New >> 32) )
1350 , "S" (pu64)
1351 : "cc");
1352# else /* !PIC */
1353 uint32_t u32Spill;
1354 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1355 "setz %%al\n\t"
1356 "movzbl %%al, %%eax\n\t"
1357 : "=a" (u32Ret)
1358 , "=d" (u32Spill)
1359 , "+m" (*pu64)
1360 : "A" (u64Old)
1361 , "b" ( (uint32_t)u64New )
1362 , "c" ( (uint32_t)(u64New >> 32) )
1363 : "cc");
1364# endif
1365 return (bool)u32Ret;
1366# else
1367 __asm
1368 {
1369 mov ebx, dword ptr [u64New]
1370 mov ecx, dword ptr [u64New + 4]
1371 mov edi, [pu64]
1372 mov eax, dword ptr [u64Old]
1373 mov edx, dword ptr [u64Old + 4]
1374 lock cmpxchg8b [edi]
1375 setz al
1376 movzx eax, al
1377 mov dword ptr [u32Ret], eax
1378 }
1379 return !!u32Ret;
1380# endif
1381
1382# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1383 union { uint32_t u; bool f; } fXchg;
1384 uint64_t u64Spill;
1385 uint32_t rcSpill;
1386 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1387 RTASM_ARM_DMB_SY
1388# if defined(RT_ARCH_ARM64)
1389 "ldaxr %[uOld], %[pMem]\n\t"
1390 "cmp %[uOld], %[uCmp]\n\t"
1391 "bne 1f\n\t" /* stop here if not equal */
1392 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1393 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1394 "mov %w[fXchg], #1\n\t"
1395# else
1396 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1397 "teq %[uOld], %[uCmp]\n\t"
1398 "teqeq %H[uOld], %H[uCmp]\n\t"
1399 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1400 "bne 1f\n\t" /* stop here if not equal */
1401 "cmp %[rc], #0\n\t"
1402 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1403 "mov %[fXchg], #1\n\t"
1404# endif
1405 "1:\n\t"
1406 : [pMem] "+Q" (*pu64)
1407 , [uOld] "=&r" (u64Spill)
1408 , [rc] "=&r" (rcSpill)
1409 , [fXchg] "=&r" (fXchg.u)
1410 : [uCmp] "r" (u64Old)
1411 , [uNew] "r" (u64New)
1412 , "[fXchg]" (0)
1413 RTASM_ARM_DMB_SY_COMMA_IN_REG
1414 : "cc");
1415 return fXchg.f;
1416
1417# else
1418# error "Port me"
1419# endif
1420}
1421#endif
1422
1423
1424/**
1425 * Atomically Compare and exchange a signed 64-bit value, ordered.
1426 *
1427 * @returns true if xchg was done.
1428 * @returns false if xchg wasn't done.
1429 *
1430 * @param pi64 Pointer to the 64-bit variable to update.
1431 * @param i64 The 64-bit value to assign to *pu64.
1432 * @param i64Old The value to compare with.
1433 *
1434 * @remarks x86: Requires a Pentium or later.
1435 * @todo Rename ASMAtomicCmpWriteS64
1436 */
1437DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1438{
1439 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1440}
1441
1442#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
1443
/** @def RTASM_HAVE_CMP_WRITE_U128
 * Indicates that we've got ASMAtomicCmpWriteU128(), ASMAtomicCmpWriteU128v2()
 * and ASMAtomicCmpWriteExU128() available.
 *
 * Only defined (as 1) inside the surrounding RT_ARCH_AMD64 / RT_ARCH_ARM64 /
 * DOXYGEN_RUNNING conditional. */
# define RTASM_HAVE_CMP_WRITE_U128 1
1448
1449
/**
 * Atomically compare and write an unsigned 128-bit value, ordered.
 *
 * @returns true if write was done.
 * @returns false if write wasn't done.
 *
 * @param   pu128       Pointer to the 128-bit variable to update.
 * @param   u64NewHi    The high 64 bits of the value to assign to *pu128.
 * @param   u64NewLo    The low 64 bits of the value to assign to *pu128.
 * @param   u64OldHi    The high 64 bits of the value to compare with.
 * @param   u64OldLo    The low 64 bits of the value to compare with.
 *
 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
 */
# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
/* Prototype only; no inline body is provided in this configuration. */
DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                      const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
# else
DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                         const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
{
#  if RT_INLINE_ASM_USES_INTRIN
    /* The comparand is handed over as a two element array: [0] = low half,
       [1] = high half. */
    __int64 ai64Cmp[2];
    ai64Cmp[0] = u64OldLo;
    ai64Cmp[1] = u64OldHi;
    return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;

#  elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
    /* Reassemble the halves into 128-bit values for the compiler builtin. */
    return __sync_bool_compare_and_swap(pu128, ((uint128_t)u64OldHi << 64) | u64OldLo, ((uint128_t)u64NewHi << 64) | u64NewLo);

#  elif defined(RT_ARCH_AMD64)
#   if RT_INLINE_ASM_GNU_STYLE
    /* RDX:RAX = expected value, RCX:RBX = new value; setz/movzbl turn ZF into
       the result in EAX. */
    uint64_t u64Ret;
    uint64_t u64Spill;
    __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
                         "setz %%al\n\t"
                         "movzbl %%al, %%eax\n\t"
                         : "=a" (u64Ret)
                         , "=d" (u64Spill)
                         , "+m" (*pu128)
                         : "a" (u64OldLo)
                         , "d" (u64OldHi)
                         , "b" (u64NewLo)
                         , "c" (u64NewHi)
                         : "cc");

    return (bool)u64Ret;
#   else
#    error "Port me"
#   endif
#  else
#   error "Port me"
#  endif
}
# endif
1505
1506
1507/**
1508 * Atomically compare and write an unsigned 128-bit value, ordered.
1509 *
1510 * @returns true if write was done.
1511 * @returns false if write wasn't done.
1512 *
1513 * @param pu128 Pointer to the 128-bit variable to update.
1514 * @param u128New The 128-bit value to assign to *pu128.
1515 * @param u128Old The value to compare with.
1516 *
1517 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1518 */
1519DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1520{
1521# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1522# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1523 return __sync_bool_compare_and_swap(pu128, u128Old, u128New);
1524# else
1525 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1526 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1527# endif
1528# else
1529 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1530# endif
1531}
1532
1533
1534/**
1535 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1536 */
1537DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1538 const RTUINT128U u128Old) RT_NOTHROW_DEF
1539{
1540# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1541 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1542# else
1543 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1544# endif
1545}
1546
1547#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
1548
1549/**
1550 * Atomically Compare and Exchange a pointer value, ordered.
1551 *
1552 * @returns true if xchg was done.
1553 * @returns false if xchg wasn't done.
1554 *
1555 * @param ppv Pointer to the value to update.
1556 * @param pvNew The new value to assigned to *ppv.
1557 * @param pvOld The old value to *ppv compare with.
1558 *
1559 * @remarks x86: Requires a 486 or later.
1560 * @todo Rename ASMAtomicCmpWritePtrVoid
1561 */
1562DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1563{
1564#if ARCH_BITS == 32 || ARCH_BITS == 16
1565 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1566#elif ARCH_BITS == 64
1567 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1568#else
1569# error "ARCH_BITS is bogus"
1570#endif
1571}
1572
1573
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * Front end for ASMAtomicCmpXchgPtrVoid.  With GCC the arguments are first
 * assigned to temporaries typed after *ppv, so mismatching pointer types are
 * diagnosed at compile time; other compilers get plain casts.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWritePtr
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvDstChecked      = (ppv); \
        __typeof__(*(ppv)) const            pvDesiredChecked   = (pvNew); \
        __typeof__(*(ppv)) const            pvExpectedChecked  = (pvOld); \
        bool                                fCmpXchgPtrResult  = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvDstChecked, \
                                                                                         (void *)pvDesiredChecked, \
                                                                                         (void *)pvExpectedChecked); \
        fCmpXchgPtrResult; \
    })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
#endif
1603
1604
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * The handle is compared and exchanged as a 32-bit or 64-bit unsigned integer,
 * selected at compile time from HC_ARCH_BITS / ARCH_BITS.  *ph must have
 * exactly that size; this is checked with AssertCompile.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The old value to compare *ph with.
 * @param   fRc         Where to store the result (an lvalue, assigned directly).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWriteHandle
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
        (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
        (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1632
1633
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange.
 * Any other size triggers AssertMsgFailed and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result (an lvalue, assigned directly).
 *
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWriteSize
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1658
1659
1660/**
1661 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1662 * back old value, ordered.
1663 *
1664 * @returns true if xchg was done.
1665 * @returns false if xchg wasn't done.
1666 *
1667 * @param pu8 Pointer to the value to update.
1668 * @param u8New The new value to assigned to *pu32.
1669 * @param u8Old The old value to *pu8 compare with.
1670 * @param pu8Old Pointer store the old value at.
1671 *
1672 * @remarks x86: Requires a 486 or later.
1673 */
1674#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1675RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1676#else
1677DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1678{
1679# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1680# if RT_INLINE_ASM_GNU_STYLE
1681 uint8_t u8Ret;
1682 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1683 "setz %1\n\t"
1684 : "=m" (*pu8)
1685 , "=qm" (u8Ret)
1686 , "=a" (*pu8Old)
1687# if defined(RT_ARCH_X86)
1688 : "q" (u8New)
1689# else
1690 : "r" (u8New)
1691# endif
1692 , "a" (u8Old)
1693 , "m" (*pu8)
1694 : "cc");
1695 return (bool)u8Ret;
1696
1697# elif RT_INLINE_ASM_USES_INTRIN
1698 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1699
1700# else
1701 uint8_t u8Ret;
1702 __asm
1703 {
1704# ifdef RT_ARCH_AMD64
1705 mov rdx, [pu8]
1706# else
1707 mov edx, [pu8]
1708# endif
1709 mov eax, [u8Old]
1710 mov ecx, [u8New]
1711# ifdef RT_ARCH_AMD64
1712 lock cmpxchg [rdx], ecx
1713 mov rdx, [pu8Old]
1714 mov [rdx], eax
1715# else
1716 lock cmpxchg [edx], ecx
1717 mov edx, [pu8Old]
1718 mov [edx], eax
1719# endif
1720 setz al
1721 movzx eax, al
1722 mov [u8Ret], eax
1723 }
1724 return !!u8Ret;
1725# endif
1726
1727# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1728 union { uint8_t u; bool f; } fXchg;
1729 uint8_t u8ActualOld;
1730 uint8_t rcSpill;
1731 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1732 RTASM_ARM_DMB_SY
1733# if defined(RT_ARCH_ARM64)
1734 "ldaxrb %w[uOld], %[pMem]\n\t"
1735 "cmp %w[uOld], %w[uCmp]\n\t"
1736 "bne 1f\n\t" /* stop here if not equal */
1737 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1738 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1739 "mov %w[fXchg], #1\n\t"
1740# else
1741 "ldrexb %[uOld], %[pMem]\n\t"
1742 "teq %[uOld], %[uCmp]\n\t"
1743 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1744 "bne 1f\n\t" /* stop here if not equal */
1745 "cmp %[rc], #0\n\t"
1746 "bne .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1747 "mov %[fXchg], #1\n\t"
1748# endif
1749 "1:\n\t"
1750 : [pMem] "+Q" (*pu8)
1751 , [uOld] "=&r" (u8ActualOld)
1752 , [rc] "=&r" (rcSpill)
1753 , [fXchg] "=&r" (fXchg.u)
1754 : [uCmp] "r" (u8Old)
1755 , [uNew] "r" (u8New)
1756 , "[fXchg]" (0)
1757 RTASM_ARM_DMB_SY_COMMA_IN_REG
1758 : "cc");
1759 *pu8Old = u8ActualOld;
1760 return fXchg.f;
1761
1762# else
1763# error "Port me"
1764# endif
1765}
1766#endif
1767
1768
1769/**
1770 * Atomically Compare and Exchange a signed 8-bit value, additionally
1771 * passes back old value, ordered.
1772 *
1773 * @returns true if xchg was done.
1774 * @returns false if xchg wasn't done.
1775 *
1776 * @param pi8 Pointer to the value to update.
1777 * @param i8New The new value to assigned to *pi8.
1778 * @param i8Old The old value to *pi8 compare with.
1779 * @param pi8Old Pointer store the old value at.
1780 *
1781 * @remarks x86: Requires a 486 or later.
1782 */
1783DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1784{
1785 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1786}
1787
1788
1789/**
1790 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1791 * back old value, ordered.
1792 *
1793 * @returns true if xchg was done.
1794 * @returns false if xchg wasn't done.
1795 *
1796 * @param pu16 Pointer to the value to update.
1797 * @param u16New The new value to assigned to *pu16.
1798 * @param u16Old The old value to *pu32 compare with.
1799 * @param pu16Old Pointer store the old value at.
1800 *
1801 * @remarks x86: Requires a 486 or later.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1804RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1805#else
1806DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1807{
1808# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1809# if RT_INLINE_ASM_GNU_STYLE
1810 uint8_t u8Ret;
1811 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1812 "setz %1\n\t"
1813 : "=m" (*pu16)
1814 , "=qm" (u8Ret)
1815 , "=a" (*pu16Old)
1816 : "r" (u16New)
1817 , "a" (u16Old)
1818 , "m" (*pu16)
1819 : "cc");
1820 return (bool)u8Ret;
1821
1822# elif RT_INLINE_ASM_USES_INTRIN
1823 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1824
1825# else
1826 uint16_t u16Ret;
1827 __asm
1828 {
1829# ifdef RT_ARCH_AMD64
1830 mov rdx, [pu16]
1831# else
1832 mov edx, [pu16]
1833# endif
1834 mov eax, [u16Old]
1835 mov ecx, [u16New]
1836# ifdef RT_ARCH_AMD64
1837 lock cmpxchg [rdx], ecx
1838 mov rdx, [pu16Old]
1839 mov [rdx], eax
1840# else
1841 lock cmpxchg [edx], ecx
1842 mov edx, [pu16Old]
1843 mov [edx], eax
1844# endif
1845 setz al
1846 movzx eax, al
1847 mov [u16Ret], eax
1848 }
1849 return !!u16Ret;
1850# endif
1851
1852# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1853 union { uint16_t u; bool f; } fXchg;
1854 uint16_t u16ActualOld;
1855 uint16_t rcSpill;
1856 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1857 RTASM_ARM_DMB_SY
1858# if defined(RT_ARCH_ARM64)
1859 "ldaxrh %w[uOld], %[pMem]\n\t"
1860 "cmp %w[uOld], %w[uCmp]\n\t"
1861 "bne 1f\n\t" /* stop here if not equal */
1862 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1863 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1864 "mov %w[fXchg], #1\n\t"
1865# else
1866 "ldrexh %[uOld], %[pMem]\n\t"
1867 "teq %[uOld], %[uCmp]\n\t"
1868 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1869 "bne 1f\n\t" /* stop here if not equal */
1870 "cmp %[rc], #0\n\t"
1871 "bne .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1872 "mov %[fXchg], #1\n\t"
1873# endif
1874 "1:\n\t"
1875 : [pMem] "+Q" (*pu16)
1876 , [uOld] "=&r" (u16ActualOld)
1877 , [rc] "=&r" (rcSpill)
1878 , [fXchg] "=&r" (fXchg.u)
1879 : [uCmp] "r" (u16Old)
1880 , [uNew] "r" (u16New)
1881 , "[fXchg]" (0)
1882 RTASM_ARM_DMB_SY_COMMA_IN_REG
1883 : "cc");
1884 *pu16Old = u16ActualOld;
1885 return fXchg.f;
1886
1887# else
1888# error "Port me"
1889# endif
1890}
1891#endif
1892
1893
1894/**
1895 * Atomically Compare and Exchange a signed 16-bit value, additionally
1896 * passes back old value, ordered.
1897 *
1898 * @returns true if xchg was done.
1899 * @returns false if xchg wasn't done.
1900 *
1901 * @param pi16 Pointer to the value to update.
1902 * @param i16New The new value to assigned to *pi16.
1903 * @param i16Old The old value to *pi16 compare with.
1904 * @param pi16Old Pointer store the old value at.
1905 *
1906 * @remarks x86: Requires a 486 or later.
1907 */
1908DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1909{
1910 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1911}
1912
1913
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 * @param   pu32Old     Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
/* Only a prototype is emitted in this configuration; the body lives elsewhere. */
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* The comparand goes in via the "a" register and the same register is
       written to *pu32Old afterwards; setz turns the zero flag left by
       cmpxchg into the boolean result. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32)
                         , "=qm" (u8Ret)
                         , "=a" (*pu32Old)
                         : "r" (u32New)
                         , "a" (u32Old)
                         , "m" (*pu32)
                         : "cc");
    return (bool)u8Ret;

#  elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is stored in *pu32Old and then compared
       against the expected value to produce the result. */
    return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;

#  else
    uint32_t u32Ret;
    /* Same sequence as the GNU variant: cmpxchg, store eax to *pu32Old (the
       mov instructions in between are placed before setz), then turn the
       zero flag into 0/1 in u32Ret. */
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#   else
        mov     edx, [pu32]
#   endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#   ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#   else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#   endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* fXchg.u is seeded with 0 through the matching "[fXchg]" input operand
       and only set to 1 once the exclusive store has succeeded. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32ActualOld;
    uint32_t rcSpill;
    /* Load-exclusive / store-exclusive retry loop: branch to local label 1
       when the loaded value differs from uCmp, jump back to the top when the
       exclusive store reports failure in rc. */
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
                         RTASM_ARM_DMB_SY
#  if defined(RT_ARCH_ARM64)
                         "ldaxr %w[uOld], %[pMem]\n\t"
                         "cmp %w[uOld], %w[uCmp]\n\t"
                         "bne 1f\n\t" /* stop here if not equal */
                         "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
                         "mov %w[fXchg], #1\n\t"
#  else
                         "ldrex %[uOld], %[pMem]\n\t"
                         "teq %[uOld], %[uCmp]\n\t"
                         "strexeq %[rc], %[uNew], %[pMem]\n\t"
                         "bne 1f\n\t" /* stop here if not equal */
                         "cmp %[rc], #0\n\t"
                         "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
                         "mov %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem] "+Q" (*pu32)
                         , [uOld] "=&r" (u32ActualOld)
                         , [rc] "=&r" (rcSpill)
                         , [fXchg] "=&r" (fXchg.u)
                         : [uCmp] "r" (u32Old)
                         , [uNew] "r" (u32New)
                         , "[fXchg]" (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    /* The loaded value is passed back whether or not the exchange happened. */
    *pu32Old = u32ActualOld;
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
2017
2018
2019/**
2020 * Atomically Compare and Exchange a signed 32-bit value, additionally
2021 * passes back old value, ordered.
2022 *
2023 * @returns true if xchg was done.
2024 * @returns false if xchg wasn't done.
2025 *
2026 * @param pi32 Pointer to the value to update.
2027 * @param i32New The new value to assigned to *pi32.
2028 * @param i32Old The old value to *pi32 compare with.
2029 * @param pi32Old Pointer store the old value at.
2030 *
2031 * @remarks x86: Requires a 486 or later.
2032 */
2033DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2034{
2035 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2036}
2037
2038
2039/**
2040 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2041 * passing back old value, ordered.
2042 *
2043 * @returns true if xchg was done.
2044 * @returns false if xchg wasn't done.
2045 *
2046 * @param pu64 Pointer to the 64-bit variable to update.
2047 * @param u64New The 64-bit value to assign to *pu64.
2048 * @param u64Old The value to compare with.
2049 * @param pu64Old Pointer store the old value at.
2050 *
2051 * @remarks x86: Requires a Pentium or later.
2052 */
2053#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2054 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2055RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2056#else
2057DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2058{
2059# if RT_INLINE_ASM_USES_INTRIN
2060 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2061
2062# elif defined(RT_ARCH_AMD64)
2063# if RT_INLINE_ASM_GNU_STYLE
2064 uint8_t u8Ret;
2065 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2066 "setz %1\n\t"
2067 : "=m" (*pu64)
2068 , "=qm" (u8Ret)
2069 , "=a" (*pu64Old)
2070 : "r" (u64New)
2071 , "a" (u64Old)
2072 , "m" (*pu64)
2073 : "cc");
2074 return (bool)u8Ret;
2075# else
2076 bool fRet;
2077 __asm
2078 {
2079 mov rdx, [pu32]
2080 mov rax, [u64Old]
2081 mov rcx, [u64New]
2082 lock cmpxchg [rdx], rcx
2083 mov rdx, [pu64Old]
2084 mov [rdx], rax
2085 setz al
2086 mov [fRet], al
2087 }
2088 return fRet;
2089# endif
2090
2091# elif defined(RT_ARCH_X86)
2092# if RT_INLINE_ASM_GNU_STYLE
2093 uint64_t u64Ret;
2094# if defined(PIC) || defined(__PIC__)
2095 /* Note #1: This code uses a memory clobber description, because the clean
2096 solution with an output value for *pu64 makes gcc run out of
2097 registers. This will cause suboptimal code, and anyone with a
2098 better solution is welcome to improve this.
2099
2100 Note #2: We must prevent gcc from encoding the memory access, as it
2101 may go via the GOT if we're working on a global variable (like
2102 in the testcase). Thus we request a register (%3) and
2103 dereference it ourselves. */
2104 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2105 "lock; cmpxchg8b (%3)\n\t"
2106 "xchgl %%ebx, %1\n\t"
2107 : "=A" (u64Ret)
2108 : "DS" ((uint32_t)u64New)
2109 , "c" ((uint32_t)(u64New >> 32))
2110 , "r" (pu64) /* Do not use "m" here*/
2111 , "0" (u64Old)
2112 : "memory"
2113 , "cc" );
2114# else /* !PIC */
2115 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2116 : "=A" (u64Ret)
2117 , "=m" (*pu64)
2118 : "b" ((uint32_t)u64New)
2119 , "c" ((uint32_t)(u64New >> 32))
2120 , "m" (*pu64)
2121 , "0" (u64Old)
2122 : "cc");
2123# endif
2124 *pu64Old = u64Ret;
2125 return u64Ret == u64Old;
2126# else
2127 uint32_t u32Ret;
2128 __asm
2129 {
2130 mov ebx, dword ptr [u64New]
2131 mov ecx, dword ptr [u64New + 4]
2132 mov edi, [pu64]
2133 mov eax, dword ptr [u64Old]
2134 mov edx, dword ptr [u64Old + 4]
2135 lock cmpxchg8b [edi]
2136 mov ebx, [pu64Old]
2137 mov [ebx], eax
2138 setz al
2139 movzx eax, al
2140 add ebx, 4
2141 mov [ebx], edx
2142 mov dword ptr [u32Ret], eax
2143 }
2144 return !!u32Ret;
2145# endif
2146
2147# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2148 union { uint32_t u; bool f; } fXchg;
2149 uint64_t u64ActualOld;
2150 uint32_t rcSpill;
2151 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2152 RTASM_ARM_DMB_SY
2153# if defined(RT_ARCH_ARM64)
2154 "ldaxr %[uOld], %[pMem]\n\t"
2155 "cmp %[uOld], %[uCmp]\n\t"
2156 "bne 1f\n\t" /* stop here if not equal */
2157 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2158 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2159 "mov %w[fXchg], #1\n\t"
2160# else
2161 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2162 "teq %[uOld], %[uCmp]\n\t"
2163 "teqeq %H[uOld], %H[uCmp]\n\t"
2164 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2165 "bne 1f\n\t" /* stop here if not equal */
2166 "cmp %[rc], #0\n\t"
2167 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2168 "mov %[fXchg], #1\n\t"
2169# endif
2170 "1:\n\t"
2171 : [pMem] "+Q" (*pu64)
2172 , [uOld] "=&r" (u64ActualOld)
2173 , [rc] "=&r" (rcSpill)
2174 , [fXchg] "=&r" (fXchg.u)
2175 : [uCmp] "r" (u64Old)
2176 , [uNew] "r" (u64New)
2177 , "[fXchg]" (0)
2178 RTASM_ARM_DMB_SY_COMMA_IN_REG
2179 : "cc");
2180 *pu64Old = u64ActualOld;
2181 return fXchg.f;
2182
2183# else
2184# error "Port me"
2185# endif
2186}
2187#endif
2188
2189
2190/**
2191 * Atomically Compare and exchange a signed 64-bit value, additionally
2192 * passing back old value, ordered.
2193 *
2194 * @returns true if xchg was done.
2195 * @returns false if xchg wasn't done.
2196 *
2197 * @param pi64 Pointer to the 64-bit variable to update.
2198 * @param i64 The 64-bit value to assign to *pu64.
2199 * @param i64Old The value to compare with.
2200 * @param pi64Old Pointer store the old value at.
2201 *
2202 * @remarks x86: Requires a Pentium or later.
2203 */
2204DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2205{
2206 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2207}
2208
2209#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
2210
/** @def RTASM_HAVE_CMP_XCHG_U128
 * Indicates that we've got ASMAtomicCmpXchgU128v2(), ASMAtomicCmpXchgU128()
 * and ASMAtomicCmpXchgU128U() available. */
2214# define RTASM_HAVE_CMP_XCHG_U128 1
2215
2216
/**
 * Atomically compare and exchange an unsigned 128-bit value, ordered.
 *
 * The new and the expected values are passed as separate 64-bit halves.
 *
 * @returns true if exchange was done.
 * @returns false if exchange wasn't done.
 *
 * @param   pu128       Pointer to the 128-bit variable to update.
 * @param   u64NewHi    The high 64 bits of the value to assign to *pu128.
 * @param   u64NewLo    The low 64 bits of the value to assign to *pu128.
 * @param   u64OldHi    The high 64-bit of the value to compare with.
 * @param   u64OldLo    The low 64-bit of the value to compare with.
 * @param   pu128Old    Where to return the old value.
 *
 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
 */
# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
/* Only a prototype is emitted in this configuration; the body lives elsewhere. */
DECLASM(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                     const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_PROTO;
# else
DECLINLINE(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                        const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_DEF
{
#  if RT_INLINE_ASM_USES_INTRIN
    /* *pu128Old is pre-loaded with the comparand because the same buffer is
       handed to the intrinsic as its last argument.  The compile-time check
       ensures that Lo is the first member, so &pu128Old->Lo addresses the
       whole 128-bit value. */
    pu128Old->Hi = u64OldHi;
    pu128Old->Lo = u64OldLo;
    AssertCompileMemberOffset(uint128_t, Lo, 0);
    return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, (__int64 *)&pu128Old->Lo) != 0;

#  elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
    /* Assemble the halves into 128-bit integers and use the compiler builtin;
       success is derived by comparing the returned value with the comparand. */
    uint128_t const uCmp = ((uint128_t)u64OldHi << 64) | u64OldLo;
    uint128_t const uOld = __sync_val_compare_and_swap(pu128, uCmp, ((uint128_t)u64NewHi << 64) | u64NewLo);
    *pu128Old = uOld;
    return uCmp == uOld;

#  elif defined(RT_ARCH_AMD64)
#   if RT_INLINE_ASM_GNU_STYLE
    uint8_t bRet;
    uint64_t u64RetHi, u64RetLo;
    /* The comparand is passed in the "d":"a" register pair and the new value
       in "c":"b"; the "d":"a" pair is read back as the old value and setz
       captures the zero flag as the result. */
    __asm__ __volatile__("lock; cmpxchg16b %3\n\t"
                         "setz %b0\n\t"
                         : "=r" (bRet)
                         , "=a" (u64RetLo)
                         , "=d" (u64RetHi)
                         , "+m" (*pu128)
                         : "a" (u64OldLo)
                         , "d" (u64OldHi)
                         , "b" (u64NewLo)
                         , "c" (u64NewHi)
                         : "cc");
    *pu128Old = ((uint128_t)u64RetHi << 64) | u64RetLo;
    return (bool)bRet;
#   else
#    error "Port me"
#   endif
#  else
#   error "Port me"
#  endif
}
# endif
2276
2277
2278/**
2279 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2280 *
2281 * @returns true if exchange was done.
2282 * @returns false if exchange wasn't done.
2283 *
2284 * @param pu128 Pointer to the 128-bit variable to update.
2285 * @param u128New The 128-bit value to assign to *pu128.
2286 * @param u128Old The value to compare with.
2287 * @param pu128Old Where to return the old value.
2288 *
2289 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2290 */
2291DECLINLINE(bool) ASMAtomicCmpXchgU128(volatile uint128_t *pu128, const uint128_t u128New,
2292 const uint128_t u128Old, uint128_t *pu128Old) RT_NOTHROW_DEF
2293{
2294# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
2295# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2296 uint128_t const uSwapped = __sync_val_compare_and_swap(pu128, u128Old, u128New);
2297 *pu128Old = uSwapped;
2298 return uSwapped == u128Old;
2299# else
2300 return ASMAtomicCmpXchgU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
2301 (uint64_t)(u128Old >> 64), (uint64_t)u128Old, pu128Old);
2302# endif
2303# else
2304 return ASMAtomicCmpXchgU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo, pu128Old);
2305# endif
2306}
2307
2308
2309/**
2310 * RTUINT128U wrapper for ASMAtomicCmpXchgU128.
2311 */
2312DECLINLINE(bool) ASMAtomicCmpXchgU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
2313 const RTUINT128U u128Old, PRTUINT128U pu128Old) RT_NOTHROW_DEF
2314{
2315# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2316 return ASMAtomicCmpXchgU128(&pu128->u, u128New.u, u128Old.u, &pu128Old->u);
2317# else
2318 return ASMAtomicCmpXchgU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo, &pu128Old->u);
2319# endif
2320}
2321
2322#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
2323
2324
2325
2326/** @def ASMAtomicCmpXchgExHandle
2327 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2328 *
2329 * @param ph Pointer to the value to update.
2330 * @param hNew The new value to assigned to *pu.
2331 * @param hOld The old value to *pu compare with.
2332 * @param fRc Where to store the result.
2333 * @param phOldVal Pointer to where to store the old value.
2334 *
2335 * @remarks This doesn't currently work for all handles (like RTFILE).
2336 */
2337#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2338# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2339 do { \
2340 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
2341 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
2342 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2343 } while (0)
2344#elif HC_ARCH_BITS == 64
2345# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2346 do { \
2347 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2348 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2349 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2350 } while (0)
2351#else
2352# error HC_ARCH_BITS
2353#endif
2354
2355
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicCmpXchgExU32() (4 bytes) or
 * ASMAtomicCmpXchgExU64() (8 bytes).  Any other size asserts, sets @a fRc
 * to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
2381
2382
2383/**
2384 * Atomically Compare and Exchange a pointer value, additionally
2385 * passing back old value, ordered.
2386 *
2387 * @returns true if xchg was done.
2388 * @returns false if xchg wasn't done.
2389 *
2390 * @param ppv Pointer to the value to update.
2391 * @param pvNew The new value to assigned to *ppv.
2392 * @param pvOld The old value to *ppv compare with.
2393 * @param ppvOld Pointer store the old value at.
2394 *
2395 * @remarks x86: Requires a 486 or later.
2396 */
2397DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2398 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2399{
2400#if ARCH_BITS == 32 || ARCH_BITS == 16
2401 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2402#elif ARCH_BITS == 64
2403 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2404#else
2405# error "ARCH_BITS is bogus"
2406#endif
2407}
2408
2409
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * Macro front-end for ASMAtomicCmpXchgExPtrVoid().
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
/* Each argument is first assigned to a local declared via __typeof__(*(ppv)),
   so an argument of a mismatching pointer type is diagnosed by the compiler
   before everything is cast to void pointers for the worker call. */
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                                   (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                                   (void **)ppvOldTypeChecked); \
        fMacroRet; \
     })
#else
/* No type checking here: the arguments are cast straight to void pointers. */
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#endif
2442
2443
/**
 * Virtualization unfriendly serializing instruction, always exits.
 *
 * Executes CPUID with EAX set to zero and discards the results.
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
/* Only a prototype is emitted in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG xAX = 0;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "rbx", "rcx", "rdx", "memory");
#  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    /* ebx is saved and restored by hand here instead of being listed as a
       clobber (presumably because it is reserved in PIC builds). */
    __asm__ __volatile__ ("push %%ebx\n\t"
                          "cpuid\n\t"
                          "pop %%ebx\n\t"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ecx", "edx", "memory");
#  else
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ebx", "ecx", "edx", "memory");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    /* The returned register values are not used. */
    int aInfo[4];
    _ReadWriteBarrier();
    __cpuid(aInfo, 0);

# else
    __asm
    {
        push    ebx
        xor     eax, eax
        cpuid
        pop     ebx
    }
# endif
}
#endif
2489
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * Builds a return frame on the stack and executes IRET to the instruction
 * immediately following it.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
/* Only a prototype is emitted in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* The frame pushed is SS, the stack pointer saved in r10 before the
       128 byte red zone adjustment, the flags, CS and the address of local
       label 1, so iretq resumes at label 1 with the saved stack pointer. */
    __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
                          "subq $128, %%rsp\n\t" /*redzone*/
                          "mov %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq 1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory", "cc");
#  else
    /* Frame: flags, CS and the address of local label 1. */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    /* Same frame as the 32-bit GNU variant, returning to la_ret. */
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
2534
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * Executes RDTSCP and discards the result.
 */
#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
/* Only a prototype is emitted in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-( */
    /* ... so the instruction is emitted as raw bytes instead of a mnemonic. */
#  ifdef RT_ARCH_AMD64
    /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
    /* Both the return value and the value stored in uIgnore are discarded. */
    uint32_t uIgnore;
    _ReadWriteBarrier();
    (void)__rdtscp(&uIgnore);
    (void)uIgnore;
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
2567
2568
/**
 * Serialize Instruction (both data store and instruction flush).
 *
 * Selects an implementation per target: the IRET variant for 16-bit x86 and
 * for IN_GUEST builds, the CPUID variant for other x86/AMD64 builds, an
 * external function on SPARC64, and an inline barrier on ARM.
 */
#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#elif defined(RT_ARCH_SPARC64)
RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
{
    /* RTASM_ARM_DSB_SY and its operand macro are defined elsewhere; going by
       the name this emits a DSB SY barrier. */
    __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
}
#else
# error "Port me"
#endif
2586
2587
/**
 * Memory fence, waits for any pending writes and reads to complete.
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
    /* The byte sequence 0x0f,0xae,0xf0 in the asm variants is the hand
       encoded counterpart of the _mm_mfence() intrinsic variant. */
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_mfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf0
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
#elif ARCH_BITS == 16
    /* Fallback: an atomic exchange on a dummy local stands in for the fence. */
    uint16_t volatile u16;
    ASMAtomicXchgU16(&u16, 0);
#else
    /* Fallback: an atomic exchange on a dummy local stands in for the fence. */
    uint32_t volatile u32;
    ASMAtomicXchgU32(&u32, 0);
#endif
}
2617
2618
/**
 * Write fence, waits for any pending writes to complete.
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
    /* The byte sequence 0x0f,0xae,0xf8 in the asm variants is the hand
       encoded counterpart of the _mm_sfence() intrinsic variant. */
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_sfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf8
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
#else
    /* No dedicated write fence here; fall back on the full fence. */
    ASMMemoryFence();
#endif
}
2644
2645
/**
 * Read fence, waits for any pending reads to complete.
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
    /* The byte sequence 0x0f,0xae,0xe8 in the asm variants is the hand
       encoded counterpart of the _mm_lfence() intrinsic variant. */
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_lfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xe8
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
#else
    /* No dedicated read fence here; fall back on the full fence. */
    ASMMemoryFence();
#endif
}
2671
2672
/**
 * Atomically reads an unsigned 8-bit value, ordered.
 *
 * @returns Current *pu8 value
 * @param   pu8    Pointer to the 8-bit variable to read.
 */
DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* The barrier macro is emitted ahead of an exclusive byte load into a
       32-bit register; the result is narrowed on return.  The .Lstart label
       is not branched to within this asm statement. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
                         RTASM_ARM_DMB_SY
# if defined(RT_ARCH_ARM64)
                         "ldxrb %w[uDst], %[pMem]\n\t"
# else
                         "ldrexb %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu8)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
    return (uint8_t)u32;
#else
    /* Fence first, then a plain volatile load. */
    ASMMemoryFence();
    return *pu8;    /* byte reads are atomic on x86 */
#endif
}
2699
2700
/**
 * Atomically reads an unsigned 8-bit value, unordered.
 *
 * @returns Current *pu8 value
 * @param   pu8    Pointer to the 8-bit variable to read.
 */
DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive byte load into a 32-bit register without any barrier; the
       result is narrowed on return. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
# if defined(RT_ARCH_ARM64)
                         "ldxrb %w[uDst], %[pMem]\n\t"
# else
                         "ldrexb %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu8));
    return (uint8_t)u32;
#else
    /* Plain volatile load, no fence. */
    return *pu8;    /* byte reads are atomic on x86 */
#endif
}
2724
2725
2726/**
2727 * Atomically reads a signed 8-bit value, ordered.
2728 *
2729 * @returns Current *pi8 value
2730 * @param pi8 Pointer to the 8-bit variable to read.
2731 */
2732DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2733{
2734 ASMMemoryFence();
2735#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2736 int32_t i32;
2737 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2738 RTASM_ARM_DMB_SY
2739# if defined(RT_ARCH_ARM64)
2740 "ldxrb %w[iDst], %[pMem]\n\t"
2741# else
2742 "ldrexb %[iDst], %[pMem]\n\t"
2743# endif
2744 : [iDst] "=&r" (i32)
2745 : [pMem] "Q" (*pi8)
2746 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2747 return (int8_t)i32;
2748#else
2749 return *pi8; /* byte reads are atomic on x86 */
2750#endif
2751}
2752
2753
/**
 * Atomically reads a signed 8-bit value, unordered.
 *
 * @returns Current *pi8 value
 * @param   pi8    Pointer to the 8-bit variable to read.
 */
DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive byte load into a 32-bit register without any barrier; the
       result is narrowed on return. */
    int32_t i32;
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
# if defined(RT_ARCH_ARM64)
                         "ldxrb %w[iDst], %[pMem]\n\t"
# else
                         "ldrexb %[iDst], %[pMem]\n\t"
# endif
                         : [iDst] "=&r" (i32)
                         : [pMem] "Q" (*pi8));
    return (int8_t)i32;
#else
    /* Plain volatile load, no fence. */
    return *pi8;    /* byte reads are atomic on x86 */
#endif
}
2777
2778
/**
 * Atomically reads an unsigned 16-bit value, ordered.
 *
 * Asserts that @a pu16 is 2-byte aligned.
 *
 * @returns Current *pu16 value
 * @param   pu16    Pointer to the 16-bit variable to read.
 */
DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pu16 & 1));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* The barrier macro is emitted ahead of an exclusive halfword load into
       a 32-bit register; the result is narrowed on return. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
                         RTASM_ARM_DMB_SY
# if defined(RT_ARCH_ARM64)
                         "ldxrh %w[uDst], %[pMem]\n\t"
# else
                         "ldrexh %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu16)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
    return (uint16_t)u32;
#else
    /* Fence first, then a plain volatile load. */
    ASMMemoryFence();
    return *pu16;
#endif
}
2806
2807
/**
 * Atomically reads an unsigned 16-bit value, unordered.
 *
 * Asserts that @a pu16 is 2-byte aligned.
 *
 * @returns Current *pu16 value
 * @param   pu16    Pointer to the 16-bit variable to read.
 */
DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pu16 & 1));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive halfword load into a 32-bit register without any barrier;
       the result is narrowed on return. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
# if defined(RT_ARCH_ARM64)
                         "ldxrh %w[uDst], %[pMem]\n\t"
# else
                         "ldrexh %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu16));
    return (uint16_t)u32;
#else
    /* Plain volatile load, no fence. */
    return *pu16;
#endif
}
2832
2833
/**
 * Atomically reads a signed 16-bit value, ordered.
 *
 * Asserts that @a pi16 is 2-byte aligned.
 *
 * @returns Current *pi16 value
 * @param   pi16    Pointer to the 16-bit variable to read.
 */
DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pi16 & 1));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* The barrier macro is emitted ahead of an exclusive halfword load into
       a 32-bit register; the result is narrowed on return. */
    int32_t i32;
    __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
                         RTASM_ARM_DMB_SY
# if defined(RT_ARCH_ARM64)
                         "ldxrh %w[iDst], %[pMem]\n\t"
# else
                         "ldrexh %[iDst], %[pMem]\n\t"
# endif
                         : [iDst] "=&r" (i32)
                         : [pMem] "Q" (*pi16)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
    return (int16_t)i32;
#else
    /* Fence first, then a plain volatile load. */
    ASMMemoryFence();
    return *pi16;
#endif
}
2861
2862
/**
 * Atomically reads a signed 16-bit value, unordered.
 *
 * Asserts that @a pi16 is 2-byte aligned.
 *
 * @returns Current *pi16 value
 * @param   pi16    Pointer to the 16-bit variable to read.
 */
DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pi16 & 1));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive halfword load into a 32-bit register without any barrier;
       the result is narrowed on return. */
    int32_t i32;
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
# if defined(RT_ARCH_ARM64)
                         "ldxrh %w[iDst], %[pMem]\n\t"
# else
                         "ldrexh %[iDst], %[pMem]\n\t"
# endif
                         : [iDst] "=&r" (i32)
                         : [pMem] "Q" (*pi16));
    return (int16_t)i32;
#else
    /* Plain volatile load, no fence. */
    return *pi16;
#endif
}
2887
2888
/**
 * Atomically reads an unsigned 32-bit value, ordered.
 *
 * Asserts that @a pu32 is 4-byte aligned.
 *
 * @returns Current *pu32 value
 * @param   pu32    Pointer to the 32-bit variable to read.
 */
DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pu32 & 3));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* The barrier macro is emitted ahead of an exclusive word load. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
                         RTASM_ARM_DMB_SY
# if defined(RT_ARCH_ARM64)
                         "ldxr %w[uDst], %[pMem]\n\t"
# else
                         "ldrex %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu32)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
    return u32;
#else
    /* Fence first, then a plain volatile load. */
    ASMMemoryFence();
# if ARCH_BITS == 16
    /* The 16-bit build has no proper implementation yet and asserts. */
    AssertFailed();  /** @todo 16-bit */
# endif
    return *pu32;
#endif
}
2919
2920
/**
 * Atomically reads an unsigned 32-bit value, unordered.
 *
 * Asserts that @a pu32 is 4-byte aligned.
 *
 * @returns Current *pu32 value
 * @param   pu32    Pointer to the 32-bit variable to read.
 */
DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
{
    Assert(!((uintptr_t)pu32 & 3));
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Exclusive word load without any barrier. */
    uint32_t u32;
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
# if defined(RT_ARCH_ARM64)
                         "ldxr %w[uDst], %[pMem]\n\t"
# else
                         "ldrex %[uDst], %[pMem]\n\t"
# endif
                         : [uDst] "=&r" (u32)
                         : [pMem] "Q" (*pu32));
    return u32;
#else
# if ARCH_BITS == 16
    /* The 16-bit build has no proper implementation yet and asserts. */
    AssertFailed();  /** @todo 16-bit */
# endif
    /* Plain volatile load, no fence. */
    return *pu32;
#endif
}
2948
2949
2950/**
2951 * Atomically reads a signed 32-bit value, ordered.
2952 *
2953 * @returns Current *pi32 value
2954 * @param pi32 Pointer to the 32-bit variable to read.
2955 */
2956DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2957{
2958 Assert(!((uintptr_t)pi32 & 3));
2959#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2960 int32_t i32;
2961 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2962 RTASM_ARM_DMB_SY
2963# if defined(RT_ARCH_ARM64)
2964 "ldxr %w[iDst], %[pMem]\n\t"
2965# else
2966 "ldrex %[iDst], %[pMem]\n\t"
2967# endif
2968 : [iDst] "=&r" (i32)
2969 : [pMem] "Q" (*pi32)
2970 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2971 return i32;
2972#else
2973 ASMMemoryFence();
2974# if ARCH_BITS == 16
2975 AssertFailed(); /** @todo 16-bit */
2976# endif
2977 return *pi32;
2978#endif
2979}
2980
2981
2982/**
2983 * Atomically reads a signed 32-bit value, unordered.
2984 *
2985 * @returns Current *pi32 value
2986 * @param pi32 Pointer to the 32-bit variable to read.
2987 */
2988DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2989{
2990 Assert(!((uintptr_t)pi32 & 3));
2991#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2992 int32_t i32;
2993 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2994# if defined(RT_ARCH_ARM64)
2995 "ldxr %w[iDst], %[pMem]\n\t"
2996# else
2997 "ldrex %[iDst], %[pMem]\n\t"
2998# endif
2999 : [iDst] "=&r" (i32)
3000 : [pMem] "Q" (*pi32));
3001 return i32;
3002
3003#else
3004# if ARCH_BITS == 16
3005 AssertFailed(); /** @todo 16-bit */
3006# endif
3007 return *pi32;
3008#endif
3009}
3010
3011
3012/**
3013 * Atomically reads an unsigned 64-bit value, ordered.
3014 *
3015 * @returns Current *pu64 value
3016 * @param pu64 Pointer to the 64-bit variable to read.
3017 * The memory pointed to must be writable.
3018 *
3019 * @remarks This may fault if the memory is read-only!
3020 * @remarks x86: Requires a Pentium or later.
3021 */
3022#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
3023 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3024RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3025#else
3026DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3027{
3028 uint64_t u64;
3029# ifdef RT_ARCH_AMD64
3030 Assert(!((uintptr_t)pu64 & 7));
3031/*# if RT_INLINE_ASM_GNU_STYLE
3032 __asm__ __volatile__( "mfence\n\t"
3033 "movq %1, %0\n\t"
3034 : "=r" (u64)
3035 : "m" (*pu64));
3036# else
3037 __asm
3038 {
3039 mfence
3040 mov rdx, [pu64]
3041 mov rax, [rdx]
3042 mov [u64], rax
3043 }
3044# endif*/
3045 ASMMemoryFence();
3046 u64 = *pu64;
3047
3048# elif defined(RT_ARCH_X86)
3049# if RT_INLINE_ASM_GNU_STYLE
3050# if defined(PIC) || defined(__PIC__)
3051 uint32_t u32EBX = 0;
3052 Assert(!((uintptr_t)pu64 & 7));
3053 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3054 "lock; cmpxchg8b (%5)\n\t"
3055 "movl %3, %%ebx\n\t"
3056 : "=A" (u64)
3057# if RT_GNUC_PREREQ(4, 3)
3058 , "+m" (*pu64)
3059# else
3060 , "=m" (*pu64)
3061# endif
3062 : "0" (0ULL)
3063 , "m" (u32EBX)
3064 , "c" (0)
3065 , "S" (pu64)
3066 : "cc");
3067# else /* !PIC */
3068 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3069 : "=A" (u64)
3070 , "+m" (*pu64)
3071 : "0" (0ULL)
3072 , "b" (0)
3073 , "c" (0)
3074 : "cc");
3075# endif
3076# else
3077 Assert(!((uintptr_t)pu64 & 7));
3078 __asm
3079 {
3080 xor eax, eax
3081 xor edx, edx
3082 mov edi, pu64
3083 xor ecx, ecx
3084 xor ebx, ebx
3085 lock cmpxchg8b [edi]
3086 mov dword ptr [u64], eax
3087 mov dword ptr [u64 + 4], edx
3088 }
3089# endif
3090
3091# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3092 Assert(!((uintptr_t)pu64 & 7));
3093 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
3094 RTASM_ARM_DMB_SY
3095# if defined(RT_ARCH_ARM64)
3096 "ldxr %[uDst], %[pMem]\n\t"
3097# else
3098 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3099# endif
3100 : [uDst] "=&r" (u64)
3101 : [pMem] "Q" (*pu64)
3102 RTASM_ARM_DMB_SY_COMMA_IN_REG);
3103
3104# else
3105# error "Port me"
3106# endif
3107 return u64;
3108}
3109#endif
3110
3111
3112/**
3113 * Atomically reads an unsigned 64-bit value, unordered.
3114 *
3115 * @returns Current *pu64 value
3116 * @param pu64 Pointer to the 64-bit variable to read.
3117 * The memory pointed to must be writable.
3118 *
3119 * @remarks This may fault if the memory is read-only!
3120 * @remarks x86: Requires a Pentium or later.
3121 */
3122#if !defined(RT_ARCH_AMD64) \
3123 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
3124 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
3125RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3126#else
3127DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3128{
3129 uint64_t u64;
3130# ifdef RT_ARCH_AMD64
3131 Assert(!((uintptr_t)pu64 & 7));
3132/*# if RT_INLINE_ASM_GNU_STYLE
3133 Assert(!((uintptr_t)pu64 & 7));
3134 __asm__ __volatile__("movq %1, %0\n\t"
3135 : "=r" (u64)
3136 : "m" (*pu64));
3137# else
3138 __asm
3139 {
3140 mov rdx, [pu64]
3141 mov rax, [rdx]
3142 mov [u64], rax
3143 }
3144# endif */
3145 u64 = *pu64;
3146
3147# elif defined(RT_ARCH_X86)
3148# if RT_INLINE_ASM_GNU_STYLE
3149# if defined(PIC) || defined(__PIC__)
3150 uint32_t u32EBX = 0;
3151 uint32_t u32Spill;
3152 Assert(!((uintptr_t)pu64 & 7));
3153 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3154 "xor %%ecx,%%ecx\n\t"
3155 "xor %%edx,%%edx\n\t"
3156 "xchgl %%ebx, %3\n\t"
3157 "lock; cmpxchg8b (%4)\n\t"
3158 "movl %3, %%ebx\n\t"
3159 : "=A" (u64)
3160# if RT_GNUC_PREREQ(4, 3)
3161 , "+m" (*pu64)
3162# else
3163 , "=m" (*pu64)
3164# endif
3165 , "=c" (u32Spill)
3166 : "m" (u32EBX)
3167 , "S" (pu64)
3168 : "cc");
3169# else /* !PIC */
3170 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3171 : "=A" (u64)
3172 , "+m" (*pu64)
3173 : "0" (0ULL)
3174 , "b" (0)
3175 , "c" (0)
3176 : "cc");
3177# endif
3178# else
3179 Assert(!((uintptr_t)pu64 & 7));
3180 __asm
3181 {
3182 xor eax, eax
3183 xor edx, edx
3184 mov edi, pu64
3185 xor ecx, ecx
3186 xor ebx, ebx
3187 lock cmpxchg8b [edi]
3188 mov dword ptr [u64], eax
3189 mov dword ptr [u64 + 4], edx
3190 }
3191# endif
3192
3193# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3194 Assert(!((uintptr_t)pu64 & 7));
3195 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
3196# if defined(RT_ARCH_ARM64)
3197 "ldxr %[uDst], %[pMem]\n\t"
3198# else
3199 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3200# endif
3201 : [uDst] "=&r" (u64)
3202 : [pMem] "Q" (*pu64));
3203
3204# else
3205# error "Port me"
3206# endif
3207 return u64;
3208}
3209#endif
3210
3211
3212/**
3213 * Atomically reads a signed 64-bit value, ordered.
3214 *
3215 * @returns Current *pi64 value
3216 * @param pi64 Pointer to the 64-bit variable to read.
3217 * The memory pointed to must be writable.
3218 *
3219 * @remarks This may fault if the memory is read-only!
3220 * @remarks x86: Requires a Pentium or later.
3221 */
3222DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3223{
3224 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3225}
3226
3227
3228/**
3229 * Atomically reads a signed 64-bit value, unordered.
3230 *
3231 * @returns Current *pi64 value
3232 * @param pi64 Pointer to the 64-bit variable to read.
3233 * The memory pointed to must be writable.
3234 *
3235 * @remarks This will fault if the memory is read-only!
3236 * @remarks x86: Requires a Pentium or later.
3237 */
3238DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3239{
3240 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3241}
3242
3243
3244/**
3245 * Atomically reads a size_t value, ordered.
3246 *
3247 * @returns Current *pcb value
3248 * @param pcb Pointer to the size_t variable to read.
3249 */
3250DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3251{
3252#if ARCH_BITS == 64
3253 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3254#elif ARCH_BITS == 32
3255 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3256#elif ARCH_BITS == 16
3257 AssertCompileSize(size_t, 2);
3258 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3259#else
3260# error "Unsupported ARCH_BITS value"
3261#endif
3262}
3263
3264
3265/**
3266 * Atomically reads a size_t value, unordered.
3267 *
3268 * @returns Current *pcb value
3269 * @param pcb Pointer to the size_t variable to read.
3270 */
3271DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3272{
3273#if ARCH_BITS == 64 || ARCH_BITS == 16
3274 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3275#elif ARCH_BITS == 32
3276 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3277#elif ARCH_BITS == 16
3278 AssertCompileSize(size_t, 2);
3279 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3280#else
3281# error "Unsupported ARCH_BITS value"
3282#endif
3283}
3284
3285
3286/**
3287 * Atomically reads a pointer value, ordered.
3288 *
3289 * @returns Current *pv value
3290 * @param ppv Pointer to the pointer variable to read.
3291 *
3292 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
3293 * requires less typing (no casts).
3294 */
3295DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3296{
3297#if ARCH_BITS == 32 || ARCH_BITS == 16
3298 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3299#elif ARCH_BITS == 64
3300 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3301#else
3302# error "ARCH_BITS is bogus"
3303#endif
3304}
3305
/**
 * Typed front end for ASMAtomicReadPtr that spares the caller the casts.
 *
 * With GCC-compatible compilers the argument is first assigned to a local of
 * the matching pointer-to-volatile type, so a mismatching @a ppv is diagnosed
 * by the compiler; elsewhere the macro is a plain cast.
 *
 * @returns The pointer currently stored at @a ppv, as @a Type.
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#else /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#endif
3325
3326
3327/**
3328 * Atomically reads a pointer value, unordered.
3329 *
3330 * @returns Current *pv value
3331 * @param ppv Pointer to the pointer variable to read.
3332 *
3333 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
3334 * requires less typing (no casts).
3335 */
3336DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3337{
3338#if ARCH_BITS == 32 || ARCH_BITS == 16
3339 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3340#elif ARCH_BITS == 64
3341 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3342#else
3343# error "ARCH_BITS is bogus"
3344#endif
3345}
3346
3347
/**
 * Typed front end for ASMAtomicUoReadPtr that spares the caller the casts.
 *
 * With GCC-compatible compilers the argument is first assigned to a local of
 * the matching pointer-to-volatile type, so a mismatching @a ppv is diagnosed
 * by the compiler; elsewhere the macro is a plain cast.
 *
 * @returns The pointer currently stored at @a ppv, as @a Type.
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#else /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#endif
3367
3368
3369/**
3370 * Atomically reads a boolean value, ordered.
3371 *
3372 * @returns Current *pf value
3373 * @param pf Pointer to the boolean variable to read.
3374 */
3375DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3376{
3377 ASMMemoryFence();
3378 return *pf; /* byte reads are atomic on x86 */
3379}
3380
3381
3382/**
3383 * Atomically reads a boolean value, unordered.
3384 *
3385 * @returns Current *pf value
3386 * @param pf Pointer to the boolean variable to read.
3387 */
3388DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3389{
3390 return *pf; /* byte reads are atomic on x86 */
3391}
3392
3393
3394/**
3395 * Atomically read a typical IPRT handle value, ordered.
3396 *
3397 * @param ph Pointer to the handle variable to read.
3398 * @param phRes Where to store the result.
3399 *
3400 * @remarks This doesn't currently work for all handles (like RTFILE).
3401 */
3402#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3403# define ASMAtomicReadHandle(ph, phRes) \
3404 do { \
3405 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3406 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3407 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3408 } while (0)
3409#elif HC_ARCH_BITS == 64
3410# define ASMAtomicReadHandle(ph, phRes) \
3411 do { \
3412 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3413 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3414 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3415 } while (0)
3416#else
3417# error HC_ARCH_BITS
3418#endif
3419
3420
3421/**
3422 * Atomically read a typical IPRT handle value, unordered.
3423 *
3424 * @param ph Pointer to the handle variable to read.
3425 * @param phRes Where to store the result.
3426 *
3427 * @remarks This doesn't currently work for all handles (like RTFILE).
3428 */
3429#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3430# define ASMAtomicUoReadHandle(ph, phRes) \
3431 do { \
3432 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3433 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3434 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3435 } while (0)
3436#elif HC_ARCH_BITS == 64
3437# define ASMAtomicUoReadHandle(ph, phRes) \
3438 do { \
3439 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3440 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3441 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3442 } while (0)
3443#else
3444# error HC_ARCH_BITS
3445#endif
3446
3447
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Selects the 8, 16, 32 or 64-bit ordered read from sizeof(*(pu)); any other
 * size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
3465
3466
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Selects the 8, 16, 32 or 64-bit unordered read from sizeof(*(pu)); any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* The message names this macro; the size is cast to int to match %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
3484
3485
3486/**
3487 * Atomically writes an unsigned 8-bit value, ordered.
3488 *
3489 * @param pu8 Pointer to the 8-bit variable.
3490 * @param u8 The 8-bit value to assign to *pu8.
3491 */
3492DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3493{
3494 /** @todo Any possible ARM32/ARM64 optimizations here? */
3495 ASMAtomicXchgU8(pu8, u8);
3496}
3497
3498
3499/**
3500 * Atomically writes an unsigned 8-bit value, unordered.
3501 *
3502 * @param pu8 Pointer to the 8-bit variable.
3503 * @param u8 The 8-bit value to assign to *pu8.
3504 */
3505DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3506{
3507 /** @todo Any possible ARM32/ARM64 improvements here? */
3508 *pu8 = u8; /* byte writes are atomic on x86 */
3509}
3510
3511
3512/**
3513 * Atomically writes a signed 8-bit value, ordered.
3514 *
3515 * @param pi8 Pointer to the 8-bit variable to read.
3516 * @param i8 The 8-bit value to assign to *pi8.
3517 */
3518DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3519{
3520 /** @todo Any possible ARM32/ARM64 optimizations here? */
3521 ASMAtomicXchgS8(pi8, i8);
3522}
3523
3524
3525/**
3526 * Atomically writes a signed 8-bit value, unordered.
3527 *
3528 * @param pi8 Pointer to the 8-bit variable to write.
3529 * @param i8 The 8-bit value to assign to *pi8.
3530 */
3531DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3532{
3533 *pi8 = i8; /* byte writes are atomic on x86 */
3534}
3535
3536
3537/**
3538 * Atomically writes an unsigned 16-bit value, ordered.
3539 *
3540 * @param pu16 Pointer to the 16-bit variable to write.
3541 * @param u16 The 16-bit value to assign to *pu16.
3542 */
3543DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3544{
3545 /** @todo Any possible ARM32/ARM64 optimizations here? */
3546 ASMAtomicXchgU16(pu16, u16);
3547}
3548
3549
3550/**
3551 * Atomically writes an unsigned 16-bit value, unordered.
3552 *
3553 * @param pu16 Pointer to the 16-bit variable to write.
3554 * @param u16 The 16-bit value to assign to *pu16.
3555 */
3556DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3557{
3558 Assert(!((uintptr_t)pu16 & 1));
3559 *pu16 = u16;
3560}
3561
3562
3563/**
3564 * Atomically writes a signed 16-bit value, ordered.
3565 *
3566 * @param pi16 Pointer to the 16-bit variable to write.
3567 * @param i16 The 16-bit value to assign to *pi16.
3568 */
3569DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3570{
3571 /** @todo Any possible ARM32/ARM64 optimizations here? */
3572 ASMAtomicXchgS16(pi16, i16);
3573}
3574
3575
3576/**
3577 * Atomically writes a signed 16-bit value, unordered.
3578 *
3579 * @param pi16 Pointer to the 16-bit variable to write.
3580 * @param i16 The 16-bit value to assign to *pi16.
3581 */
3582DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3583{
3584 Assert(!((uintptr_t)pi16 & 1));
3585 *pi16 = i16;
3586}
3587
3588
3589/**
3590 * Atomically writes an unsigned 32-bit value, ordered.
3591 *
3592 * @param pu32 Pointer to the 32-bit variable to write.
3593 * @param u32 The 32-bit value to assign to *pu32.
3594 */
3595DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3596{
3597 /** @todo Any possible ARM32/ARM64 optimizations here? */
3598 ASMAtomicXchgU32(pu32, u32);
3599}
3600
3601
3602/**
3603 * Atomically writes an unsigned 32-bit value, unordered.
3604 *
3605 * @param pu32 Pointer to the 32-bit variable to write.
3606 * @param u32 The 32-bit value to assign to *pu32.
3607 */
3608DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3609{
3610 Assert(!((uintptr_t)pu32 & 3));
3611#if ARCH_BITS >= 32
3612 *pu32 = u32;
3613#else
3614 ASMAtomicXchgU32(pu32, u32);
3615#endif
3616}
3617
3618
3619/**
3620 * Atomically writes a signed 32-bit value, ordered.
3621 *
3622 * @param pi32 Pointer to the 32-bit variable to write.
3623 * @param i32 The 32-bit value to assign to *pi32.
3624 */
3625DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3626{
3627 ASMAtomicXchgS32(pi32, i32);
3628}
3629
3630
3631/**
3632 * Atomically writes a signed 32-bit value, unordered.
3633 *
3634 * @param pi32 Pointer to the 32-bit variable to write.
3635 * @param i32 The 32-bit value to assign to *pi32.
3636 */
3637DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3638{
3639 Assert(!((uintptr_t)pi32 & 3));
3640#if ARCH_BITS >= 32
3641 *pi32 = i32;
3642#else
3643 ASMAtomicXchgS32(pi32, i32);
3644#endif
3645}
3646
3647
3648/**
3649 * Atomically writes an unsigned 64-bit value, ordered.
3650 *
3651 * @param pu64 Pointer to the 64-bit variable to write.
3652 * @param u64 The 64-bit value to assign to *pu64.
3653 */
3654DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3655{
3656 /** @todo Any possible ARM32/ARM64 optimizations here? */
3657 ASMAtomicXchgU64(pu64, u64);
3658}
3659
3660
3661/**
3662 * Atomically writes an unsigned 64-bit value, unordered.
3663 *
3664 * @param pu64 Pointer to the 64-bit variable to write.
3665 * @param u64 The 64-bit value to assign to *pu64.
3666 */
3667DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3668{
3669 Assert(!((uintptr_t)pu64 & 7));
3670#if ARCH_BITS == 64
3671 *pu64 = u64;
3672#else
3673 ASMAtomicXchgU64(pu64, u64);
3674#endif
3675}
3676
3677
3678/**
3679 * Atomically writes a signed 64-bit value, ordered.
3680 *
3681 * @param pi64 Pointer to the 64-bit variable to write.
3682 * @param i64 The 64-bit value to assign to *pi64.
3683 */
3684DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3685{
3686 /** @todo Any possible ARM32/ARM64 optimizations here? */
3687 ASMAtomicXchgS64(pi64, i64);
3688}
3689
3690
3691/**
3692 * Atomically writes a signed 64-bit value, unordered.
3693 *
3694 * @param pi64 Pointer to the 64-bit variable to write.
3695 * @param i64 The 64-bit value to assign to *pi64.
3696 */
3697DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3698{
3699 Assert(!((uintptr_t)pi64 & 7));
3700#if ARCH_BITS == 64
3701 *pi64 = i64;
3702#else
3703 ASMAtomicXchgS64(pi64, i64);
3704#endif
3705}
3706
3707
3708/**
3709 * Atomically writes a size_t value, ordered.
3710 *
3711 * @returns nothing.
3712 * @param pcb Pointer to the size_t variable to write.
3713 * @param cb The value to assign to *pcb.
3714 */
3715DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3716{
3717#if ARCH_BITS == 64
3718 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3719#elif ARCH_BITS == 32
3720 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3721#elif ARCH_BITS == 16
3722 AssertCompileSize(size_t, 2);
3723 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3724#else
3725# error "Unsupported ARCH_BITS value"
3726#endif
3727}
3728
3729
3730/**
3731 * Atomically writes a size_t value, unordered.
3732 *
3733 * @returns nothing.
3734 * @param pcb Pointer to the size_t variable to write.
3735 * @param cb The value to assign to *pcb.
3736 */
3737DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3738{
3739#if ARCH_BITS == 64
3740 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3741#elif ARCH_BITS == 32
3742 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3743#elif ARCH_BITS == 16
3744 AssertCompileSize(size_t, 2);
3745 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3746#else
3747# error "Unsupported ARCH_BITS value"
3748#endif
3749}
3750
3751
3752/**
3753 * Atomically writes a boolean value, unordered.
3754 *
3755 * @param pf Pointer to the boolean variable to write.
3756 * @param f The boolean value to assign to *pf.
3757 */
3758DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3759{
3760 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3761}
3762
3763
3764/**
3765 * Atomically writes a boolean value, unordered.
3766 *
3767 * @param pf Pointer to the boolean variable to write.
3768 * @param f The boolean value to assign to *pf.
3769 */
3770DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3771{
3772 *pf = f; /* byte writes are atomic on x86 */
3773}
3774
3775
3776/**
3777 * Atomically writes a pointer value, ordered.
3778 *
3779 * @param ppv Pointer to the pointer variable to write.
3780 * @param pv The pointer value to assign to *ppv.
3781 */
3782DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3783{
3784#if ARCH_BITS == 32 || ARCH_BITS == 16
3785 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3786#elif ARCH_BITS == 64
3787 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3788#else
3789# error "ARCH_BITS is bogus"
3790#endif
3791}
3792
3793
3794/**
3795 * Atomically writes a pointer value, unordered.
3796 *
3797 * @param ppv Pointer to the pointer variable to write.
3798 * @param pv The pointer value to assign to *ppv.
3799 */
3800DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3801{
3802#if ARCH_BITS == 32 || ARCH_BITS == 16
3803 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3804#elif ARCH_BITS == 64
3805 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3806#else
3807# error "ARCH_BITS is bogus"
3808#endif
3809}
3810
3811
/**
 * Atomically writes a pointer value, ordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts
 * that ppv is aligned to ARCH_BITS / 8 bytes, and forwards to
 * ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        /* The typed locals make the compiler check ppv against pv. */ \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        /* Use the local so an expression argument is checked as a whole and evaluated once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
3846
3847
3848/**
3849 * Atomically sets a pointer to NULL, ordered.
3850 *
3851 * @param ppv Pointer to the pointer variable that should be set to NULL.
3852 *
3853 * @remarks This is relatively type safe on GCC platforms.
3854 */
3855#if RT_GNUC_PREREQ(4, 2)
3856# define ASMAtomicWriteNullPtr(ppv) \
3857 do \
3858 { \
3859 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3860 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3861 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3862 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3863 } while (0)
3864#else
3865# define ASMAtomicWriteNullPtr(ppv) \
3866 do \
3867 { \
3868 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3869 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3870 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3871 } while (0)
3872#endif
3873
3874
3875/**
3876 * Atomically writes a pointer value, unordered.
3877 *
3878 * @returns Current *pv value
3879 * @param ppv Pointer to the pointer variable.
3880 * @param pv The pointer value to assign to *ppv. If NULL use
3881 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3882 *
3883 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3884 * NULL.
3885 */
3886#if RT_GNUC_PREREQ(4, 2)
3887# define ASMAtomicUoWritePtr(ppv, pv) \
3888 do \
3889 { \
3890 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3891 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3892 \
3893 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3894 AssertCompile(sizeof(pv) == sizeof(void *)); \
3895 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3896 \
3897 *(ppvTypeChecked) = pvTypeChecked; \
3898 } while (0)
3899#else
3900# define ASMAtomicUoWritePtr(ppv, pv) \
3901 do \
3902 { \
3903 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3904 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3905 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3906 *(ppv) = pv; \
3907 } while (0)
3908#endif
3909
3910
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that ppv is
 * aligned to ARCH_BITS / 8 bytes, and then assigns NULL directly.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        /* Use the local so an expression argument is checked as a whole and evaluated once. */ \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
3936
3937
3938/**
3939 * Atomically write a typical IPRT handle value, ordered.
3940 *
3941 * @param ph Pointer to the variable to update.
3942 * @param hNew The value to assign to *ph.
3943 *
3944 * @remarks This doesn't currently work for all handles (like RTFILE).
3945 */
3946#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3947# define ASMAtomicWriteHandle(ph, hNew) \
3948 do { \
3949 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3950 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3951 } while (0)
3952#elif HC_ARCH_BITS == 64
3953# define ASMAtomicWriteHandle(ph, hNew) \
3954 do { \
3955 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3956 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3957 } while (0)
3958#else
3959# error HC_ARCH_BITS
3960#endif
3961
3962
3963/**
3964 * Atomically write a typical IPRT handle value, unordered.
3965 *
3966 * @param ph Pointer to the variable to update.
3967 * @param hNew The value to assign to *ph.
3968 *
3969 * @remarks This doesn't currently work for all handles (like RTFILE).
3970 */
3971#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3972# define ASMAtomicUoWriteHandle(ph, hNew) \
3973 do { \
3974 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3975 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3976 } while (0)
3977#elif HC_ARCH_BITS == 64
3978# define ASMAtomicUoWriteHandle(ph, hNew) \
3979 do { \
3980 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3981 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3982 } while (0)
3983#else
3984# error HC_ARCH_BITS
3985#endif
3986
3987
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4005
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4023
4024
4025
4026/**
4027 * Atomically exchanges and adds to a 16-bit value, ordered.
4028 *
4029 * @returns The old value.
4030 * @param pu16 Pointer to the value.
4031 * @param u16 Number to add.
4032 *
4033 * @remarks Currently not implemented, just to make 16-bit code happy.
4034 * @remarks x86: Requires a 486 or later.
4035 */
4036RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
4037
4038
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic, x86/AMD64 inline assembly
 * using LOCK XADD, or the ARM load-modify-store helper macro.
 *
 * @returns The old value.
 * @param   pu32    Pointer to the value.
 * @param   u32     Number to add.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's result (the pre-add value) is recycled through u32. */
    u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
    return u32;

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* XADD: the register operand goes in holding the addend ("0" ties it to
       output %0) and comes out holding the previous memory content. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32)
                         , "=m" (*pu32)
                         : "0" (u32)
                         , "m" (*pu32)
                         : "memory"
                         , "cc");
    return u32;
#  else
    /* MSVC-style inline assembly: same LOCK XADD, addressed via rdx/edx. */
    __asm
    {
        mov     eax, [u32]
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#   else
        mov     edx, [pu32]
        lock xadd [edx], eax
#   endif
        mov     [u32], eax
    }
    return u32;
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* u32OldRet is not declared in this function; it is presumably introduced
       by the helper macro's expansion. */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
                                           "add %w[uNew], %w[uOld], %w[uVal]\n\t",
                                           "add %[uNew], %[uOld], %[uVal]\n\t",
                                           [uVal] "r" (u32));
    return u32OldRet;

# else
#  error "Port me"
# endif
}
#endif
4095
4096
4097/**
4098 * Atomically exchanges and adds to a signed 32-bit value, ordered.
4099 *
4100 * @returns The old value.
4101 * @param pi32 Pointer to the value.
4102 * @param i32 Number to add.
4103 *
4104 * @remarks x86: Requires a 486 or later.
4105 */
4106DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4107{
4108 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4109}
4110
4111
/**
 * Atomically exchanges and adds to a 64-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic (AMD64), AMD64 inline assembly
 * using LOCK XADD, the ARM load-modify-store helper macro, or a generic
 * compare-and-exchange retry loop.
 *
 * @returns The old value.
 * @param   pu64    Pointer to the value.
 * @param   u64     Number to add.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic's result (the pre-add value) is recycled through u64. */
    u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* XADD: the register operand goes in holding the addend ("0" ties it to
       output %0) and comes out holding the previous memory content. */
    __asm__ __volatile__("lock; xaddq %0, %1\n\t"
                         : "=r" (u64)
                         , "=m" (*pu64)
                         : "0" (u64)
                         , "m" (*pu64)
                         : "memory"
                         , "cc");
    return u64;

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit variant, the second pair the 32-bit variant
       working on the low and high (%H) halves; u64OldRet is presumably
       introduced by the helper macro's expansion.
       NOTE(review): the 32-bit sequence uses a plain 'add' before 'adc';
       confirm whether the low-half add is meant to update the carry flag
       (i.e. whether 'adds' is intended). */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
                                           "add %[uNew], %[uOld], %[uVal]\n\t"
                                           ,
                                           "add %[uNew], %[uOld], %[uVal]\n\t"
                                           "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
                                           [uVal] "r" (u64));
    return u64OldRet;

# else
    /* Generic fallback: read, compute the sum and try to swap it in; retry
       (after a pause hint) whenever the compare-exchange reports failure. */
    uint64_t u64Old;
    for (;;)
    {
        uint64_t u64New;
        u64Old = ASMAtomicUoReadU64(pu64);
        u64New = u64Old + u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
    return u64Old;
# endif
}
#endif
4164
4165
4166/**
4167 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4168 *
4169 * @returns The old value.
4170 * @param pi64 Pointer to the value.
4171 * @param i64 Number to add.
4172 *
4173 * @remarks x86: Requires a Pentium or later.
4174 */
4175DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4176{
4177 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4178}
4179
4180
4181/**
4182 * Atomically exchanges and adds to a size_t value, ordered.
4183 *
4184 * @returns The old value.
4185 * @param pcb Pointer to the size_t value.
4186 * @param cb Number to add.
4187 */
4188DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4189{
4190#if ARCH_BITS == 64
4191 AssertCompileSize(size_t, 8);
4192 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4193#elif ARCH_BITS == 32
4194 AssertCompileSize(size_t, 4);
4195 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4196#elif ARCH_BITS == 16
4197 AssertCompileSize(size_t, 2);
4198 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4199#else
4200# error "Unsupported ARCH_BITS value"
4201#endif
4202}
4203
4204
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 32 or 64-bit add; any other size triggers
 * an assertion, in which case neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4221
4222
4223
4224/**
4225 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
4226 *
4227 * @returns The old value.
4228 * @param pu16 Pointer to the value.
4229 * @param u16 Number to subtract.
4230 *
4231 * @remarks x86: Requires a 486 or later.
4232 */
4233DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4234{
4235 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4236}
4237
4238
4239/**
4240 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
4241 *
4242 * @returns The old value.
4243 * @param pi16 Pointer to the value.
4244 * @param i16 Number to subtract.
4245 *
4246 * @remarks x86: Requires a 486 or later.
4247 */
4248DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4249{
4250 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4251}
4252
4253
4254/**
4255 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
4256 *
4257 * @returns The old value.
4258 * @param pu32 Pointer to the value.
4259 * @param u32 Number to subtract.
4260 *
4261 * @remarks x86: Requires a 486 or later.
4262 */
4263DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4264{
4265 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4266}
4267
4268
4269/**
4270 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
4271 *
4272 * @returns The old value.
4273 * @param pi32 Pointer to the value.
4274 * @param i32 Number to subtract.
4275 *
4276 * @remarks x86: Requires a 486 or later.
4277 */
4278DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4279{
4280 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4281}
4282
4283
4284/**
4285 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
4286 *
4287 * @returns The old value.
4288 * @param pu64 Pointer to the value.
4289 * @param u64 Number to subtract.
4290 *
4291 * @remarks x86: Requires a Pentium or later.
4292 */
4293DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4294{
4295 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4296}
4297
4298
4299/**
4300 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
4301 *
4302 * @returns The old value.
4303 * @param pi64 Pointer to the value.
4304 * @param i64 Number to subtract.
4305 *
4306 * @remarks x86: Requires a Pentium or later.
4307 */
4308DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4309{
4310 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4311}
4312
4313
4314/**
4315 * Atomically exchanges and subtracts to a size_t value, ordered.
4316 *
4317 * @returns The old value.
4318 * @param pcb Pointer to the size_t value.
4319 * @param cb Number to subtract.
4320 *
4321 * @remarks x86: Requires a 486 or later.
4322 */
4323DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4324{
4325#if ARCH_BITS == 64
4326 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4327#elif ARCH_BITS == 32
4328 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4329#elif ARCH_BITS == 16
4330 AssertCompileSize(size_t, 2);
4331 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4332#else
4333# error "Unsupported ARCH_BITS value"
4334#endif
4335}
4336
4337
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 32 or 64-bit subtract; any other size
 * triggers an assertion, in which case neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
4356
4357
4358
4359/**
4360 * Atomically increment a 16-bit value, ordered.
4361 *
4362 * @returns The new value.
4363 * @param pu16 Pointer to the value to increment.
4364 * @remarks Not implemented. Just to make 16-bit code happy.
4365 *
4366 * @remarks x86: Requires a 486 or later.
4367 */
4368RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4369
4370
4371/**
4372 * Atomically increment a 32-bit value, ordered.
4373 *
4374 * @returns The new value.
4375 * @param pu32 Pointer to the value to increment.
4376 *
4377 * @remarks x86: Requires a 486 or later.
4378 */
4379#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4380RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4381#else
4382DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4383{
4384# if RT_INLINE_ASM_USES_INTRIN
4385 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4386
4387# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4388# if RT_INLINE_ASM_GNU_STYLE
4389 uint32_t u32;
4390 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4391 : "=r" (u32)
4392 , "=m" (*pu32)
4393 : "0" (1)
4394 , "m" (*pu32)
4395 : "memory"
4396 , "cc");
4397 return u32+1;
4398# else
4399 __asm
4400 {
4401 mov eax, 1
4402# ifdef RT_ARCH_AMD64
4403 mov rdx, [pu32]
4404 lock xadd [rdx], eax
4405# else
4406 mov edx, [pu32]
4407 lock xadd [edx], eax
4408# endif
4409 mov u32, eax
4410 }
4411 return u32+1;
4412# endif
4413
4414# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4415 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4416 "add %w[uNew], %w[uNew], #1\n\t",
4417 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4418 "X" (0) /* dummy */);
4419 return u32NewRet;
4420
4421# else
4422 return ASMAtomicAddU32(pu32, 1) + 1;
4423# endif
4424}
4425#endif
4426
4427
4428/**
4429 * Atomically increment a signed 32-bit value, ordered.
4430 *
4431 * @returns The new value.
4432 * @param pi32 Pointer to the value to increment.
4433 *
4434 * @remarks x86: Requires a 486 or later.
4435 */
4436DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4437{
4438 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4439}
4440
4441
/**
 * Atomically increment a 64-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic (AMD64), AMD64 inline assembly
 * using LOCK XADD with an addend of 1, the ARM load-modify-store helper macro,
 * or a generic fallback built on ASMAtomicAddU64().
 *
 * @returns The new value.
 * @param   pu64    Pointer to the value to increment.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    uint64_t u64;
    /* XADD leaves the old value in the register; add one to get the new. */
    __asm__ __volatile__("lock; xaddq %0, %1\n\t"
                         : "=r" (u64)
                         , "=m" (*pu64)
                         : "0" (1)
                         , "m" (*pu64)
                         : "memory"
                         , "cc");
    return u64 + 1;

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit variant, the second pair the 32-bit variant
       working on the low and high (%H) halves; u64NewRet is presumably
       introduced by the helper macro's expansion.
       NOTE(review): the 32-bit sequence uses a plain 'add' before 'adc';
       confirm whether the low-half add is meant to update the carry flag
       (i.e. whether 'adds' is intended). */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
                                           "add %[uNew], %[uNew], #1\n\t"
                                           ,
                                           "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
                                           "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
                                           RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
    return u64NewRet;

# else
    /* Generic fallback: the add returns the old value. */
    return ASMAtomicAddU64(pu64, 1) + 1;
# endif
}
#endif
4483
4484
4485/**
4486 * Atomically increment a signed 64-bit value, ordered.
4487 *
4488 * @returns The new value.
4489 * @param pi64 Pointer to the value to increment.
4490 *
4491 * @remarks x86: Requires a Pentium or later.
4492 */
4493DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4494{
4495 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4496}
4497
4498
4499/**
4500 * Atomically increment a size_t value, ordered.
4501 *
4502 * @returns The new value.
4503 * @param pcb Pointer to the value to increment.
4504 *
4505 * @remarks x86: Requires a 486 or later.
4506 */
4507DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4508{
4509#if ARCH_BITS == 64
4510 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4511#elif ARCH_BITS == 32
4512 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4513#elif ARCH_BITS == 16
4514 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4515#else
4516# error "Unsupported ARCH_BITS value"
4517#endif
4518}
4519
4520
4521
4522/**
4523 * Atomically decrement an unsigned 32-bit value, ordered.
4524 *
4525 * @returns The new value.
4526 * @param pu16 Pointer to the value to decrement.
4527 * @remarks Not implemented. Just to make 16-bit code happy.
4528 *
4529 * @remarks x86: Requires a 486 or later.
4530 */
4531RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4532
4533
4534/**
4535 * Atomically decrement an unsigned 32-bit value, ordered.
4536 *
4537 * @returns The new value.
4538 * @param pu32 Pointer to the value to decrement.
4539 *
4540 * @remarks x86: Requires a 486 or later.
4541 */
4542#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4543RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4544#else
4545DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4546{
4547# if RT_INLINE_ASM_USES_INTRIN
4548 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4549
4550# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4551# if RT_INLINE_ASM_GNU_STYLE
4552 uint32_t u32;
4553 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4554 : "=r" (u32)
4555 , "=m" (*pu32)
4556 : "0" (-1)
4557 , "m" (*pu32)
4558 : "memory"
4559 , "cc");
4560 return u32-1;
4561# else
4562 uint32_t u32;
4563 __asm
4564 {
4565 mov eax, -1
4566# ifdef RT_ARCH_AMD64
4567 mov rdx, [pu32]
4568 lock xadd [rdx], eax
4569# else
4570 mov edx, [pu32]
4571 lock xadd [edx], eax
4572# endif
4573 mov u32, eax
4574 }
4575 return u32-1;
4576# endif
4577
4578# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4579 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4580 "sub %w[uNew], %w[uNew], #1\n\t",
4581 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4582 "X" (0) /* dummy */);
4583 return u32NewRet;
4584
4585# else
4586 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4587# endif
4588}
4589#endif
4590
4591
4592/**
4593 * Atomically decrement a signed 32-bit value, ordered.
4594 *
4595 * @returns The new value.
4596 * @param pi32 Pointer to the value to decrement.
4597 *
4598 * @remarks x86: Requires a 486 or later.
4599 */
4600DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4601{
4602 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4603}
4604
4605
/**
 * Atomically decrement an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic (AMD64), AMD64 inline assembly
 * using LOCK XADD with an all-ones addend, the ARM load-modify-store helper
 * macro, or a generic fallback built on ASMAtomicAddU64().
 *
 * @returns The new value.
 * @param   pu64    Pointer to the value to decrement.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    uint64_t u64;
    /* Adding ~0 (i.e. -1 as a full 64-bit value) decrements; XADD leaves the
       old value in the register, so subtract one to get the new value. */
    __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
                         : "=r" (u64)
                         , "=m" (*pu64)
                         : "0" (~(uint64_t)0)
                         , "m" (*pu64)
                         : "memory"
                         , "cc");
    return u64-1;

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit variant, the second pair the 32-bit variant
       working on the low and high (%H) halves; u64NewRet is presumably
       introduced by the helper macro's expansion.
       NOTE(review): the 32-bit sequence uses a plain 'sub' before 'sbc';
       confirm whether the low-half subtract is meant to update the carry flag
       (i.e. whether 'subs' is intended). */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
                                           "sub %[uNew], %[uNew], #1\n\t"
                                           ,
                                           "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
                                           "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
                                           RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
    return u64NewRet;

# else
    /* Generic fallback: adding UINT64_MAX wraps around to a decrement; the add
       returns the old value. */
    return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
# endif
}
#endif
4647
4648
4649/**
4650 * Atomically decrement a signed 64-bit value, ordered.
4651 *
4652 * @returns The new value.
4653 * @param pi64 Pointer to the value to decrement.
4654 *
4655 * @remarks x86: Requires a Pentium or later.
4656 */
4657DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4658{
4659 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4660}
4661
4662
4663/**
4664 * Atomically decrement a size_t value, ordered.
4665 *
4666 * @returns The new value.
4667 * @param pcb Pointer to the value to decrement.
4668 *
4669 * @remarks x86: Requires a 486 or later.
4670 */
4671DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4672{
4673#if ARCH_BITS == 64
4674 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4675#elif ARCH_BITS == 32
4676 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4677#elif ARCH_BITS == 16
4678 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4679#else
4680# error "Unsupported ARCH_BITS value"
4681#endif
4682}
4683
4684
/**
 * Atomically Or an unsigned 32-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic, x86/AMD64 inline assembly
 * using a LOCK-prefixed OR, or the ARM load-modify-store helper macro.  No
 * value is returned.
 *
 * @param   pu32    Pointer to the variable to OR u32 with.
 * @param   u32     The value to OR *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is not needed and is discarded. */
    _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed both as output and input since it is read and written;
       the "ir" constraint lets u32 be an immediate or a register. */
    __asm__ __volatile__("lock; orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32)
                         , "m" (*pu32)
                         : "cc");
#  else
    __asm
    {
        mov     eax, [u32]
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock or [rdx], eax
#   else
        mov     edx, [pu32]
        lock or [edx], eax
#   endif
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
                                           "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
                                           "orr %[uNew], %[uNew], %[uVal]\n\t",
                                           [uVal] "r" (u32));

# else
#  error "Port me"
# endif
}
#endif
4734
4735
4736/**
4737 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4738 * fallback).
4739 *
4740 * @returns Old value.
4741 * @param pu32 Pointer to the variable to OR @a u32 with.
4742 * @param u32 The value to OR @a *pu32 with.
4743 */
4744DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4745{
4746#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4747 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4748 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4749 "orr %[uNew], %[uOld], %[uVal]\n\t",
4750 [uVal] "r" (u32));
4751 return u32OldRet;
4752
4753#else
4754 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4755 uint32_t u32New;
4756 do
4757 u32New = u32RetOld | u32;
4758 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4759 return u32RetOld;
4760#endif
4761}
4762
4763
4764/**
4765 * Atomically Or a signed 32-bit value, ordered.
4766 *
4767 * @param pi32 Pointer to the pointer variable to OR u32 with.
4768 * @param i32 The value to OR *pu32 with.
4769 *
4770 * @remarks x86: Requires a 386 or later.
4771 */
4772DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4773{
4774 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4775}
4776
4777
/**
 * Atomically Or an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic (AMD64), AMD64 inline assembly
 * using a LOCK-prefixed OR, the ARM load-modify-store helper macro, or a
 * generic compare-and-exchange retry loop.  No value is returned.
 *
 * @param   pu64    Pointer to the variable to OR u64 with.
 * @param   u64     The value to OR *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic's return value is not needed and is discarded. */
    _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* *pu64 is listed both as output and input since it is read and written. */
    __asm__ __volatile__("lock; orq %1, %q0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64)
                         , "m" (*pu64)
                         : "cc");

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit variant, the second pair the 32-bit variant
       OR-ing the low and high (%H) halves separately. */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
                                           "orr %[uNew], %[uNew], %[uVal]\n\t"
                                           ,
                                           "orr %[uNew], %[uNew], %[uVal]\n\t"
                                           "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
                                           [uVal] "r" (u64));

# else
    /* Generic fallback: read, compute and try to swap the result in; retry
       (after a pause hint) whenever the compare-exchange reports failure. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old | u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
4821
4822
4823/**
4824 * Atomically Or a signed 64-bit value, ordered.
4825 *
4826 * @param pi64 Pointer to the pointer variable to OR u64 with.
4827 * @param i64 The value to OR *pu64 with.
4828 *
4829 * @remarks x86: Requires a Pentium or later.
4830 */
4831DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4832{
4833 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4834}
4835
4836
/**
 * Atomically And an unsigned 32-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic, x86/AMD64 inline assembly
 * using a LOCK-prefixed AND, or the ARM load-modify-store helper macro.  No
 * value is returned.
 *
 * @param   pu32    Pointer to the variable to AND u32 with.
 * @param   u32     The value to AND *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is not needed and is discarded. */
    _InterlockedAnd((long volatile RT_FAR *)pu32, u32);

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed both as output and input since it is read and written;
       the "ir" constraint lets u32 be an immediate or a register. */
    __asm__ __volatile__("lock; andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32)
                         , "m" (*pu32)
                         : "cc");
#  else
    __asm
    {
        mov     eax, [u32]
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock and [rdx], eax
#   else
        mov     edx, [pu32]
        lock and [edx], eax
#   endif
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit ARM variant, the second the 32-bit one. */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
                                           "and %w[uNew], %w[uNew], %w[uVal]\n\t",
                                           "and %[uNew], %[uNew], %[uVal]\n\t",
                                           [uVal] "r" (u32));

# else
#  error "Port me"
# endif
}
#endif
4885
4886
4887/**
4888 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4889 *
4890 * @returns Old value.
4891 * @param pu32 Pointer to the variable to AND @a u32 with.
4892 * @param u32 The value to AND @a *pu32 with.
4893 */
4894DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4895{
4896#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4897 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4898 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4899 "and %[uNew], %[uOld], %[uVal]\n\t",
4900 [uVal] "r" (u32));
4901 return u32OldRet;
4902
4903#else
4904 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4905 uint32_t u32New;
4906 do
4907 u32New = u32RetOld & u32;
4908 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4909 return u32RetOld;
4910#endif
4911}
4912
4913
4914/**
4915 * Atomically And a signed 32-bit value, ordered.
4916 *
4917 * @param pi32 Pointer to the pointer variable to AND i32 with.
4918 * @param i32 The value to AND *pi32 with.
4919 *
4920 * @remarks x86: Requires a 386 or later.
4921 */
4922DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4923{
4924 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4925}
4926
4927
/**
 * Atomically And an unsigned 64-bit value, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic (AMD64), AMD64 inline assembly
 * using a LOCK-prefixed AND, the ARM load-modify-store helper macro, or a
 * generic compare-and-exchange retry loop.  No value is returned.
 *
 * @param   pu64    Pointer to the variable to AND u64 with.
 * @param   u64     The value to AND *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic's return value is not needed and is discarded. */
    _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* *pu64 is listed both as output and input since it is read and written. */
    __asm__ __volatile__("lock; andq %1, %0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64)
                         , "m" (*pu64)
                         : "cc");

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit variant, the second pair the 32-bit variant
       AND-ing the low and high (%H) halves separately. */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
                                           "and %[uNew], %[uNew], %[uVal]\n\t"
                                           ,
                                           "and %[uNew], %[uNew], %[uVal]\n\t"
                                           "and %H[uNew], %H[uNew], %H[uVal]\n\t",
                                           [uVal] "r" (u64));

# else
    /* Generic fallback: read, compute and try to swap the result in; retry
       (after a pause hint) whenever the compare-exchange reports failure. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old & u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
4971
4972
4973/**
4974 * Atomically And a signed 64-bit value, ordered.
4975 *
4976 * @param pi64 Pointer to the pointer variable to AND i64 with.
4977 * @param i64 The value to AND *pi64 with.
4978 *
4979 * @remarks x86: Requires a Pentium or later.
4980 */
4981DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4982{
4983 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4984}
4985
4986
/**
 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
 *
 * Depending on the build configuration this is either just a prototype for an
 * external implementation, a compiler intrinsic, x86/AMD64 inline assembly
 * using a LOCK-prefixed XOR, or the ARM load-modify-store helper macro.  No
 * value is returned.
 *
 * @param   pu32    Pointer to the variable to XOR @a u32 with.
 * @param   u32     The value to XOR @a *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is not needed and is discarded. */
    _InterlockedXor((long volatile RT_FAR *)pu32, u32);

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed both as output and input since it is read and written;
       the "ir" constraint lets u32 be an immediate or a register. */
    __asm__ __volatile__("lock; xorl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32)
                         , "m" (*pu32)
                         : "cc");
#  else
    __asm
    {
        mov     eax, [u32]
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xor [rdx], eax
#   else
        mov     edx, [pu32]
        lock xor [edx], eax
#   endif
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* First string is the 64-bit ARM variant, the second the 32-bit one. */
    RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
                                           "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
                                           "eor %[uNew], %[uNew], %[uVal]\n\t",
                                           [uVal] "r" (u32));

# else
#  error "Port me"
# endif
}
#endif
5035
5036
5037/**
5038 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
5039 * extended version (for bitmaps).
5040 *
5041 * @returns Old value.
5042 * @param pu32 Pointer to the variable to XOR @a u32 with.
5043 * @param u32 The value to XOR @a *pu32 with.
5044 */
5045DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5046{
5047#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5048 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
5049 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5050 "eor %[uNew], %[uOld], %[uVal]\n\t",
5051 [uVal] "r" (u32));
5052 return u32OldRet;
5053
5054#else
5055 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
5056 uint32_t u32New;
5057 do
5058 u32New = u32RetOld ^ u32;
5059 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5060 return u32RetOld;
5061#endif
5062}
5063
5064
5065/**
5066 * Atomically XOR a signed 32-bit value, ordered.
5067 *
5068 * @param pi32 Pointer to the variable to XOR i32 with.
5069 * @param i32 The value to XOR *pi32 with.
5070 *
5071 * @remarks x86: Requires a 386 or later.
5072 */
5073DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5074{
5075 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5076}
5077
5078
5079/**
5080 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
5081 *
 * On x86/AMD64 this is a single OR instruction on the memory operand without
 * a lock prefix; on ARM the load-modify-store helper macro is used with
 * NO_BARRIER as its barrier argument.
 *
5082 * @param pu32 Pointer to the variable to OR @a u32 with.
5083 * @param u32 The value to OR @a *pu32 with.
5084 *
5085 * @remarks x86: Requires a 386 or later.
5086 */
5087#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5088RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5089#else
5090DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5091{
5092# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5093# if RT_INLINE_ASM_GNU_STYLE
 /* No lock prefix here, unlike the ordered variants. */
5094 __asm__ __volatile__("orl %1, %0\n\t"
5095 : "=m" (*pu32)
5096 : "ir" (u32)
5097 , "m" (*pu32)
5098 : "cc");
5099# else
5100 __asm
5101 {
5102 mov eax, [u32]
5103# ifdef RT_ARCH_AMD64
5104 mov rdx, [pu32]
5105 or [rdx], eax
5106# else
5107 mov edx, [pu32]
5108 or [edx], eax
5109# endif
5110 }
5111# endif
5112
5113# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5114 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
5115 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
5116 "orr %[uNew], %[uNew], %[uVal]\n\t",
5117 [uVal] "r" (u32));
5118
5119# else
5120# error "Port me"
5121# endif
5122}
5123#endif
5124
5125
5126/**
5127 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
5128 * extended version (for bitmap fallback).
5129 *
 * On ARM the load-modify-store helper macro is used with NO_BARRIER; on all
 * other architectures the call is forwarded to the ordered ASMAtomicOrExU32.
 *
5130 * @returns Old value.
5131 * @param pu32 Pointer to the variable to OR @a u32 with.
5132 * @param u32 The value to OR @a *pu32 with.
5133 */
5134DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5135{
5136#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* u32OldRet is not declared here; presumably the macro declares it. */
5137 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
5138 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
5139 "orr %[uNew], %[uOld], %[uVal]\n\t",
5140 [uVal] "r" (u32));
5141 return u32OldRet;
5142
5143#else
5144 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5145#endif
5146}
5147
5148
5149/**
5150 * Atomically OR a signed 32-bit value, unordered.
5151 *
5152 * @param pi32 Pointer to the pointer variable to OR u32 with.
5153 * @param i32 The value to OR *pu32 with.
5154 *
5155 * @remarks x86: Requires a 386 or later.
5156 */
5157DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5158{
5159 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5160}
5161
5162
5163/**
5164 * Atomically OR an unsigned 64-bit value, unordered.
5165 *
 * Uses a single ORQ instruction (no lock prefix) with GNU-style inline
 * assembly on AMD64, the ARM load-modify-store helper macro with NO_BARRIER
 * on ARM, and otherwise a read / compare-and-exchange retry loop.
 *
5166 * @param pu64 Pointer to the variable to OR @a u64 with.
5167 * @param u64 The value to OR @a *pu64 with.
5168 *
5169 * @remarks x86: Requires a Pentium or later.
5170 */
5171#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5172DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5173#else
5174DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5175{
5176# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5177 __asm__ __volatile__("orq %1, %q0\n\t"
5178 : "=m" (*pu64)
5179 : "r" (u64)
5180 , "m" (*pu64)
5181 : "cc");
5182
5183# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5184 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5185 "orr %[uNew], %[uNew], %[uVal]\n\t"
5186 ,
5187 "orr %[uNew], %[uNew], %[uVal]\n\t"
5188 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5189 [uVal] "r" (u64));
5190
5191# else
 /* Fallback: recompute and retry until the compare-exchange succeeds. */
5192 for (;;)
5193 {
5194 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5195 uint64_t u64New = u64Old | u64;
5196 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5197 break;
5198 ASMNopPause();
5199 }
5200# endif
5201}
5202#endif
5203
5204
5205/**
5206 * Atomically Or a signed 64-bit value, unordered.
5207 *
5208 * @param pi64 Pointer to the pointer variable to OR u64 with.
5209 * @param i64 The value to OR *pu64 with.
5210 *
5211 * @remarks x86: Requires a Pentium or later.
5212 */
5213DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5214{
5215 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5216}
5217
5218
5219/**
5220 * Atomically And an unsigned 32-bit value, unordered.
5221 *
 * On x86/AMD64 this is a single AND instruction on the memory operand without
 * a lock prefix; on ARM the load-modify-store helper macro is used with
 * NO_BARRIER as its barrier argument.
 *
5222 * @param pu32 Pointer to the variable to AND @a u32 with.
5223 * @param u32 The value to AND @a *pu32 with.
5224 *
5225 * @remarks x86: Requires a 386 or later.
5226 */
5227#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5228RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5229#else
5230DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5231{
5232# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5233# if RT_INLINE_ASM_GNU_STYLE
 /* No lock prefix here, unlike the ordered variants. */
5234 __asm__ __volatile__("andl %1, %0\n\t"
5235 : "=m" (*pu32)
5236 : "ir" (u32)
5237 , "m" (*pu32)
5238 : "cc");
5239# else
5240 __asm
5241 {
5242 mov eax, [u32]
5243# ifdef RT_ARCH_AMD64
5244 mov rdx, [pu32]
5245 and [rdx], eax
5246# else
5247 mov edx, [pu32]
5248 and [edx], eax
5249# endif
5250 }
5251# endif
5252
5253# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5254 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5255 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5256 "and %[uNew], %[uNew], %[uVal]\n\t",
5257 [uVal] "r" (u32));
5258
5259# else
5260# error "Port me"
5261# endif
5262}
5263#endif
5264
5265
5266/**
5267 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5268 * bitmap fallback).
5269 *
 * On ARM the load-modify-store helper macro is used with NO_BARRIER; on all
 * other architectures the call is forwarded to the ordered ASMAtomicAndExU32.
 *
5270 * @returns Old value.
5271 * @param pu32 Pointer to the variable to AND @a u32 with.
5272 * @param u32 The value to AND @a *pu32 with.
5273 */
5274DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5275{
5276#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* u32OldRet is not declared here; presumably the macro declares it. */
5277 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5278 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5279 "and %[uNew], %[uOld], %[uVal]\n\t",
5280 [uVal] "r" (u32));
5281 return u32OldRet;
5282
5283#else
5284 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5285#endif
5286}
5287
5288
5289/**
5290 * Atomically And a signed 32-bit value, unordered.
5291 *
5292 * @param pi32 Pointer to the pointer variable to AND i32 with.
5293 * @param i32 The value to AND *pi32 with.
5294 *
5295 * @remarks x86: Requires a 386 or later.
5296 */
5297DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5298{
5299 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5300}
5301
5302
5303/**
5304 * Atomically And an unsigned 64-bit value, unordered.
5305 *
 * Uses a single ANDQ instruction (no lock prefix) with GNU-style inline
 * assembly on AMD64, the ARM load-modify-store helper macro with NO_BARRIER
 * on ARM, and otherwise a read / compare-and-exchange retry loop.
 *
5306 * @param pu64 Pointer to the variable to AND @a u64 with.
5307 * @param u64 The value to AND @a *pu64 with.
5308 *
5309 * @remarks x86: Requires a Pentium or later.
5310 */
5311#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5312DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5313#else
5314DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5315{
5316# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5317 __asm__ __volatile__("andq %1, %0\n\t"
5318 : "=m" (*pu64)
5319 : "r" (u64)
5320 , "m" (*pu64)
5321 : "cc");
5322
5323# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5324 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5325 "and %[uNew], %[uNew], %[uVal]\n\t"
5326 ,
5327 "and %[uNew], %[uNew], %[uVal]\n\t"
5328 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5329 [uVal] "r" (u64));
5330
5331# else
 /* Fallback: recompute and retry until the compare-exchange succeeds. */
5332 for (;;)
5333 {
5334 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5335 uint64_t u64New = u64Old & u64;
5336 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5337 break;
5338 ASMNopPause();
5339 }
5340# endif
5341}
5342#endif
5343
5344
5345/**
5346 * Atomically And a signed 64-bit value, unordered.
5347 *
5348 * @param pi64 Pointer to the pointer variable to AND i64 with.
5349 * @param i64 The value to AND *pi64 with.
5350 *
5351 * @remarks x86: Requires a Pentium or later.
5352 */
5353DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5354{
5355 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5356}
5357
5358
5359/**
5360 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5361 *
 * On x86/AMD64 this is a single XOR instruction on the memory operand without
 * a lock prefix; on ARM the load-modify-store helper macro is used with
 * NO_BARRIER as its barrier argument.
 *
5362 * @param pu32 Pointer to the variable to XOR @a u32 with.
5363 * @param u32 The value to XOR @a *pu32 with.
5364 *
5365 * @remarks x86: Requires a 386 or later.
5366 */
5367#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5368RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5369#else
5370DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5371{
5372# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5373# if RT_INLINE_ASM_GNU_STYLE
 /* No lock prefix here, unlike the ordered variants. */
5374 __asm__ __volatile__("xorl %1, %0\n\t"
5375 : "=m" (*pu32)
5376 : "ir" (u32)
5377 , "m" (*pu32)
5378 : "cc");
5379# else
5380 __asm
5381 {
5382 mov eax, [u32]
5383# ifdef RT_ARCH_AMD64
5384 mov rdx, [pu32]
5385 xor [rdx], eax
5386# else
5387 mov edx, [pu32]
5388 xor [edx], eax
5389# endif
5390 }
5391# endif
5392
5393# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5394 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5395 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5396 "eor %[uNew], %[uNew], %[uVal]\n\t",
5397 [uVal] "r" (u32));
5398
5399# else
5400# error "Port me"
5401# endif
5402}
5403#endif
5404
5405
5406/**
5407 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5408 * extended version (for bitmap fallback).
5409 *
 * On ARM the load-modify-store helper macro is used with NO_BARRIER; on all
 * other architectures the call is forwarded to the ordered ASMAtomicXorExU32.
 *
5410 * @returns Old value.
5411 * @param pu32 Pointer to the variable to XOR @a u32 with.
5412 * @param u32 The value to XOR @a *pu32 with.
5413 */
5414DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5415{
5416#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* u32OldRet is not declared here; presumably the macro declares it. */
5417 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5418 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5419 "eor %[uNew], %[uOld], %[uVal]\n\t",
5420 [uVal] "r" (u32));
5421 return u32OldRet;
5422
5423#else
5424 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5425#endif
5426}
5427
5428
5429/**
5430 * Atomically XOR a signed 32-bit value, unordered.
5431 *
5432 * @param pi32 Pointer to the variable to XOR @a u32 with.
5433 * @param i32 The value to XOR @a *pu32 with.
5434 *
5435 * @remarks x86: Requires a 386 or later.
5436 */
5437DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5438{
5439 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5440}
5441
5442
5443/**
5444 * Atomically increment an unsigned 32-bit value, unordered.
5445 *
 * The x86/AMD64 paths use XADD (without a lock prefix) to add 1 and fetch the
 * previous value, then return previous + 1.  The ARM path uses the
 * load-modify-store helper macro with NO_BARRIER.
 *
5446 * @returns the new value.
5447 * @param pu32 Pointer to the variable to increment.
5448 *
5449 * @remarks x86: Requires a 486 or later.
5450 */
5451#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5452RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5453#else
5454DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5455{
5456# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5457 uint32_t u32;
5458# if RT_INLINE_ASM_GNU_STYLE
 /* Operand 0 is seeded with 1 via the "0" matching constraint and receives
    the old memory value from XADD. */
5459 __asm__ __volatile__("xaddl %0, %1\n\t"
5460 : "=r" (u32)
5461 , "=m" (*pu32)
5462 : "0" (1)
5463 , "m" (*pu32)
5464 : "memory" /** @todo why 'memory'? */
5465 , "cc");
5466 return u32 + 1;
5467# else
5468 __asm
5469 {
5470 mov eax, 1
5471# ifdef RT_ARCH_AMD64
5472 mov rdx, [pu32]
5473 xadd [rdx], eax
5474# else
5475 mov edx, [pu32]
5476 xadd [edx], eax
5477# endif
5478 mov u32, eax
5479 }
5480 return u32 + 1;
5481# endif
5482
5483# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* u32NewRet is not declared here; presumably the macro declares it. */
5484 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5485 "add %w[uNew], %w[uNew], #1\n\t",
5486 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5487 "X" (0) /* dummy */);
5488 return u32NewRet;
5489
5490# else
5491# error "Port me"
5492# endif
5493}
5494#endif
5495
5496
5497/**
5498 * Atomically decrement an unsigned 32-bit value, unordered.
5499 *
 * The x86/AMD64 paths use XADD to add -1 and fetch the previous value, then
 * return previous - 1.  The ARM path uses the load-modify-store helper macro
 * with NO_BARRIER.
 *
5500 * @returns the new value.
5501 * @param pu32 Pointer to the variable to decrement.
5502 *
5503 * @remarks x86: Requires a 486 or later.
5504 */
5505#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5506RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5507#else
5508DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5509{
5510# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5511 uint32_t u32;
5512# if RT_INLINE_ASM_GNU_STYLE
 /* NOTE(review): this branch uses a lock prefix, whereas the non-GNU branch
    below and ASMAtomicUoIncU32 do not.  Looks like an inconsistency for an
    "unordered" primitive - confirm whether the lock is intentional. */
5513 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5514 : "=r" (u32)
5515 , "=m" (*pu32)
5516 : "0" (-1)
5517 , "m" (*pu32)
5518 : "memory"
5519 , "cc");
5520 return u32 - 1;
5521# else
5522 __asm
5523 {
5524 mov eax, -1
5525# ifdef RT_ARCH_AMD64
5526 mov rdx, [pu32]
5527 xadd [rdx], eax
5528# else
5529 mov edx, [pu32]
5530 xadd [edx], eax
5531# endif
5532 mov u32, eax
5533 }
5534 return u32 - 1;
5535# endif
5536
5537# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* u32NewRet is not declared here; presumably the macro declares it. */
5538 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5539 "sub %w[uNew], %w[uNew], #1\n\t",
5540 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5541 "X" (0) /* dummy */);
5542 return u32NewRet;
5543
5544# else
5545# error "Port me"
5546# endif
5547}
5548#endif
5549
5550
5551/** @def RT_ASM_PAGE_SIZE
5552 * Page size constant local to this header, defined here so that
 * iprt/param.h does not have to be dragged in.
 *
 * The value is 0x2000 for SPARC64, 0x4000 for ARM64 and 0x1000 for everything
 * else.  If PAGE_SIZE is already defined (and NT_INCLUDED is not), it is
 * checked against this value and the build fails on a mismatch; on ARM64 the
 * check is also skipped when _MACH_ARM_VM_PARAM_H_ is defined.
5553 * @internal
5554 */
5555#if defined(RT_ARCH_SPARC64)
5556# define RT_ASM_PAGE_SIZE 0x2000
5557# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5558# if PAGE_SIZE != 0x2000
5559# error "PAGE_SIZE is not 0x2000!"
5560# endif
5561# endif
5562#elif defined(RT_ARCH_ARM64)
5563# define RT_ASM_PAGE_SIZE 0x4000
5564# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5565# if PAGE_SIZE != 0x4000
5566# error "PAGE_SIZE is not 0x4000!"
5567# endif
5568# endif
5569#else
5570# define RT_ASM_PAGE_SIZE 0x1000
5571# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5572# if PAGE_SIZE != 0x1000
5573# error "PAGE_SIZE is not 0x1000!"
5574# endif
5575# endif
5576#endif
5577
5578/**
5579 * Zeros a memory page of RT_ASM_PAGE_SIZE bytes.
5580 *
 * The inline implementation is only compiled for x86/AMD64 (where
 * RT_ASM_PAGE_SIZE is 0x1000, i.e. 4K); all other configurations only get a
 * prototype for an external implementation.
 *
5581 * @param pv Pointer to the memory block. This must be page aligned.
5582 */
5583#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5584RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5585# else
5586DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5587{
5588# if RT_INLINE_ASM_USES_INTRIN
5589# ifdef RT_ARCH_AMD64
5590 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5591# else
5592 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5593# endif
5594
5595# elif RT_INLINE_ASM_GNU_STYLE
 /* The destination and count registers are also declared as outputs since
    the string instruction modifies them; uDummy just absorbs the count. */
5596 RTCCUINTREG uDummy;
5597# ifdef RT_ARCH_AMD64
5598 __asm__ __volatile__("rep stosq"
5599 : "=D" (pv),
5600 "=c" (uDummy)
5601 : "0" (pv),
5602 "c" (RT_ASM_PAGE_SIZE >> 3),
5603 "a" (0)
5604 : "memory");
5605# else
5606 __asm__ __volatile__("rep stosl"
5607 : "=D" (pv),
5608 "=c" (uDummy)
5609 : "0" (pv),
5610 "c" (RT_ASM_PAGE_SIZE >> 2),
5611 "a" (0)
5612 : "memory");
5613# endif
5614# else
 /* The counts below are hard-coded: 0200h qwords / 0400h dwords = 0x1000 bytes. */
5615 __asm
5616 {
5617# ifdef RT_ARCH_AMD64
5618 xor rax, rax
5619 mov ecx, 0200h
5620 mov rdi, [pv]
5621 rep stosq
5622# else
5623 xor eax, eax
5624 mov ecx, 0400h
5625 mov edi, [pv]
5626 rep stosd
5627# endif
5628 }
5629# endif
5630}
5631# endif
5632
5633
5634/**
5635 * Zeros a memory block with a 32-bit aligned size.
5636 *
 * The inline implementation is only compiled for x86/AMD64; it stores
 * cb / 4 zero dwords (the intrinsic AMD64 path stores cb / 8 zero qwords
 * instead when @a cb is a multiple of 8).
 *
5637 * @param pv Pointer to the memory block.
5638 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5639 */
5640#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5641RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5642#else
5643DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5644{
5645# if RT_INLINE_ASM_USES_INTRIN
5646# ifdef RT_ARCH_AMD64
5647 if (!(cb & 7))
5648 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5649 else
5650# endif
5651 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5652
5653# elif RT_INLINE_ASM_GNU_STYLE
 /* pv and cb double as outputs for the destination and count registers,
    which the string instruction modifies; the count input is cb >> 2. */
5654 __asm__ __volatile__("rep stosl"
5655 : "=D" (pv),
5656 "=c" (cb)
5657 : "0" (pv),
5658 "1" (cb >> 2),
5659 "a" (0)
5660 : "memory");
5661# else
5662 __asm
5663 {
5664 xor eax, eax
5665# ifdef RT_ARCH_AMD64
5666 mov rcx, [cb]
5667 shr rcx, 2
5668 mov rdi, [pv]
5669# else
5670 mov ecx, [cb]
5671 shr ecx, 2
5672 mov edi, [pv]
5673# endif
5674 rep stosd
5675 }
5676# endif
5677}
5678#endif
5679
5680
5681/**
5682 * Fills a memory block with a 32-bit aligned size.
5683 *
 * The inline implementation is only compiled for x86/AMD64; it stores
 * cb / 4 copies of @a u32 (the intrinsic AMD64 path stores cb / 8 qwords made
 * of two copies of @a u32 instead when @a cb is a multiple of 8).
 *
5684 * @param pv Pointer to the memory block.
5685 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5686 * @param u32 The value to fill with.
5687 */
5688#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5689RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5690#else
5691DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5692{
5693# if RT_INLINE_ASM_USES_INTRIN
5694# ifdef RT_ARCH_AMD64
5695 if (!(cb & 7))
5696 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5697 else
5698# endif
5699 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5700
5701# elif RT_INLINE_ASM_GNU_STYLE
 /* pv and cb double as outputs for the destination and count registers,
    which the string instruction modifies; the count input is cb >> 2. */
5702 __asm__ __volatile__("rep stosl"
5703 : "=D" (pv),
5704 "=c" (cb)
5705 : "0" (pv),
5706 "1" (cb >> 2),
5707 "a" (u32)
5708 : "memory");
5709# else
5710 __asm
5711 {
5712# ifdef RT_ARCH_AMD64
5713 mov rcx, [cb]
5714 shr rcx, 2
5715 mov rdi, [pv]
5716# else
5717 mov ecx, [cb]
5718 shr ecx, 2
5719 mov edi, [pv]
5720# endif
5721 mov eax, [u32]
5722 rep stosd
5723 }
5724# endif
5725}
5726#endif
5727
5728
5729/**
5730 * Checks if a memory block is all zeros.
5731 *
5732 * @returns Pointer to the first non-zero byte.
5733 * @returns NULL if all zero.
5734 *
5735 * @param pv Pointer to the memory block.
5736 * @param cb Number of bytes in the block.
5737 */
5738#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5739DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5740#else
5741DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5742{
5743/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5744 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5745 for (; cb; cb--, pb++)
5746 if (RT_LIKELY(*pb == 0))
5747 { /* likely */ }
5748 else
5749 return (void RT_FAR *)pb;
5750 return NULL;
5751}
5752#endif
5753
5754
5755/**
5756 * Checks if a memory block is all zeros.
5757 *
5758 * @returns true if zero, false if not.
5759 *
5760 * @param pv Pointer to the memory block.
5761 * @param cb Number of bytes in the block.
5762 *
5763 * @sa ASMMemFirstNonZero
5764 */
5765DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5766{
5767 return ASMMemFirstNonZero(pv, cb) == NULL;
5768}
5769
5770
5771/**
5772 * Checks if a memory page is all zeros.
5773 *
 * The active implementation walks the page (RT_ASM_PAGE_SIZE bytes) in groups
 * of eight uintptr_t sized words and returns false at the first non-zero
 * word.  The inline assembly variant is compiled out ("#if 0").
 *
5774 * @returns true / false.
5775 *
5776 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5777 * boundary
5778 */
5779DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5780{
5781# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5782 union { RTCCUINTREG r; bool f; } uAX;
5783 RTCCUINTREG xCX, xDI;
5784 Assert(!((uintptr_t)pvPage & 15));
5785 __asm__ __volatile__("repe; "
5786# ifdef RT_ARCH_AMD64
5787 "scasq\n\t"
5788# else
5789 "scasl\n\t"
5790# endif
5791 "setnc %%al\n\t"
5792 : "=&c" (xCX)
5793 , "=&D" (xDI)
5794 , "=&a" (uAX.r)
5795 : "mr" (pvPage)
5796# ifdef RT_ARCH_AMD64
5797 , "0" (RT_ASM_PAGE_SIZE/8)
5798# else
5799 , "0" (RT_ASM_PAGE_SIZE/4)
5800# endif
5801 , "1" (pvPage)
5802 , "2" (0)
5803 : "cc");
5804 return uAX.f;
5805# else
5806 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
 /* Number of loop iterations; each iteration covers eight words. */
5807 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5808 Assert(!((uintptr_t)pvPage & 15));
5809 for (;;)
5810 {
 /* The eight words are tested in an interleaved order (0,4,2,6,1,5,3,7);
    presumably a deliberate access pattern - keep as is. */
5811 if (puPtr[0]) return false;
5812 if (puPtr[4]) return false;
5813
5814 if (puPtr[2]) return false;
5815 if (puPtr[6]) return false;
5816
5817 if (puPtr[1]) return false;
5818 if (puPtr[5]) return false;
5819
5820 if (puPtr[3]) return false;
5821 if (puPtr[7]) return false;
5822
5823 if (!--cLeft)
5824 return true;
5825 puPtr += 8;
5826 }
5827# endif
5828}
5829
5830
5831/**
5832 * Checks if a memory block is filled with the specified byte, returning the
5833 * first mismatch.
5834 *
5835 * This is sort of an inverted memchr.
5836 *
5837 * @returns Pointer to the byte which doesn't equal u8.
5838 * @returns NULL if all equal to u8.
5839 *
5840 * @param pv Pointer to the memory block.
5841 * @param cb Number of bytes in the block.
5842 * @param u8 The value it's supposed to be filled with.
5843 *
5844 * @remarks No alignment requirements.
5845 */
5846#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5847 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5848DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5849#else
5850DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5851{
5852/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5853 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5854 for (; cb; cb--, pb++)
5855 if (RT_LIKELY(*pb == u8))
5856 { /* likely */ }
5857 else
5858 return (void *)pb;
5859 return NULL;
5860}
5861#endif
5862
5863
5864/**
5865 * Checks if a memory block is filled with the specified byte.
5866 *
5867 * @returns true if all matching, false if not.
5868 *
5869 * @param pv Pointer to the memory block.
5870 * @param cb Number of bytes in the block.
5871 * @param u8 The value it's supposed to be filled with.
5872 *
5873 * @remarks No alignment requirements.
5874 */
5875DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5876{
5877 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5878}
5879
5880
5881/**
5882 * Checks if a memory block is filled with the specified 32-bit value.
5883 *
5884 * This is a sort of inverted memchr.
5885 *
5886 * @returns Pointer to the first value which doesn't equal u32.
5887 * @returns NULL if all equal to u32.
5888 *
5889 * @param pv Pointer to the memory block.
5890 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5891 * @param u32 The value it's supposed to be filled with.
5892 */
5893DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5894{
5895/** @todo rewrite this in inline assembly? */
5896 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5897 for (; cb; cb -= 4, pu32++)
5898 if (RT_LIKELY(*pu32 == u32))
5899 { /* likely */ }
5900 else
5901 return (uint32_t RT_FAR *)pu32;
5902 return NULL;
5903}
5904
5905
5906/**
5907 * Probes a byte pointer for read access.
5908 *
5909 * The probe is a plain one byte load, so presumably it faults when the byte
 * is not read accessible (NOTE(review): this text used to say "will not
 * fault", which contradicts the ASMProbeReadBuffer description - confirm);
5910 * the idea is to do this in a safe place like before acquiring locks
5911 * and such like.
5912 *
5913 * Also, this functions guarantees that an eager compiler is not going
5914 * to optimize the probing away.
5915 *
 * @returns The byte that was read.
5916 * @param pvByte Pointer to the byte.
5917 */
5918#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5919RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5920#else
5921DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5922{
5923# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5924 uint8_t u8;
5925# if RT_INLINE_ASM_GNU_STYLE
 /* The volatile asm statement is what keeps the load from being dropped. */
5926 __asm__ __volatile__("movb %1, %0\n\t"
5927 : "=q" (u8)
5928 : "m" (*(const uint8_t *)pvByte));
5929# else
5930 __asm
5931 {
5932# ifdef RT_ARCH_AMD64
5933 mov rax, [pvByte]
5934 mov al, [rax]
5935# else
5936 mov eax, [pvByte]
5937 mov al, [eax]
5938# endif
5939 mov [u8], al
5940 }
5941# endif
5942 return u8;
5943
5944# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* The byte is loaded into a 32-bit register and truncated on return. */
5945 uint32_t u32;
5946 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5947# if defined(RT_ARCH_ARM64)
5948 "ldxrb %w[uDst], %[pMem]\n\t"
5949# else
5950 "ldrexb %[uDst], %[pMem]\n\t"
5951# endif
5952 : [uDst] "=&r" (u32)
5953 : [pMem] "Q" (*(uint8_t const *)pvByte));
5954 return (uint8_t)u32;
5955
5956# else
5957# error "Port me"
5958# endif
5959}
5960#endif
5961
5962/**
5963 * Probes a buffer for read access page by page.
5964 *
5965 * While the function will fault if the buffer is not fully read
5966 * accessible, the idea is to do this in a safe place like before
5967 * acquiring locks and such like.
5968 *
5969 * Also, this functions guarantees that an eager compiler is not going
5970 * to optimize the probing away.
5971 *
5972 * @param pvBuf Pointer to the buffer.
5973 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5974 */
5975DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5976{
5977 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5978 /* the first byte */
5979 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5980 ASMProbeReadByte(pu8);
5981
5982 /* the pages in between pages. */
5983 while (cbBuf > RT_ASM_PAGE_SIZE)
5984 {
5985 ASMProbeReadByte(pu8);
5986 cbBuf -= RT_ASM_PAGE_SIZE;
5987 pu8 += RT_ASM_PAGE_SIZE;
5988 }
5989
5990 /* the last byte */
5991 ASMProbeReadByte(pu8 + cbBuf - 1);
5992}
5993
5994
5995/**
5996 * Reverse the byte order of the given 16-bit integer.
5997 *
 * Implemented with the _byteswap_ushort intrinsic, an 8-bit rotate of the
 * 16-bit value on x86/AMD64, or the REV16 instruction on ARM.
 *
5998 * @returns The value with its two bytes swapped.
5999 * @param u16 16-bit integer value.
6000 */
6001#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6002RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6003#else
6004DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6005{
6006# if RT_INLINE_ASM_USES_INTRIN
6007 return _byteswap_ushort(u16);
6008
6009# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6010# if RT_INLINE_ASM_GNU_STYLE
 /* Rotating a 16-bit value by 8 bits exchanges its two bytes. */
6011 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6012# else
6013 _asm
6014 {
6015 mov ax, [u16]
6016 ror ax, 8
6017 mov [u16], ax
6018 }
6019# endif
6020 return u16;
6021
6022# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
 /* The result lands in a 32-bit register and is truncated on return. */
6023 uint32_t u32Ret;
6024 __asm__ __volatile__(
6025# if defined(RT_ARCH_ARM64)
6026 "rev16 %w[uRet], %w[uVal]\n\t"
6027# else
6028 "rev16 %[uRet], %[uVal]\n\t"
6029# endif
6030 : [uRet] "=r" (u32Ret)
6031 : [uVal] "r" (u16));
6032 return (uint16_t)u32Ret;
6033
6034# else
6035# error "Port me"
6036# endif
6037}
6038#endif
6039
6040
6041/**
6042 * Reverse the byte order of the given 32-bit integer.
6043 *
 * Implemented with the _byteswap_ulong intrinsic, the BSWAP instruction on
 * x86/AMD64, REV32 on ARM64 or REV on ARM32.
 *
6044 * @returns The value with its four bytes in reversed order.
6045 * @param u32 32-bit integer value.
6046 */
6047#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6048RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6049#else
6050DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6051{
6052# if RT_INLINE_ASM_USES_INTRIN
6053 return _byteswap_ulong(u32);
6054
6055# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6056# if RT_INLINE_ASM_GNU_STYLE
6057 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6058# else
6059 _asm
6060 {
6061 mov eax, [u32]
6062 bswap eax
6063 mov [u32], eax
6064 }
6065# endif
6066 return u32;
6067
6068# elif defined(RT_ARCH_ARM64)
 /* The input is widened to 64 bits for the register operand and only the
    low 32 bits of the result are returned. */
6069 uint64_t u64Ret;
6070 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
6071 : [uRet] "=r" (u64Ret)
6072 : [uVal] "r" ((uint64_t)u32));
6073 return (uint32_t)u64Ret;
6074
6075# elif defined(RT_ARCH_ARM32)
6076 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6077 : [uRet] "=r" (u32)
6078 : [uVal] "[uRet]" (u32));
6079 return u32;
6080
6081# else
6082# error "Port me"
6083# endif
6084}
6085#endif
6086
6087
6088/**
6089 * Reverse the byte order of the given 64-bit integer.
6090 *
 * Implemented with the _byteswap_uint64 intrinsic or BSWAPQ on AMD64 and REV
 * on ARM64; all other configurations byte-swap the two 32-bit halves with
 * ASMByteSwapU32 and exchange their positions.
 *
6091 * @returns The value with its eight bytes in reversed order.
6092 * @param u64 64-bit integer value.
6093 */
6094DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6095{
6096#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6097 return _byteswap_uint64(u64);
6098
6099# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6100 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
6101 return u64;
6102
6103# elif defined(RT_ARCH_ARM64)
6104 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6105 : [uRet] "=r" (u64)
6106 : [uVal] "[uRet]" (u64));
6107 return u64;
6108
6109#else
 /* Swapped low half becomes the high half and vice versa. */
6110 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6111 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6112#endif
6113}
6114
6115
6116
6117/** @defgroup grp_inline_bits Bit Operations
6118 * @{
6119 */
6120
6121
6122/**
6123 * Sets a bit in a bitmap.
6124 *
 * Uses the _bittestandset intrinsic or the BTS instruction (without a lock
 * prefix) on x86/AMD64; all other architectures OR the bit into the
 * containing 32-bit word via ASMAtomicUoOrU32.
 *
6125 * @param pvBitmap Pointer to the bitmap (little endian). This should be
6126 * 32-bit aligned.
6127 * @param iBit The bit to set.
6128 *
6129 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6130 * However, doing so will yield better performance as well as avoiding
6131 * traps accessing the last bits in the bitmap.
6132 */
6133#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6134RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6135#else
6136DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6137{
6138# if RT_INLINE_ASM_USES_INTRIN
6139 _bittestandset((long RT_FAR *)pvBitmap, iBit);
6140
6141# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6142# if RT_INLINE_ASM_GNU_STYLE
6143 __asm__ __volatile__("btsl %1, %0"
6144 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6145 : "Ir" (iBit)
6146 , "m" (*(volatile long RT_FAR *)pvBitmap)
6147 : "memory"
6148 , "cc");
6149# else
6150 __asm
6151 {
6152# ifdef RT_ARCH_AMD64
6153 mov rax, [pvBitmap]
6154 mov edx, [iBit]
6155 bts [rax], edx
6156# else
6157 mov eax, [pvBitmap]
6158 mov edx, [iBit]
6159 bts [eax], edx
6160# endif
6161 }
6162# endif
6163
6164# else
 /* Index of the 32-bit word holding the bit. */
6165 int32_t offBitmap = iBit / 32;
 /* NOTE(review): the statement run on assertion failure adds the byte
    misalignment of pvBitmap to a 32-bit word index and shifts iBit by the
    same number of bytes; it is not obvious that this compensates for a
    misaligned bitmap - confirm the intent. */
6166 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6167 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6168# endif
6169}
6170#endif
6171
6172
/**
 * Atomically sets a bit in a bitmap, ordered.
 *
 * @param   pvBitmap    Pointer to the bitmap (little endian).  Must be 32-bit
 *                      aligned, otherwise the memory access isn't atomic!
 * @param   iBit        The bit to set.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
{
    /* Alignment is only asserted here; no fix-up is attempted. */
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if RT_INLINE_ASM_USES_INTRIN
    _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* LOCK-prefixed BTS; the bitmap dword is both an output and an input. */
    __asm__ __volatile__("lock; btsl %1, %0"
                         : "=m" (*(volatile long *)pvBitmap)
                         : "Ir" (iBit)
                         , "m" (*(volatile long *)pvBitmap)
                         : "memory"
                         , "cc");
#  else
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [rax], edx
#   else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [eax], edx
#   endif
    }
#  endif

# else
    /* Portable fallback: atomic OR of the single-bit mask into the dword
       containing the bit (mask converted to the bitmap's little-endian layout). */
    ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
# endif
}
#endif
6218
6219
6220/**
6221 * Clears a bit in a bitmap.
6222 *
6223 * @param pvBitmap Pointer to the bitmap (little endian).
6224 * @param iBit The bit to clear.
6225 *
6226 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6227 * However, doing so will yield better performance as well as avoiding
6228 * traps accessing the last bits in the bitmap.
6229 */
6230#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6231RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6232#else
6233DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6234{
6235# if RT_INLINE_ASM_USES_INTRIN
6236 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6237
6238# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6239# if RT_INLINE_ASM_GNU_STYLE
6240 __asm__ __volatile__("btrl %1, %0"
6241 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6242 : "Ir" (iBit)
6243 , "m" (*(volatile long RT_FAR *)pvBitmap)
6244 : "memory"
6245 , "cc");
6246# else
6247 __asm
6248 {
6249# ifdef RT_ARCH_AMD64
6250 mov rax, [pvBitmap]
6251 mov edx, [iBit]
6252 btr [rax], edx
6253# else
6254 mov eax, [pvBitmap]
6255 mov edx, [iBit]
6256 btr [eax], edx
6257# endif
6258 }
6259# endif
6260
6261# else
6262 int32_t offBitmap = iBit / 32;
6263 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6264 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6265# endif
6266}
6267#endif
6268
6269
/**
 * Atomically clears a bit in a bitmap, ordered.
 *
 * @param   pvBitmap    Pointer to the bitmap (little endian).  Must be 32-bit
 *                      aligned, otherwise the memory access isn't atomic!
 * @param   iBit        The bit to clear.
 *
 * @remarks No memory barrier, take care on smp.
 * @remarks x86: Requires a 386 or later.
 * @note    (review) The "no memory barrier" remark appears to contradict the
 *          "ordered" wording in the brief above; confirm which one is intended.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
{
    /* Alignment is only asserted here; no fix-up is attempted. */
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* LOCK-prefixed BTR; the bitmap dword is both an output and an input. */
    __asm__ __volatile__("lock; btrl %1, %0"
                         : "=m" (*(volatile long RT_FAR *)pvBitmap)
                         : "Ir" (iBit)
                         , "m" (*(volatile long RT_FAR *)pvBitmap)
                         : "memory"
                         , "cc");
#  else
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [rax], edx
#   else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [eax], edx
#   endif
    }
#  endif
# else
    /* Portable fallback: atomic AND with the inverted single-bit mask on the
       dword containing the bit. */
    ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
# endif
}
#endif
6313
6314
6315/**
6316 * Toggles a bit in a bitmap.
6317 *
6318 * @param pvBitmap Pointer to the bitmap (little endian).
6319 * @param iBit The bit to toggle.
6320 *
6321 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6322 * However, doing so will yield better performance as well as avoiding
6323 * traps accessing the last bits in the bitmap.
6324 */
6325#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6326RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6327#else
6328DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6329{
6330# if RT_INLINE_ASM_USES_INTRIN
6331 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6332# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6333# if RT_INLINE_ASM_GNU_STYLE
6334 __asm__ __volatile__("btcl %1, %0"
6335 : "=m" (*(volatile long *)pvBitmap)
6336 : "Ir" (iBit)
6337 , "m" (*(volatile long *)pvBitmap)
6338 : "memory"
6339 , "cc");
6340# else
6341 __asm
6342 {
6343# ifdef RT_ARCH_AMD64
6344 mov rax, [pvBitmap]
6345 mov edx, [iBit]
6346 btc [rax], edx
6347# else
6348 mov eax, [pvBitmap]
6349 mov edx, [iBit]
6350 btc [eax], edx
6351# endif
6352 }
6353# endif
6354# else
6355 int32_t offBitmap = iBit / 32;
6356 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6357 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6358# endif
6359}
6360#endif
6361
6362
/**
 * Atomically toggles a bit in a bitmap, ordered.
 *
 * @param   pvBitmap    Pointer to the bitmap (little endian).  Must be 32-bit
 *                      aligned, otherwise the memory access isn't atomic!
 * @param   iBit        The bit to toggle.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
{
    /* Alignment is only asserted here; no fix-up is attempted. */
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* LOCK-prefixed BTC; the bitmap dword is both an output and an input. */
    __asm__ __volatile__("lock; btcl %1, %0"
                         : "=m" (*(volatile long RT_FAR *)pvBitmap)
                         : "Ir" (iBit)
                         , "m" (*(volatile long RT_FAR *)pvBitmap)
                         : "memory"
                         , "cc");
#  else
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [rax], edx
#   else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [eax], edx
#   endif
    }
#  endif
# else
    /* Portable fallback: atomic XOR of the single-bit mask into the dword
       containing the bit. */
    ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
# endif
}
#endif
6405
6406
6407/**
6408 * Tests and sets a bit in a bitmap.
6409 *
6410 * @returns true if the bit was set.
6411 * @returns false if the bit was clear.
6412 *
6413 * @param pvBitmap Pointer to the bitmap (little endian).
6414 * @param iBit The bit to test and set.
6415 *
6416 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6417 * However, doing so will yield better performance as well as avoiding
6418 * traps accessing the last bits in the bitmap.
6419 */
6420#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6421RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6422#else
6423DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6424{
6425 union { bool f; uint32_t u32; uint8_t u8; } rc;
6426# if RT_INLINE_ASM_USES_INTRIN
6427 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6428
6429# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6430# if RT_INLINE_ASM_GNU_STYLE
6431 __asm__ __volatile__("btsl %2, %1\n\t"
6432 "setc %b0\n\t"
6433 "andl $1, %0\n\t"
6434 : "=q" (rc.u32)
6435 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6436 : "Ir" (iBit)
6437 , "m" (*(volatile long RT_FAR *)pvBitmap)
6438 : "memory"
6439 , "cc");
6440# else
6441 __asm
6442 {
6443 mov edx, [iBit]
6444# ifdef RT_ARCH_AMD64
6445 mov rax, [pvBitmap]
6446 bts [rax], edx
6447# else
6448 mov eax, [pvBitmap]
6449 bts [eax], edx
6450# endif
6451 setc al
6452 and eax, 1
6453 mov [rc.u32], eax
6454 }
6455# endif
6456
6457# else
6458 int32_t offBitmap = iBit / 32;
6459 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6460 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6461 >> (iBit & 31);
6462 rc.u32 &= 1;
6463# endif
6464 return rc.f;
6465}
6466#endif
6467
6468
/**
 * Atomically tests and sets a bit in a bitmap, ordered.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 *
 * @param   pvBitmap    Pointer to the bitmap (little endian).  Must be 32-bit
 *                      aligned, otherwise the memory access isn't atomic!
 * @param   iBit        The bit to test and set.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
{
    /* The previous bit state is produced as an integer (0 or 1) and handed
       back through the bool member. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* LOCK BTS leaves the old bit in CF; SETC + AND turn it into 0 or 1. */
    __asm__ __volatile__("lock; btsl %2, %1\n\t"
                         "setc %b0\n\t"
                         "andl $1, %0\n\t"
                         : "=q" (rc.u32)
                         , "=m" (*(volatile long RT_FAR *)pvBitmap)
                         : "Ir" (iBit)
                         , "m" (*(volatile long RT_FAR *)pvBitmap)
                         : "memory"
                         , "cc");
#  else
    __asm
    {
        mov     edx, [iBit]
#   ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock bts [rax], edx
#   else
        mov     eax, [pvBitmap]
        lock bts [eax], edx
#   endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
#  endif

# else
    /* Portable fallback: atomic OR returning the old dword, from which the
       tested bit is shifted down to bit 0 and isolated. */
    rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
          >> (iBit & 31);
    rc.u32 &= 1;
# endif
    return rc.f;
}
#endif
6526
6527
6528/**
6529 * Tests and clears a bit in a bitmap.
6530 *
6531 * @returns true if the bit was set.
6532 * @returns false if the bit was clear.
6533 *
6534 * @param pvBitmap Pointer to the bitmap (little endian).
6535 * @param iBit The bit to test and clear.
6536 *
6537 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6538 * However, doing so will yield better performance as well as avoiding
6539 * traps accessing the last bits in the bitmap.
6540 */
6541#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6542RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6543#else
6544DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6545{
6546 union { bool f; uint32_t u32; uint8_t u8; } rc;
6547# if RT_INLINE_ASM_USES_INTRIN
6548 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6549
6550# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6551# if RT_INLINE_ASM_GNU_STYLE
6552 __asm__ __volatile__("btrl %2, %1\n\t"
6553 "setc %b0\n\t"
6554 "andl $1, %0\n\t"
6555 : "=q" (rc.u32)
6556 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6557 : "Ir" (iBit)
6558 , "m" (*(volatile long RT_FAR *)pvBitmap)
6559 : "memory"
6560 , "cc");
6561# else
6562 __asm
6563 {
6564 mov edx, [iBit]
6565# ifdef RT_ARCH_AMD64
6566 mov rax, [pvBitmap]
6567 btr [rax], edx
6568# else
6569 mov eax, [pvBitmap]
6570 btr [eax], edx
6571# endif
6572 setc al
6573 and eax, 1
6574 mov [rc.u32], eax
6575 }
6576# endif
6577
6578# else
6579 int32_t offBitmap = iBit / 32;
6580 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6581 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6582 >> (iBit & 31);
6583 rc.u32 &= 1;
6584# endif
6585 return rc.f;
6586}
6587#endif
6588
6589
/**
 * Atomically tests and clears a bit in a bitmap, ordered.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 *
 * @param   pvBitmap    Pointer to the bitmap (little endian).  Must be 32-bit
 *                      aligned, otherwise the memory access isn't atomic!
 * @param   iBit        The bit to test and clear.
 *
 * @remarks No memory barrier, take care on smp.
 * @remarks x86: Requires a 386 or later.
 * @note    (review) The "no memory barrier" remark appears to contradict the
 *          "ordered" wording in the brief above; confirm which one is intended.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
{
    /* The previous bit state is produced as an integer (0 or 1) and handed
       back through the bool member. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* LOCK BTR leaves the old bit in CF; SETC + AND turn it into 0 or 1. */
    __asm__ __volatile__("lock; btrl %2, %1\n\t"
                         "setc %b0\n\t"
                         "andl $1, %0\n\t"
                         : "=q" (rc.u32)
                         , "=m" (*(volatile long RT_FAR *)pvBitmap)
                         : "Ir" (iBit)
                         , "m" (*(volatile long RT_FAR *)pvBitmap)
                         : "memory"
                         , "cc");
#  else
    __asm
    {
        mov     edx, [iBit]
#   ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock btr [rax], edx
#   else
        mov     eax, [pvBitmap]
        lock btr [eax], edx
#   endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
#  endif

# else
    /* Portable fallback: atomic AND returning the old dword, from which the
       tested bit is shifted down to bit 0 and isolated. */
    rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
          >> (iBit & 31);
    rc.u32 &= 1;
# endif
    return rc.f;
}
#endif
6649
6650
6651/**
6652 * Tests and toggles a bit in a bitmap.
6653 *
6654 * @returns true if the bit was set.
6655 * @returns false if the bit was clear.
6656 *
6657 * @param pvBitmap Pointer to the bitmap (little endian).
6658 * @param iBit The bit to test and toggle.
6659 *
6660 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6661 * However, doing so will yield better performance as well as avoiding
6662 * traps accessing the last bits in the bitmap.
6663 */
6664#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6665RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6666#else
6667DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6668{
6669 union { bool f; uint32_t u32; uint8_t u8; } rc;
6670# if RT_INLINE_ASM_USES_INTRIN
6671 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6672
6673# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6674# if RT_INLINE_ASM_GNU_STYLE
6675 __asm__ __volatile__("btcl %2, %1\n\t"
6676 "setc %b0\n\t"
6677 "andl $1, %0\n\t"
6678 : "=q" (rc.u32)
6679 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6680 : "Ir" (iBit)
6681 , "m" (*(volatile long RT_FAR *)pvBitmap)
6682 : "memory"
6683 , "cc");
6684# else
6685 __asm
6686 {
6687 mov edx, [iBit]
6688# ifdef RT_ARCH_AMD64
6689 mov rax, [pvBitmap]
6690 btc [rax], edx
6691# else
6692 mov eax, [pvBitmap]
6693 btc [eax], edx
6694# endif
6695 setc al
6696 and eax, 1
6697 mov [rc.u32], eax
6698 }
6699# endif
6700
6701# else
6702 int32_t offBitmap = iBit / 32;
6703 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6704 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6705 >> (iBit & 31);
6706 rc.u32 &= 1;
6707# endif
6708 return rc.f;
6709}
6710#endif
6711
6712
6713/**
6714 * Atomically tests and toggles a bit in a bitmap, ordered.
6715 *
6716 * @returns true if the bit was set.
6717 * @returns false if the bit was clear.
6718 *
6719 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6720 * aligned, otherwise the memory access isn't atomic!
6721 * @param iBit The bit to test and toggle.
6722 *
6723 * @remarks x86: Requires a 386 or later.
6724 */
6725#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6726RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6727#else
6728DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6729{
6730 union { bool f; uint32_t u32; uint8_t u8; } rc;
6731 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6732# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6733# if RT_INLINE_ASM_GNU_STYLE
6734 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6735 "setc %b0\n\t"
6736 "andl $1, %0\n\t"
6737 : "=q" (rc.u32)
6738 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6739 : "Ir" (iBit)
6740 , "m" (*(volatile long RT_FAR *)pvBitmap)
6741 : "memory"
6742 , "cc");
6743# else
6744 __asm
6745 {
6746 mov edx, [iBit]
6747# ifdef RT_ARCH_AMD64
6748 mov rax, [pvBitmap]
6749 lock btc [rax], edx
6750# else
6751 mov eax, [pvBitmap]
6752 lock btc [eax], edx
6753# endif
6754 setc al
6755 and eax, 1
6756 mov [rc.u32], eax
6757 }
6758# endif
6759
6760# else
6761 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6762 >> (iBit & 31);
6763 rc.u32 &= 1;
6764# endif
6765 return rc.f;
6766}
6767#endif
6768
6769
6770/**
6771 * Tests if a bit in a bitmap is set.
6772 *
6773 * @returns true if the bit is set.
6774 * @returns false if the bit is clear.
6775 *
6776 * @param pvBitmap Pointer to the bitmap (little endian).
6777 * @param iBit The bit to test.
6778 *
6779 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6780 * However, doing so will yield better performance as well as avoiding
6781 * traps accessing the last bits in the bitmap.
6782 */
6783#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6784RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6785#else
6786DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6787{
6788 union { bool f; uint32_t u32; uint8_t u8; } rc;
6789# if RT_INLINE_ASM_USES_INTRIN
6790 rc.u32 = _bittest((long *)pvBitmap, iBit);
6791
6792# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6793# if RT_INLINE_ASM_GNU_STYLE
6794
6795 __asm__ __volatile__("btl %2, %1\n\t"
6796 "setc %b0\n\t"
6797 "andl $1, %0\n\t"
6798 : "=q" (rc.u32)
6799 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6800 , "Ir" (iBit)
6801 : "memory"
6802 , "cc");
6803# else
6804 __asm
6805 {
6806 mov edx, [iBit]
6807# ifdef RT_ARCH_AMD64
6808 mov rax, [pvBitmap]
6809 bt [rax], edx
6810# else
6811 mov eax, [pvBitmap]
6812 bt [eax], edx
6813# endif
6814 setc al
6815 and eax, 1
6816 mov [rc.u32], eax
6817 }
6818# endif
6819
6820# else
6821 int32_t offBitmap = iBit / 32;
6822 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6823 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6824 rc.u32 &= 1;
6825# endif
6826 return rc.f;
6827}
6828#endif
6829
6830
6831/**
6832 * Clears a bit range within a bitmap.
6833 *
6834 * @param pvBitmap Pointer to the bitmap (little endian).
6835 * @param iBitStart The First bit to clear.
6836 * @param iBitEnd The first bit not to clear.
6837 */
6838DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6839{
6840 if (iBitStart < iBitEnd)
6841 {
6842 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6843 size_t iStart = iBitStart & ~(size_t)31;
6844 size_t iEnd = iBitEnd & ~(size_t)31;
6845 if (iStart == iEnd)
6846 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6847 else
6848 {
6849 /* bits in first dword. */
6850 if (iBitStart & 31)
6851 {
6852 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6853 pu32++;
6854 iBitStart = iStart + 32;
6855 }
6856
6857 /* whole dwords. */
6858 if (iBitStart != iEnd)
6859 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6860
6861 /* bits in last dword. */
6862 if (iBitEnd & 31)
6863 {
6864 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6865 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6866 }
6867 }
6868 }
6869}
6870
6871
6872/**
6873 * Sets a bit range within a bitmap.
6874 *
6875 * @param pvBitmap Pointer to the bitmap (little endian).
6876 * @param iBitStart The First bit to set.
6877 * @param iBitEnd The first bit not to set.
6878 */
6879DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6880{
6881 if (iBitStart < iBitEnd)
6882 {
6883 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6884 size_t iStart = iBitStart & ~(size_t)31;
6885 size_t iEnd = iBitEnd & ~(size_t)31;
6886 if (iStart == iEnd)
6887 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6888 else
6889 {
6890 /* bits in first dword. */
6891 if (iBitStart & 31)
6892 {
6893 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6894 pu32++;
6895 iBitStart = iStart + 32;
6896 }
6897
6898 /* whole dword. */
6899 if (iBitStart != iEnd)
6900 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
6901
6902 /* bits in last dword. */
6903 if (iBitEnd & 31)
6904 {
6905 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6906 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6907 }
6908 }
6909 }
6910}
6911
6912
/**
 * Finds the first clear bit in a bitmap.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (or if cBits is zero).
 * @param   pvBitmap    Pointer to the bitmap (little endian).
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
#else
DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the constraints): edx = -1 (the default result),
         * ecx = number of dwords, edi = bitmap, eax = 0xffffffff.
         *
         * REPE SCASL skips dwords equal to eax, i.e. dwords with all bits set.
         * If the scan ends on a match (JE), edx is left at -1.  Otherwise edi
         * is stepped back to the mismatching dword, XOR with eax (all ones)
         * turns its clear bits into set bits, the byte offset from the bitmap
         * start (operand 5) is converted to a bit offset, and BSF adds the
         * index of the lowest set bit within the dword.
         *
         * NOTE(review): there is no "memory" clobber or "m" operand covering
         * the bitmap contents - confirm this cannot let the compiler delay
         * pending stores to the bitmap past this statement.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je    1f\n\t"
#  ifdef RT_ARCH_AMD64
                             "lea   -4(%%rdi), %%rdi\n\t"
                             "xorl  (%%rdi), %%eax\n\t"
                             "subq  %5, %%rdi\n\t"
#  else
                             "lea   -4(%%edi), %%edi\n\t"
                             "xorl  (%%edi), %%eax\n\t"
                             "subl  %5, %%edi\n\t"
#  endif
                             "shll  $3, %%edi\n\t"
                             "bsfl  %%eax, %%edx\n\t"
                             "addl  %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit)
                             , "=&c" (uECX)
                             , "=&D" (uEDI)
                             , "=&a" (uEAX)
                             : "0" (0xffffffff)
                             , "mr" (pvBitmap)
                             , "1" (cBits >> 5)
                             , "2" (pvBitmap)
                             , "3" (0xffffffff)
                             : "cc");
# else
        /* Same algorithm as the GNU-style variant, with ebx/rbx holding the
           bitmap start for the offset calculation. */
        cBits = RT_ALIGN_32(cBits, 32);
        __asm
        {
#  ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            mov     eax, edx
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done

#  ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            xor     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            xor     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
6996
6997
/**
 * Finds the next clear bit in a bitmap.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap (little endian).
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
#else
DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
{
    const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
    /* From here on iBitPrev is the first bit to examine; iBit is its
       position within the containing dword. */
    int                             iBit = ++iBitPrev & 31;
    if (iBit)
    {
        /*
         * Inspect the 32-bit word containing the unaligned bit.
         * The word is inverted so clear bits become set bits, and shifted so
         * that the first bit to examine ends up at bit 0.
         */
        uint32_t  u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;

# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
# else
#  if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32)
                             : "cc");
#  else
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
#  endif
        if (iBit >= 0)
            return iBit + (int)iBitPrev;
# endif

        /*
         * Skip ahead and see if there is anything left to search.
         * (iBitPrev is rounded up to the start of the next dword.)
         */
        iBitPrev |= 31;
        iBitPrev++;
        if (cBits <= (uint32_t)iBitPrev)
            return -1;
    }

    /*
     * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
     * Its result is relative to the dword it starts at, hence the adjustment.
     */
    iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
    if (iBit >= 0)
        iBit += iBitPrev;
    return iBit;
}
#endif
7068
7069
/**
 * Finds the first set bit in a bitmap.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (or if cBits is zero).
 * @param   pvBitmap    Pointer to the bitmap (little endian).
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
#else
DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the constraints): edx = -1 (the default result),
         * ecx = number of dwords, edi = bitmap, eax = 0.
         *
         * REPE SCASL skips dwords equal to eax, i.e. all-zero dwords.  If the
         * scan ends on a match (JE), edx is left at -1.  Otherwise edi is
         * stepped back to the non-zero dword, which is loaded into eax; the
         * byte offset from the bitmap start (operand 5) is converted to a bit
         * offset, and BSF adds the index of the lowest set bit in the dword.
         *
         * NOTE(review): there is no "memory" clobber or "m" operand covering
         * the bitmap contents - confirm this cannot let the compiler delay
         * pending stores to the bitmap past this statement.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je    1f\n\t"
#  ifdef RT_ARCH_AMD64
                             "lea   -4(%%rdi), %%rdi\n\t"
                             "movl  (%%rdi), %%eax\n\t"
                             "subq  %5, %%rdi\n\t"
#  else
                             "lea   -4(%%edi), %%edi\n\t"
                             "movl  (%%edi), %%eax\n\t"
                             "subl  %5, %%edi\n\t"
#  endif
                             "shll  $3, %%edi\n\t"
                             "bsfl  %%eax, %%edx\n\t"
                             "addl  %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit)
                             , "=&c" (uECX)
                             , "=&D" (uEDI)
                             , "=&a" (uEAX)
                             : "0" (0xffffffff)
                             , "mr" (pvBitmap)
                             , "1" (cBits >> 5)
                             , "2" (pvBitmap)
                             , "3" (0)
                             : "cc");
# else
        /* Same algorithm as the GNU-style variant, with ebx/rbx holding the
           bitmap start for the offset calculation. */
        cBits = RT_ALIGN_32(cBits, 32);
        __asm
        {
#  ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            xor     eax, eax
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done
#  ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            mov     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            mov     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
7152
7153
/**
 * Finds the next set bit in a bitmap.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap (little endian).
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
#else
DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
{
    const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
    /* From here on iBitPrev is the first bit to examine; iBit is its
       position within the containing dword. */
    int                             iBit = ++iBitPrev & 31;
    if (iBit)
    {
        /*
         * Inspect the 32-bit word containing the unaligned bit.
         * The word is shifted so that the first bit to examine ends up at bit 0.
         */
        uint32_t  u32 = pau32Bitmap[iBitPrev / 32] >> iBit;

# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
# else
#  if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32)
                             : "cc");
#  else
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
#  endif
        if (iBit >= 0)
            return iBit + (int)iBitPrev;
# endif

        /*
         * Skip ahead and see if there is anything left to search.
         * (iBitPrev is rounded up to the start of the next dword.)
         */
        iBitPrev |= 31;
        iBitPrev++;
        if (cBits <= (uint32_t)iBitPrev)
            return -1;
    }

    /*
     * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
     * Its result is relative to the dword it starts at, hence the adjustment.
     */
    iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
    if (iBit >= 0)
        iBit += iBitPrev;
    return iBit;
}
#endif
7224
7225
/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remarks Similar to ffs() in BSD.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
#else
DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic yields a zero-based index; convert to one-based, with 0
       meaning "no bit set". */
    unsigned long iBit;
    if (_BitScanForward(&iBit, u32))
        iBit++;
    else
        iBit = 0;

# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* BSF sets ZF when the source is zero: in that case the result is zeroed,
       otherwise the zero-based index is incremented to make it one-based. */
    uint32_t iBit;
    __asm__ __volatile__("bsf  %1, %0\n\t"
                         "jnz  1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp  2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32)
                         : "cc");
#  else
    uint32_t iBit;
    _asm
    {
        bsf     eax, [u32]
        jnz     found
        xor     eax, eax
        jmp     done
    found:
        inc     eax
    done:
        mov     [iBit], eax
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /*
     * Using the "count leading zeros (clz)" instruction here because there
     * is no dedicated instruction to get the first set bit.
     * Need to reverse the bits in the value with "rbit" first because
     * "clz" starts counting from the most significant bit.
     */
    uint32_t iBit;
    __asm__ __volatile__(
#  if defined(RT_ARCH_ARM64)
                         "rbit %w[uVal], %w[uVal]\n\t"
                         "clz  %w[iBit], %w[uVal]\n\t"
#  else
                         "rbit %[uVal], %[uVal]\n\t"
                         "clz  %[iBit], %[uVal]\n\t"
#  endif
                         : [uVal] "=r" (u32)
                         , [iBit] "=r" (iBit)
                         : "[uVal]" (u32));
    /* A count of 32 means the (reversed) value was zero. */
    if (iBit != 32)
        iBit++;
    else
        iBit = 0; /* No bit set. */

# else
#  error "Port me"
# endif
    return iBit;
}
#endif
7305
7306
7307/**
7308 * Finds the first bit which is set in the given 32-bit integer.
7309 * Bits are numbered from 1 (least significant) to 32.
7310 *
7311 * @returns index [1..32] of the first set bit.
7312 * @returns 0 if all bits are cleared.
7313 * @param i32 Integer to search for set bits.
7314 * @remark Similar to ffs() in BSD.
7315 */
7316DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7317{
7318 return ASMBitFirstSetU32((uint32_t)i32);
7319}
7320
7321
7322/**
7323 * Finds the first bit which is set in the given 64-bit integer.
7324 *
7325 * Bits are numbered from 1 (least significant) to 64.
7326 *
7327 * @returns index [1..64] of the first set bit.
7328 * @returns 0 if all bits are cleared.
7329 * @param u64 Integer to search for set bits.
7330 * @remarks Similar to ffs() in BSD.
7331 */
7332#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7333RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7334#else
7335DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7336{
7337# if RT_INLINE_ASM_USES_INTRIN
7338 unsigned long iBit;
7339# if ARCH_BITS == 64
7340 if (_BitScanForward64(&iBit, u64))
7341 iBit++;
7342 else
7343 iBit = 0;
7344# else
7345 if (_BitScanForward(&iBit, (uint32_t)u64))
7346 iBit++;
7347 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7348 iBit += 33;
7349 else
7350 iBit = 0;
7351# endif
7352
7353# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7354 uint64_t iBit;
7355 __asm__ __volatile__("bsfq %1, %0\n\t"
7356 "jnz 1f\n\t"
7357 "xorl %k0, %k0\n\t"
7358 "jmp 2f\n"
7359 "1:\n\t"
7360 "incl %k0\n"
7361 "2:\n\t"
7362 : "=r" (iBit)
7363 : "rm" (u64)
7364 : "cc");
7365
7366# elif defined(RT_ARCH_ARM64)
7367 uint64_t iBit;
7368 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7369 "clz %[iBit], %[uVal]\n\t"
7370 : [uVal] "=r" (u64)
7371 , [iBit] "=r" (iBit)
7372 : "[uVal]" (u64));
7373 if (iBit != 64)
7374 iBit++;
7375 else
7376 iBit = 0; /* No bit set. */
7377
7378# else
7379 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7380 if (!iBit)
7381 {
7382 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7383 if (iBit)
7384 iBit += 32;
7385 }
7386# endif
7387 return (unsigned)iBit;
7388}
7389#endif
7390
7391
7392/**
7393 * Finds the first bit which is set in the given 16-bit integer.
7394 *
7395 * Bits are numbered from 1 (least significant) to 16.
7396 *
7397 * @returns index [1..16] of the first set bit.
7398 * @returns 0 if all bits are cleared.
7399 * @param u16 Integer to search for set bits.
7400 * @remarks For 16-bit bs3kit code.
7401 */
7402#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7403RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7404#else
7405DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7406{
7407 return ASMBitFirstSetU32((uint32_t)u16);
7408}
7409#endif
7410
7411
7412/**
7413 * Finds the last bit which is set in the given 32-bit integer.
7414 * Bits are numbered from 1 (least significant) to 32.
7415 *
7416 * @returns index [1..32] of the last set bit.
7417 * @returns 0 if all bits are cleared.
7418 * @param u32 Integer to search for set bits.
7419 * @remark Similar to fls() in BSD.
7420 */
7421#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7422RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7423#else
7424DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7425{
7426# if RT_INLINE_ASM_USES_INTRIN
7427 unsigned long iBit;
7428 if (_BitScanReverse(&iBit, u32))
7429 iBit++;
7430 else
7431 iBit = 0;
7432
7433# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7434# if RT_INLINE_ASM_GNU_STYLE
7435 uint32_t iBit;
7436 __asm__ __volatile__("bsrl %1, %0\n\t"
7437 "jnz 1f\n\t"
7438 "xorl %0, %0\n\t"
7439 "jmp 2f\n"
7440 "1:\n\t"
7441 "incl %0\n"
7442 "2:\n\t"
7443 : "=r" (iBit)
7444 : "rm" (u32)
7445 : "cc");
7446# else
7447 uint32_t iBit;
7448 _asm
7449 {
7450 bsr eax, [u32]
7451 jnz found
7452 xor eax, eax
7453 jmp done
7454 found:
7455 inc eax
7456 done:
7457 mov [iBit], eax
7458 }
7459# endif
7460
7461# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7462 uint32_t iBit;
7463 __asm__ __volatile__(
7464# if defined(RT_ARCH_ARM64)
7465 "clz %w[iBit], %w[uVal]\n\t"
7466# else
7467 "clz %[iBit], %[uVal]\n\t"
7468# endif
7469 : [iBit] "=r" (iBit)
7470 : [uVal] "r" (u32));
7471 iBit = 32 - iBit;
7472
7473# else
7474# error "Port me"
7475# endif
7476 return iBit;
7477}
7478#endif
7479
7480
7481/**
7482 * Finds the last bit which is set in the given 32-bit integer.
7483 * Bits are numbered from 1 (least significant) to 32.
7484 *
7485 * @returns index [1..32] of the last set bit.
7486 * @returns 0 if all bits are cleared.
7487 * @param i32 Integer to search for set bits.
7488 * @remark Similar to fls() in BSD.
7489 */
7490DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7491{
7492 return ASMBitLastSetU32((uint32_t)i32);
7493}
7494
7495
7496/**
7497 * Finds the last bit which is set in the given 64-bit integer.
7498 *
7499 * Bits are numbered from 1 (least significant) to 64.
7500 *
7501 * @returns index [1..64] of the last set bit.
7502 * @returns 0 if all bits are cleared.
7503 * @param u64 Integer to search for set bits.
7504 * @remark Similar to fls() in BSD.
7505 */
7506#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7507RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7508#else
7509DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7510{
7511# if RT_INLINE_ASM_USES_INTRIN
7512 unsigned long iBit;
7513# if ARCH_BITS == 64
7514 if (_BitScanReverse64(&iBit, u64))
7515 iBit++;
7516 else
7517 iBit = 0;
7518# else
7519 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7520 iBit += 33;
7521 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7522 iBit++;
7523 else
7524 iBit = 0;
7525# endif
7526
7527# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7528 uint64_t iBit;
7529 __asm__ __volatile__("bsrq %1, %0\n\t"
7530 "jnz 1f\n\t"
7531 "xorl %k0, %k0\n\t"
7532 "jmp 2f\n"
7533 "1:\n\t"
7534 "incl %k0\n"
7535 "2:\n\t"
7536 : "=r" (iBit)
7537 : "rm" (u64)
7538 : "cc");
7539
7540# elif defined(RT_ARCH_ARM64)
7541 uint64_t iBit;
7542 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7543 : [iBit] "=r" (iBit)
7544 : [uVal] "r" (u64));
7545 iBit = 64 - iBit;
7546
7547# else
7548 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7549 if (iBit)
7550 iBit += 32;
7551 else
7552 iBit = ASMBitLastSetU32((uint32_t)u64);
7553# endif
7554 return (unsigned)iBit;
7555}
7556#endif
7557
7558
7559/**
7560 * Finds the last bit which is set in the given 16-bit integer.
7561 *
7562 * Bits are numbered from 1 (least significant) to 16.
7563 *
7564 * @returns index [1..16] of the last set bit.
7565 * @returns 0 if all bits are cleared.
7566 * @param u16 Integer to search for set bits.
7567 * @remarks For 16-bit bs3kit code.
7568 */
7569#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7570RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7571#else
7572DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7573{
7574 return ASMBitLastSetU32((uint32_t)u16);
7575}
7576#endif
7577
7578
7579/**
7580 * Count the number of leading zero bits in the given 32-bit integer.
7581 *
7582 * The counting starts with the most significate bit.
7583 *
7584 * @returns Number of most significant zero bits.
7585 * @returns 32 if all bits are cleared.
7586 * @param u32 Integer to consider.
7587 * @remarks Similar to __builtin_clz() in gcc, except defined zero input result.
7588 */
7589#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7590RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7591#else
7592DECLINLINE(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7593{
7594# if RT_INLINE_ASM_USES_INTRIN
7595 unsigned long iBit;
7596 if (!_BitScanReverse(&iBit, u32))
7597 return 32;
7598 return 31 - (unsigned)iBit;
7599
7600# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7601 uint32_t iBit;
7602# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 929 vs 237 ps/call */
7603 __asm__ __volatile__("bsrl %1, %0\n\t"
7604 "cmovzl %2, %0\n\t"
7605 : "=&r" (iBit)
7606 : "rm" (u32)
7607 , "rm" ((int32_t)-1)
7608 : "cc");
7609# elif RT_INLINE_ASM_GNU_STYLE
7610 __asm__ __volatile__("bsr %1, %0\n\t"
7611 "jnz 1f\n\t"
7612 "mov $-1, %0\n\t"
7613 "1:\n\t"
7614 : "=r" (iBit)
7615 : "rm" (u32)
7616 : "cc");
7617# else
7618 _asm
7619 {
7620 bsr eax, [u32]
7621 jnz found
7622 mov eax, -1
7623 found:
7624 mov [iBit], eax
7625 }
7626# endif
7627 return 31 - iBit;
7628
7629# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7630 uint32_t iBit;
7631 __asm__ __volatile__(
7632# if defined(RT_ARCH_ARM64)
7633 "clz %w[iBit], %w[uVal]\n\t"
7634# else
7635 "clz %[iBit], %[uVal]\n\t"
7636# endif
7637 : [uVal] "=r" (u32)
7638 , [iBit] "=r" (iBit)
7639 : "[uVal]" (u32));
7640 return iBit;
7641
7642# elif defined(__GNUC__)
7643 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7644 return u32 ? __builtin_clz(u32) : 32;
7645
7646# else
7647# error "Port me"
7648# endif
7649}
7650#endif
7651
7652
7653/**
7654 * Count the number of leading zero bits in the given 64-bit integer.
7655 *
7656 * The counting starts with the most significate bit.
7657 *
7658 * @returns Number of most significant zero bits.
7659 * @returns 64 if all bits are cleared.
7660 * @param u64 Integer to consider.
7661 * @remarks Similar to __builtin_clzl() in gcc, except defined zero input
7662 * result.
7663 */
7664#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7665RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7666#else
7667DECLINLINE(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7668{
7669# if RT_INLINE_ASM_USES_INTRIN
7670 unsigned long iBit;
7671# if ARCH_BITS == 64
7672 if (_BitScanReverse64(&iBit, u64))
7673 return 63 - (unsigned)iBit;
7674# else
7675 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7676 return 31 - (unsigned)iBit;
7677 if (_BitScanReverse(&iBit, (uint32_t)u64))
7678 return 63 - (unsigned)iBit;
7679# endif
7680 return 64;
7681
7682# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7683 uint64_t iBit;
7684# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7685 __asm__ __volatile__("bsrq %1, %0\n\t"
7686 "cmovzq %2, %0\n\t"
7687 : "=&r" (iBit)
7688 : "rm" (u64)
7689 , "rm" ((int64_t)-1)
7690 : "cc");
7691# else /* 10980xe benchmark: 262 ps/call */
7692 __asm__ __volatile__("bsrq %1, %0\n\t"
7693 "jnz 1f\n\t"
7694 "mov $-1, %0\n\t"
7695 "1:\n\t"
7696 : "=&r" (iBit)
7697 : "rm" (u64)
7698 : "cc");
7699# endif
7700 return 63 - (unsigned)iBit;
7701
7702# elif defined(RT_ARCH_ARM64)
7703 uint64_t iBit;
7704 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7705 : [uVal] "=r" (u64)
7706 , [iBit] "=r" (iBit)
7707 : "[uVal]" (u64));
7708 return (unsigned)iBit;
7709
7710# elif defined(__GNUC__) && ARCH_BITS == 64
7711 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7712 return u64 ? __builtin_clzl(u64) : 64;
7713
7714# else
7715 unsigned iBit = ASMCountLeadingZerosU32((uint32_t)(u64 >> 32));
7716 if (iBit == 32)
7717 iBit = ASMCountLeadingZerosU32((uint32_t)u64) + 32;
7718 return iBit;
7719# endif
7720}
7721#endif
7722
7723
7724/**
7725 * Count the number of leading zero bits in the given 16-bit integer.
7726 *
7727 * The counting starts with the most significate bit.
7728 *
7729 * @returns Number of most significant zero bits.
7730 * @returns 16 if all bits are cleared.
7731 * @param u16 Integer to consider.
7732 */
7733#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7734RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7735#else
7736DECLINLINE(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7737{
7738# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 987 vs 292 ps/call) */
7739 uint16_t iBit;
7740 __asm__ __volatile__("bsrw %1, %0\n\t"
7741 "jnz 1f\n\t"
7742 "mov $-1, %0\n\t"
7743 "1:\n\t"
7744 : "=r" (iBit)
7745 : "rm" (u16)
7746 : "cc");
7747 return 15 - (int16_t)iBit;
7748# else
7749 return ASMCountLeadingZerosU32((uint32_t)u16) - 16;
7750# endif
7751}
7752#endif
7753
7754
7755/**
7756 * Count the number of trailing zero bits in the given 32-bit integer.
7757 *
7758 * The counting starts with the least significate bit, i.e. the zero bit.
7759 *
7760 * @returns Number of lest significant zero bits.
7761 * @returns 32 if all bits are cleared.
7762 * @param u32 Integer to consider.
7763 * @remarks Similar to __builtin_ctz() in gcc, except defined zero input result.
7764 */
7765#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7766RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7767#else
7768DECLINLINE(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7769{
7770# if RT_INLINE_ASM_USES_INTRIN
7771 unsigned long iBit;
7772 if (!_BitScanForward(&iBit, u32))
7773 return 32;
7774 return (unsigned)iBit;
7775
7776# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7777 uint32_t iBit;
7778# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 932 vs 240 ps/call */
7779 __asm__ __volatile__("bsfl %1, %0\n\t"
7780 "cmovzl %2, %0\n\t"
7781 : "=&r" (iBit)
7782 : "rm" (u32)
7783 , "rm" ((int32_t)32)
7784 : "cc");
7785# elif RT_INLINE_ASM_GNU_STYLE
7786 __asm__ __volatile__("bsfl %1, %0\n\t"
7787 "jnz 1f\n\t"
7788 "mov $32, %0\n\t"
7789 "1:\n\t"
7790 : "=r" (iBit)
7791 : "rm" (u32)
7792 : "cc");
7793# else
7794 _asm
7795 {
7796 bsf eax, [u32]
7797 jnz found
7798 mov eax, 32
7799 found:
7800 mov [iBit], eax
7801 }
7802# endif
7803 return iBit;
7804
7805# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7806 /* Invert the bits and use clz. */
7807 uint32_t iBit;
7808 __asm__ __volatile__(
7809# if defined(RT_ARCH_ARM64)
7810 "rbit %w[uVal], %w[uVal]\n\t"
7811 "clz %w[iBit], %w[uVal]\n\t"
7812# else
7813 "rbit %[uVal], %[uVal]\n\t"
7814 "clz %[iBit], %[uVal]\n\t"
7815# endif
7816 : [uVal] "=r" (u32)
7817 , [iBit] "=r" (iBit)
7818 : "[uVal]" (u32));
7819 return iBit;
7820
7821# elif defined(__GNUC__)
7822 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7823 return u32 ? __builtin_ctz(u32) : 32;
7824
7825# else
7826# error "Port me"
7827# endif
7828}
7829#endif
7830
7831
7832/**
7833 * Count the number of trailing zero bits in the given 64-bit integer.
7834 *
7835 * The counting starts with the least significate bit.
7836 *
7837 * @returns Number of least significant zero bits.
7838 * @returns 64 if all bits are cleared.
7839 * @param u64 Integer to consider.
7840 * @remarks Similar to __builtin_ctzl() in gcc, except defined zero input
7841 * result.
7842 */
7843#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7844RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7845#else
7846DECLINLINE(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7847{
7848# if RT_INLINE_ASM_USES_INTRIN
7849 unsigned long iBit;
7850# if ARCH_BITS == 64
7851 if (_BitScanForward64(&iBit, u64))
7852 return (unsigned)iBit;
7853# else
7854 if (_BitScanForward(&iBit, (uint32_t)u64))
7855 return (unsigned)iBit;
7856 if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7857 return (unsigned)iBit + 32;
7858# endif
7859 return 64;
7860
7861# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7862 uint64_t iBit;
7863# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7864 __asm__ __volatile__("bsfq %1, %0\n\t"
7865 "cmovzq %2, %0\n\t"
7866 : "=&r" (iBit)
7867 : "rm" (u64)
7868 , "rm" ((int64_t)64)
7869 : "cc");
7870# else /* 10980xe benchmark: 262 ps/call */
7871 __asm__ __volatile__("bsfq %1, %0\n\t"
7872 "jnz 1f\n\t"
7873 "mov $64, %0\n\t"
7874 "1:\n\t"
7875 : "=&r" (iBit)
7876 : "rm" (u64)
7877 : "cc");
7878# endif
7879 return (unsigned)iBit;
7880
7881# elif defined(RT_ARCH_ARM64)
7882 /* Invert the bits and use clz. */
7883 uint64_t iBit;
7884 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7885 "clz %[iBit], %[uVal]\n\t"
7886 : [uVal] "=r" (u64)
7887 , [iBit] "=r" (iBit)
7888 : "[uVal]" (u64));
7889 return (unsigned)iBit;
7890
7891# elif defined(__GNUC__) && ARCH_BITS == 64
7892 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7893 return u64 ? __builtin_ctzl(u64) : 64;
7894
7895# else
7896 unsigned iBit = ASMCountTrailingZerosU32((uint32_t)u64);
7897 if (iBit == 32)
7898 iBit = ASMCountTrailingZerosU32((uint32_t)(u64 >> 32)) + 32;
7899 return iBit;
7900# endif
7901}
7902#endif
7903
7904
7905/**
7906 * Count the number of trailing zero bits in the given 16-bit integer.
7907 *
7908 * The counting starts with the most significate bit.
7909 *
7910 * @returns Number of most significant zero bits.
7911 * @returns 16 if all bits are cleared.
7912 * @param u16 Integer to consider.
7913 */
7914#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7915RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7916#else
7917DECLINLINE(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7918{
7919# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 992 vs 349 ps/call) */
7920 uint16_t iBit;
7921 __asm__ __volatile__("bsfw %1, %0\n\t"
7922 "jnz 1f\n\t"
7923 "mov $16, %0\n\t"
7924 "1:\n\t"
7925 : "=r" (iBit)
7926 : "rm" (u16)
7927 : "cc");
7928 return iBit;
7929# else
7930 return ASMCountTrailingZerosU32((uint32_t)u16 | UINT32_C(0x10000));
7931#endif
7932}
7933#endif
7934
7935
7936/**
7937 * Rotate 32-bit unsigned value to the left by @a cShift.
7938 *
7939 * @returns Rotated value.
7940 * @param u32 The value to rotate.
7941 * @param cShift How many bits to rotate by.
7942 */
7943#ifdef __WATCOMC__
7944RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7945#else
7946DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7947{
7948# if RT_INLINE_ASM_USES_INTRIN
7949 return _rotl(u32, cShift);
7950
7951# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7952 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7953 return u32;
7954
7955# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7956 __asm__ __volatile__(
7957# if defined(RT_ARCH_ARM64)
7958 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7959# else
7960 "ror %[uRet], %[uVal], %[cShift]\n\t"
7961# endif
7962 : [uRet] "=r" (u32)
7963 : [uVal] "[uRet]" (u32)
7964 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7965 return u32;
7966
7967# else
7968 cShift &= 31;
7969 return (u32 << cShift) | (u32 >> (32 - cShift));
7970# endif
7971}
7972#endif
7973
7974
7975/**
7976 * Rotate 32-bit unsigned value to the right by @a cShift.
7977 *
7978 * @returns Rotated value.
7979 * @param u32 The value to rotate.
7980 * @param cShift How many bits to rotate by.
7981 */
7982#ifdef __WATCOMC__
7983RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7984#else
7985DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7986{
7987# if RT_INLINE_ASM_USES_INTRIN
7988 return _rotr(u32, cShift);
7989
7990# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7991 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7992 return u32;
7993
7994# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7995 __asm__ __volatile__(
7996# if defined(RT_ARCH_ARM64)
7997 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7998# else
7999 "ror %[uRet], %[uVal], %[cShift]\n\t"
8000# endif
8001 : [uRet] "=r" (u32)
8002 : [uVal] "[uRet]" (u32)
8003 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
8004 return u32;
8005
8006# else
8007 cShift &= 31;
8008 return (u32 >> cShift) | (u32 << (32 - cShift));
8009# endif
8010}
8011#endif
8012
8013
8014/**
8015 * Rotate 64-bit unsigned value to the left by @a cShift.
8016 *
8017 * @returns Rotated value.
8018 * @param u64 The value to rotate.
8019 * @param cShift How many bits to rotate by.
8020 */
8021DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8022{
8023#if RT_INLINE_ASM_USES_INTRIN
8024 return _rotl64(u64, cShift);
8025
8026#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8027 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8028 return u64;
8029
8030#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8031 uint32_t uSpill;
8032 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8033 "jz 1f\n\t"
8034 "xchgl %%eax, %%edx\n\t"
8035 "1:\n\t"
8036 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8037 "jz 2f\n\t"
8038 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8039 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
8040 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
8041 "2:\n\t" /* } */
8042 : "=A" (u64)
8043 , "=c" (cShift)
8044 , "=r" (uSpill)
8045 : "0" (u64)
8046 , "1" (cShift)
8047 : "cc");
8048 return u64;
8049
8050# elif defined(RT_ARCH_ARM64)
8051 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8052 : [uRet] "=r" (u64)
8053 : [uVal] "[uRet]" (u64)
8054 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
8055 return u64;
8056
8057#else
8058 cShift &= 63;
8059 return (u64 << cShift) | (u64 >> (64 - cShift));
8060#endif
8061}
8062
8063
8064/**
8065 * Rotate 64-bit unsigned value to the right by @a cShift.
8066 *
8067 * @returns Rotated value.
8068 * @param u64 The value to rotate.
8069 * @param cShift How many bits to rotate by.
8070 */
8071DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8072{
8073#if RT_INLINE_ASM_USES_INTRIN
8074 return _rotr64(u64, cShift);
8075
8076#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8077 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8078 return u64;
8079
8080#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8081 uint32_t uSpill;
8082 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8083 "jz 1f\n\t"
8084 "xchgl %%eax, %%edx\n\t"
8085 "1:\n\t"
8086 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8087 "jz 2f\n\t"
8088 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8089 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
8090 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
8091 "2:\n\t" /* } */
8092 : "=A" (u64)
8093 , "=c" (cShift)
8094 , "=r" (uSpill)
8095 : "0" (u64)
8096 , "1" (cShift)
8097 : "cc");
8098 return u64;
8099
8100# elif defined(RT_ARCH_ARM64)
8101 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8102 : [uRet] "=r" (u64)
8103 : [uVal] "[uRet]" (u64)
8104 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
8105 return u64;
8106
8107#else
8108 cShift &= 63;
8109 return (u64 >> cShift) | (u64 << (64 - cShift));
8110#endif
8111}
8112
8113/** @} */
8114
8115
8116/** @} */
8117
8118/*
8119 * Include #pragma aux definitions for Watcom C/C++.
8120 */
8121#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
8122# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
8123# undef IPRT_INCLUDED_asm_watcom_x86_16_h
8124# include "asm-watcom-x86-16.h"
8125#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
8126# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
8127# undef IPRT_INCLUDED_asm_watcom_x86_32_h
8128# include "asm-watcom-x86-32.h"
8129#endif
8130
8131#endif /* !IPRT_INCLUDED_asm_h */
8132
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette