VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 76410

Last change on this file since 76410 was 75618, checked in by vboxsync, 6 years ago

asm.h: do not use typeof for old gcc versions in ASMAtomicWriteNullPtr().
This gets rid of typecast warnings building kernel modules on old systems.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 165.5 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98/** @defgroup grp_rt_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 *
101 * @remarks The difference between ordered and unordered atomic operations is that
102 * the former will complete outstanding reads and writes before continuing
103 * while the latter don't make any promises about the order. Ordered
104 * operations don't, it seems, make any 100% promise with regard to whether
105 * the operation will complete before any subsequent memory access.
106 * (Please correct this if it is wrong.)
107 *
108 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
109 * are unordered (note the Uo).
110 *
111 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
112 * or even optimize assembler instructions away. For instance, in the following code
113 * the second rdmsr instruction is optimized away because gcc treats that instruction
114 * as deterministic:
115 *
116 * @code
117 * static inline uint64_t rdmsr_low(int idx)
118 * {
119 * uint32_t low;
120 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
121 * }
122 * ...
123 * uint32_t msr1 = rdmsr_low(1);
124 * foo(msr1);
125 * msr1 = rdmsr_low(1);
126 * bar(msr1);
127 * @endcode
128 *
129 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
130 * use the result of the first call as input parameter for bar() as well. For rdmsr this
131 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
132 * machine status information in general.
133 *
134 * @{
135 */
136
137
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
 * definitely not for 5.x.
 *
 * Defined as 1 when the RT_GNUC_PREREQ(4, 3) check passes, the
 * RT_GNUC_PREREQ(5, 0) check fails and __i386__ is defined; otherwise 0.
 */
#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
# define RT_INLINE_ASM_GCC_4_3_X_X86 1
#else
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
147
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * The value is only computed here when the macro is not already defined, so a
 * definition made before this point takes precedence.  When it is non-zero the
 * 64-bit exchange / compare-exchange functions in this file are declared as
 * external prototypes instead of being defined inline.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# elif (   (defined(PIC) || defined(__PIC__)) \
        && defined(RT_ARCH_X86) \
        && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
            || defined(RT_OS_DARWIN)) )
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
170
171
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress() intrinsic for Visual C++ and to
 * __builtin_return_address(0) for GCC (and when Doxygen is running).  For
 * Watcom it expands to a call to a deliberately descriptive, undeclared
 * function name, so the problem only surfaces where the macro is actually
 * used.  Any other compiler is rejected with \#error.
 */
#ifdef _MSC_VER
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#elif defined(__WATCOMC__)
# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
#else
# error "Unsupported compiler."
#endif
189
190
/**
 * Compiler memory barrier.
 *
 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 * values or any outstanding writes when returning from this function.
 *
 * This function must be used if non-volatile data is modified by a
 * device or the VMM. Typical cases are port access, MMIO access,
 * trapping instruction, etc.
 *
 * Implementation per compiler flavour:
 *  - GNU style inline asm: an empty asm statement with a "memory" clobber.
 *  - Intrinsics: _ReadWriteBarrier().
 *  - Watcom: prototype only, the implementation is supplied elsewhere.
 *  - Otherwise: an inline function containing an empty __asm block.
 */
#if RT_INLINE_ASM_GNU_STYLE
# define ASMCompilerBarrier()   do { __asm__ __volatile__("" : : : "memory"); } while (0)
#elif RT_INLINE_ASM_USES_INTRIN
# define ASMCompilerBarrier()   do { _ReadWriteBarrier(); } while (0)
#elif defined(__WATCOMC__)
void ASMCompilerBarrier(void);
#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
DECLINLINE(void) ASMCompilerBarrier(void)
{
    __asm
    {
    }
}
#endif
215
216
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Plain alias that expands to RT_BREAKPOINT().
 *
 * @deprecated Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint() RT_BREAKPOINT()
223
224
/**
 * Spinloop hint for platforms that have these, empty function on the other
 * platforms.
 *
 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
 * spin locks.
 *
 * When RT_INLINE_ASM_EXTERNAL is set on x86/AMD64 only a prototype is emitted
 * here.  Otherwise the function is defined inline; on x86/AMD64 it emits the
 * two raw opcode bytes 0xF3 0x90, on every other architecture the body is
 * empty.
 */
#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void);
#else
DECLINLINE(void) ASMNopPause(void)
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* Emitted as raw bytes rather than by mnemonic. */
    __asm__ __volatile__(".byte 0xf3,0x90\n\t");
#  else
    __asm {
        _emit 0f3h
        _emit 090h
    }
#  endif
# else
    /* dummy */
# endif
}
#endif
251
252
/**
 * Atomically Exchange an unsigned 8-bit value, ordered.
 *
 * Swaps u8 with *pu8 using an XCHG instruction on the memory operand and
 * returns what the register holds afterwards, i.e. the previous *pu8 content.
 * When RT_INLINE_ASM_EXTERNAL is set only the prototype is emitted here.
 *
 * @returns The value *pu8 held before the exchange.
 * @param   pu8     Pointer to the 8-bit variable to update.
 * @param   u8      The 8-bit value to assign to *pu8.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
#else
DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* u8 is both input ("1" ties it to output operand 1) and output. */
    __asm__ __volatile__("xchgb %0, %1\n\t"
                         : "=m" (*pu8),
                           "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
                         : "1" (u8),
                           "m" (*pu8));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu8]
        mov     al, [u8]
        xchg    [rdx], al
        mov     [u8], al
#  else
        mov     edx, [pu8]
        mov     al, [u8]
        xchg    [edx], al
        mov     [u8], al
#  endif
    }
# endif
    return u8;
}
#endif
290
291
292/**
293 * Atomically Exchange a signed 8-bit value, ordered.
294 *
295 * @returns Current *pu8 value
296 * @param pi8 Pointer to the 8-bit variable to update.
297 * @param i8 The 8-bit value to assign to *pi8.
298 */
299DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
300{
301 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
302}
303
304
305/**
306 * Atomically Exchange a bool value, ordered.
307 *
308 * @returns Current *pf value
309 * @param pf Pointer to the 8-bit variable to update.
310 * @param f The 8-bit value to assign to *pi8.
311 */
312DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
313{
314#ifdef _MSC_VER
315 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
316#else
317 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
318#endif
319}
320
321
/**
 * Atomically Exchange an unsigned 16-bit value, ordered.
 *
 * Swaps u16 with *pu16 using an XCHG instruction on the memory operand and
 * returns what the register holds afterwards, i.e. the previous *pu16 content.
 * When RT_INLINE_ASM_EXTERNAL is set only the prototype is emitted here.
 *
 * @returns The value *pu16 held before the exchange.
 * @param   pu16    Pointer to the 16-bit variable to update.
 * @param   u16     The 16-bit value to assign to *pu16.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
#else
DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* u16 is both input ("1" ties it to output operand 1) and output. */
    __asm__ __volatile__("xchgw %0, %1\n\t"
                         : "=m" (*pu16),
                           "=r" (u16)
                         : "1" (u16),
                           "m" (*pu16));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu16]
        mov     ax, [u16]
        xchg    [rdx], ax
        mov     [u16], ax
#  else
        mov     edx, [pu16]
        mov     ax, [u16]
        xchg    [edx], ax
        mov     [u16], ax
#  endif
    }
# endif
    return u16;
}
#endif
359
360
361/**
362 * Atomically Exchange a signed 16-bit value, ordered.
363 *
364 * @returns Current *pu16 value
365 * @param pi16 Pointer to the 16-bit variable to update.
366 * @param i16 The 16-bit value to assign to *pi16.
367 */
368DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
369{
370 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
371}
372
373
/**
 * Atomically Exchange an unsigned 32-bit value, ordered.
 *
 * Three implementations are selected at compile time: GNU style inline asm
 * (XCHG on the memory operand), the _InterlockedExchange() intrinsic, or
 * MSC style inline asm.  When RT_INLINE_ASM_EXTERNAL is set and intrinsics
 * are not in use, only the prototype is emitted here.
 *
 * @returns The value *pu32 held before the exchange.
 * @param   pu32    Pointer to the 32-bit variable to update.
 * @param   u32     The 32-bit value to assign to *pu32.
 *
 * @remarks Does not work on 286 and earlier.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* u32 is both input ("1" ties it to output operand 1) and output. */
    __asm__ __volatile__("xchgl %0, %1\n\t"
                         : "=m" (*pu32),
                           "=r" (u32)
                         : "1" (u32),
                           "m" (*pu32));

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic takes a long pointer, hence the cast. */
    u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);

# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        mov     eax, u32
        xchg    [rdx], eax
        mov     [u32], eax
#  else
        mov     edx, [pu32]
        mov     eax, u32
        xchg    [edx], eax
        mov     [u32], eax
#  endif
    }
# endif
    return u32;
}
#endif
417
418
419/**
420 * Atomically Exchange a signed 32-bit value, ordered.
421 *
422 * @returns Current *pu32 value
423 * @param pi32 Pointer to the 32-bit variable to update.
424 * @param i32 The 32-bit value to assign to *pi32.
425 */
426DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
427{
428 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
429}
430
431
/**
 * Atomically Exchange an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a plain 64-bit exchange (intrinsic, XCHGQ or MSC style
 * asm).  On 32-bit x86 there is no 64-bit XCHG, so the exchange is built from
 * a LOCK CMPXCHG8B retry loop: EDX:EAX holds the expected value, ECX:EBX the
 * new value, and the loop repeats until the compare succeeds.
 *
 * Only a prototype is emitted when RT_INLINE_ASM_EXTERNAL is set without
 * intrinsics, or when RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC is non-zero.
 *
 * @returns The value *pu64 held before the exchange.
 * @param   pu64    Pointer to the 64-bit variable to update.
 * @param   u64     The 64-bit value to assign to *pu64.
 *
 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
#else
DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
{
# if defined(RT_ARCH_AMD64)
#  if RT_INLINE_ASM_USES_INTRIN
    u64 = _InterlockedExchange64((__int64 *)pu64, u64);

#  elif RT_INLINE_ASM_GNU_STYLE
    /* u64 is both input ("1" ties it to output operand 1) and output. */
    __asm__ __volatile__("xchgq %0, %1\n\t"
                         : "=m" (*pu64),
                           "=r" (u64)
                         : "1" (u64),
                           "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [u64]
        xchg    [rdx], rax
        mov     [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* PIC build: EBX is not handed to the asm as an operand.  The low half of
       the new value is parked in memory (u32EBX), swapped into EBX before the
       loop and the original EBX is moved back afterwards - presumably because
       EBX is reserved in PIC code (NOTE(review): confirm). */
    uint32_t u32EBX = (uint32_t)u64;
    __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
                         "xchgl %%ebx, %3\n\t"
                         "1:\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "jnz 1b\n\t"
                         "movl %3, %%ebx\n\t"
                         /*"xchgl %%esi, %5\n\t"*/
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (*pu64),
                           "m" ( u32EBX ),
                           "c" ( (uint32_t)(u64 >> 32) ),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX ("A") starts out as the current *pu64 and receives the old
       value; ECX:EBX carry the new value. */
    __asm__ __volatile__("1:\n\t"
                         "lock; cmpxchg8b %1\n\t"
                         "jnz 1b\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (*pu64),
                           "b" ( (uint32_t)u64 ),
                           "c" ( (uint32_t)(u64 >> 32) ));
#   endif
#  else
    __asm
    {
        mov     ebx, dword ptr [u64]
        mov     ecx, dword ptr [u64 + 4]
        mov     edi, pu64
        mov     eax, dword ptr [edi]
        mov     edx, dword ptr [edi + 4]
    retry:
        lock cmpxchg8b [edi]
        jnz retry
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
512
513
514/**
515 * Atomically Exchange an signed 64-bit value, ordered.
516 *
517 * @returns Current *pi64 value
518 * @param pi64 Pointer to the 64-bit variable to update.
519 * @param i64 The 64-bit value to assign to *pi64.
520 */
521DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
522{
523 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
524}
525
526
527/**
528 * Atomically Exchange a size_t value, ordered.
529 *
530 * @returns Current *ppv value
531 * @param puDst Pointer to the size_t variable to update.
532 * @param uNew The new value to assign to *puDst.
533 */
534DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew)
535{
536#if ARCH_BITS == 16
537 AssertCompile(sizeof(size_t) == 2);
538 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
539#elif ARCH_BITS == 32
540 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
541#elif ARCH_BITS == 64
542 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
543#else
544# error "ARCH_BITS is bogus"
545#endif
546}
547
548
549/**
550 * Atomically Exchange a pointer value, ordered.
551 *
552 * @returns Current *ppv value
553 * @param ppv Pointer to the pointer variable to update.
554 * @param pv The pointer value to assign to *ppv.
555 */
556DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
557{
558#if ARCH_BITS == 32 || ARCH_BITS == 16
559 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
560#elif ARCH_BITS == 64
561 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
562#else
563# error "ARCH_BITS is bogus"
564#endif
565}
566
567
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 *
 * With GCC the macro is a statement expression: ppv and pv are first assigned
 * to typed temporaries (so a mismatching argument is diagnosed by the
 * compiler) and only then cast to void pointers for ASMAtomicXchgPtr().  With
 * other compilers the arguments are cast directly and the result is cast to
 * @a Type.
 *
 * @returns The value returned by ASMAtomicXchgPtr(), as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to assign to *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type                         const pvTypeChecked  = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
#endif
589
590
591/**
592 * Atomically Exchange a raw-mode context pointer value, ordered.
593 *
594 * @returns Current *ppv value
595 * @param ppvRC Pointer to the pointer variable to update.
596 * @param pvRC The pointer value to assign to *ppv.
597 */
598DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
599{
600 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
601}
602
603
604/**
605 * Atomically Exchange a ring-0 pointer value, ordered.
606 *
607 * @returns Current *ppv value
608 * @param ppvR0 Pointer to the pointer variable to update.
609 * @param pvR0 The pointer value to assign to *ppv.
610 */
611DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
612{
613#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
614 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
615#elif R0_ARCH_BITS == 64
616 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
617#else
618# error "R0_ARCH_BITS is bogus"
619#endif
620}
621
622
623/**
624 * Atomically Exchange a ring-3 pointer value, ordered.
625 *
626 * @returns Current *ppv value
627 * @param ppvR3 Pointer to the pointer variable to update.
628 * @param pvR3 The pointer value to assign to *ppv.
629 */
630DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
631{
632#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
633 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
634#elif R3_ARCH_BITS == 64
635 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
636#else
637# error "R3_ARCH_BITS is bogus"
638#endif
639}
640
641
/** @def ASMAtomicXchgHandle
 * Atomically Exchange a typical IPRT handle value, ordered.
 *
 * The handle is treated as a 32-bit integer when HC_ARCH_BITS is 32 or
 * ARCH_BITS is 16, and as a 64-bit integer when HC_ARCH_BITS is 64.  Both
 * *ph and *phRes are checked at compile time to have exactly that size.
 *
 * @param   ph      Pointer to the value to update.
 * @param   hNew    The new value to assign to *ph.
 * @param   phRes   Where to store the value returned by the exchange.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
668
669
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange function.
 * Any other size triggers an assertion.  The value returned by the exchange is
 * discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
688
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange function
 * and stores the returned value through puRes, accessed with the same width.
 * Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value returned by the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro in the message; cast sizeof so the argument matches %d. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
707
708
709
/**
 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
 *
 * Only defined inline for GNU style inline asm; in every other configuration
 * just the prototype is emitted here.  The inline version executes
 * LOCK CMPXCHG with u8Old in AL and turns the resulting zero flag into the
 * boolean return value with SETZ.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu8     Pointer to the value to update.
 * @param   u8New   The new value to assign to *pu8.
 * @param   u8Old   The old value to compare *pu8 with.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
{
    uint8_t u8Ret;
    /* u8Old is listed as an output too because CMPXCHG may overwrite AL. */
    __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu8),
                           "=qm" (u8Ret),
                           "=a" (u8Old)
                         : "q" (u8New),
                           "2" (u8Old),
                           "m" (*pu8));
    return (bool)u8Ret;
}
#endif
739
740
741/**
742 * Atomically Compare and Exchange a signed 8-bit value, ordered.
743 *
744 * @returns true if xchg was done.
745 * @returns false if xchg wasn't done.
746 *
747 * @param pi8 Pointer to the value to update.
748 * @param i8New The new value to assigned to *pi8.
749 * @param i8Old The old value to *pi8 compare with.
750 *
751 * @remarks x86: Requires a 486 or later.
752 */
753DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
754{
755 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
756}
757
758
759/**
760 * Atomically Compare and Exchange a bool value, ordered.
761 *
762 * @returns true if xchg was done.
763 * @returns false if xchg wasn't done.
764 *
765 * @param pf Pointer to the value to update.
766 * @param fNew The new value to assigned to *pf.
767 * @param fOld The old value to *pf compare with.
768 *
769 * @remarks x86: Requires a 486 or later.
770 */
771DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
772{
773 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
774}
775
776
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
 *
 * Three implementations are selected at compile time: GNU style inline asm
 * (LOCK CMPXCHG followed by SETZ), the _InterlockedCompareExchange()
 * intrinsic (success is detected by comparing its return value with u32Old),
 * or MSC style inline asm.  When RT_INLINE_ASM_EXTERNAL is set and intrinsics
 * are not in use, only the prototype is emitted here.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32    Pointer to the value to update.
 * @param   u32New  The new value to assign to *pu32.
 * @param   u32Old  The old value to compare *pu32 with.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* u32Old is listed as an output too because CMPXCHG may overwrite EAX. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (u32Old)
                         : "r" (u32New),
                           "2" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
#  else
        lock cmpxchg [edx], ecx
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
833
834
835/**
836 * Atomically Compare and Exchange a signed 32-bit value, ordered.
837 *
838 * @returns true if xchg was done.
839 * @returns false if xchg wasn't done.
840 *
841 * @param pi32 Pointer to the value to update.
842 * @param i32New The new value to assigned to *pi32.
843 * @param i32Old The old value to *pi32 compare with.
844 *
845 * @remarks x86: Requires a 486 or later.
846 */
847DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
848{
849 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
850}
851
852
853/**
854 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
855 *
856 * @returns true if xchg was done.
857 * @returns false if xchg wasn't done.
858 *
859 * @param pu64 Pointer to the 64-bit variable to update.
860 * @param u64New The 64-bit value to assign to *pu64.
861 * @param u64Old The value to compare with.
862 *
863 * @remarks x86: Requires a Pentium or later.
864 */
865#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
866 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
867RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
868#else
869DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
870{
871# if RT_INLINE_ASM_USES_INTRIN
872 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
873
874# elif defined(RT_ARCH_AMD64)
875# if RT_INLINE_ASM_GNU_STYLE
876 uint8_t u8Ret;
877 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
878 "setz %1\n\t"
879 : "=m" (*pu64),
880 "=qm" (u8Ret),
881 "=a" (u64Old)
882 : "r" (u64New),
883 "2" (u64Old),
884 "m" (*pu64));
885 return (bool)u8Ret;
886# else
887 bool fRet;
888 __asm
889 {
890 mov rdx, [pu32]
891 mov rax, [u64Old]
892 mov rcx, [u64New]
893 lock cmpxchg [rdx], rcx
894 setz al
895 mov [fRet], al
896 }
897 return fRet;
898# endif
899# else /* !RT_ARCH_AMD64 */
900 uint32_t u32Ret;
901# if RT_INLINE_ASM_GNU_STYLE
902# if defined(PIC) || defined(__PIC__)
903 uint32_t u32EBX = (uint32_t)u64New;
904 uint32_t u32Spill;
905 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
906 "lock; cmpxchg8b (%6)\n\t"
907 "setz %%al\n\t"
908 "movl %4, %%ebx\n\t"
909 "movzbl %%al, %%eax\n\t"
910 : "=a" (u32Ret),
911 "=d" (u32Spill),
912# if RT_GNUC_PREREQ(4, 3)
913 "+m" (*pu64)
914# else
915 "=m" (*pu64)
916# endif
917 : "A" (u64Old),
918 "m" ( u32EBX ),
919 "c" ( (uint32_t)(u64New >> 32) ),
920 "S" (pu64));
921# else /* !PIC */
922 uint32_t u32Spill;
923 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
924 "setz %%al\n\t"
925 "movzbl %%al, %%eax\n\t"
926 : "=a" (u32Ret),
927 "=d" (u32Spill),
928 "+m" (*pu64)
929 : "A" (u64Old),
930 "b" ( (uint32_t)u64New ),
931 "c" ( (uint32_t)(u64New >> 32) ));
932# endif
933 return (bool)u32Ret;
934# else
935 __asm
936 {
937 mov ebx, dword ptr [u64New]
938 mov ecx, dword ptr [u64New + 4]
939 mov edi, [pu64]
940 mov eax, dword ptr [u64Old]
941 mov edx, dword ptr [u64Old + 4]
942 lock cmpxchg8b [edi]
943 setz al
944 movzx eax, al
945 mov dword ptr [u32Ret], eax
946 }
947 return !!u32Ret;
948# endif
949# endif /* !RT_ARCH_AMD64 */
950}
951#endif
952
953
954/**
955 * Atomically Compare and exchange a signed 64-bit value, ordered.
956 *
957 * @returns true if xchg was done.
958 * @returns false if xchg wasn't done.
959 *
960 * @param pi64 Pointer to the 64-bit variable to update.
961 * @param i64 The 64-bit value to assign to *pu64.
962 * @param i64Old The value to compare with.
963 *
964 * @remarks x86: Requires a Pentium or later.
965 */
966DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
967{
968 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
969}
970
971
972/**
973 * Atomically Compare and Exchange a pointer value, ordered.
974 *
975 * @returns true if xchg was done.
976 * @returns false if xchg wasn't done.
977 *
978 * @param ppv Pointer to the value to update.
979 * @param pvNew The new value to assigned to *ppv.
980 * @param pvOld The old value to *ppv compare with.
981 *
982 * @remarks x86: Requires a 486 or later.
983 */
984DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
985{
986#if ARCH_BITS == 32 || ARCH_BITS == 16
987 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
988#elif ARCH_BITS == 64
989 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
990#else
991# error "ARCH_BITS is bogus"
992#endif
993}
994
995
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * With GCC the macro is a statement expression: ppv, pvNew and pvOld are
 * first assigned to temporaries typed after *ppv (so a mismatching argument
 * is diagnosed by the compiler) and only then cast to void pointers for
 * ASMAtomicCmpXchgPtrVoid().  With other compilers the arguments are cast
 * directly.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
#endif
1024
1025
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * Expands to a statement which asserts at compile time that the handle has
 * the width selected by HC_ARCH_BITS / ARCH_BITS and then forwards to
 * ASMAtomicCmpXchgU32 or ASMAtomicCmpXchgU64 respectively.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The old value to compare *ph with.
 * @param   fRc         Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 * @remarks x86: Requires a 486 or later.
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
       (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
   } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
       (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
   } while (0)
#else
# error HC_ARCH_BITS
#endif
1052
1053
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Selects ASMAtomicCmpXchgU32 or ASMAtomicCmpXchgU64 from sizeof(*(pu)).
 * Any other size triggers an assertion and sets @a fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* The cast makes the variadic argument match the %d conversion (sizeof yields size_t). */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1077
1078
1079/**
1080 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1081 * passes back old value, ordered.
1082 *
1083 * @returns true if xchg was done.
1084 * @returns false if xchg wasn't done.
1085 *
1086 * @param pu32 Pointer to the value to update.
1087 * @param u32New The new value to assigned to *pu32.
1088 * @param u32Old The old value to *pu32 compare with.
1089 * @param pu32Old Pointer store the old value at.
1090 *
1091 * @remarks x86: Requires a 486 or later.
1092 */
1093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1094RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1095#else
1096DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1097{
1098# if RT_INLINE_ASM_GNU_STYLE
1099 uint8_t u8Ret;
1100 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1101 "setz %1\n\t"
1102 : "=m" (*pu32),
1103 "=qm" (u8Ret),
1104 "=a" (*pu32Old)
1105 : "r" (u32New),
1106 "a" (u32Old),
1107 "m" (*pu32));
1108 return (bool)u8Ret;
1109
1110# elif RT_INLINE_ASM_USES_INTRIN
1111 return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1112
1113# else
1114 uint32_t u32Ret;
1115 __asm
1116 {
1117# ifdef RT_ARCH_AMD64
1118 mov rdx, [pu32]
1119# else
1120 mov edx, [pu32]
1121# endif
1122 mov eax, [u32Old]
1123 mov ecx, [u32New]
1124# ifdef RT_ARCH_AMD64
1125 lock cmpxchg [rdx], ecx
1126 mov rdx, [pu32Old]
1127 mov [rdx], eax
1128# else
1129 lock cmpxchg [edx], ecx
1130 mov edx, [pu32Old]
1131 mov [edx], eax
1132# endif
1133 setz al
1134 movzx eax, al
1135 mov [u32Ret], eax
1136 }
1137 return !!u32Ret;
1138# endif
1139}
1140#endif
1141
1142
1143/**
1144 * Atomically Compare and Exchange a signed 32-bit value, additionally
1145 * passes back old value, ordered.
1146 *
1147 * @returns true if xchg was done.
1148 * @returns false if xchg wasn't done.
1149 *
1150 * @param pi32 Pointer to the value to update.
1151 * @param i32New The new value to assigned to *pi32.
1152 * @param i32Old The old value to *pi32 compare with.
1153 * @param pi32Old Pointer store the old value at.
1154 *
1155 * @remarks x86: Requires a 486 or later.
1156 */
1157DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1158{
1159 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1160}
1161
1162
1163/**
1164 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1165 * passing back old value, ordered.
1166 *
1167 * @returns true if xchg was done.
1168 * @returns false if xchg wasn't done.
1169 *
1170 * @param pu64 Pointer to the 64-bit variable to update.
1171 * @param u64New The 64-bit value to assign to *pu64.
1172 * @param u64Old The value to compare with.
1173 * @param pu64Old Pointer store the old value at.
1174 *
1175 * @remarks x86: Requires a Pentium or later.
1176 */
1177#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1178 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1179RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1180#else
1181DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1182{
1183# if RT_INLINE_ASM_USES_INTRIN
1184 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1185
1186# elif defined(RT_ARCH_AMD64)
1187# if RT_INLINE_ASM_GNU_STYLE
1188 uint8_t u8Ret;
1189 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1190 "setz %1\n\t"
1191 : "=m" (*pu64),
1192 "=qm" (u8Ret),
1193 "=a" (*pu64Old)
1194 : "r" (u64New),
1195 "a" (u64Old),
1196 "m" (*pu64));
1197 return (bool)u8Ret;
1198# else
1199 bool fRet;
1200 __asm
1201 {
1202 mov rdx, [pu32]
1203 mov rax, [u64Old]
1204 mov rcx, [u64New]
1205 lock cmpxchg [rdx], rcx
1206 mov rdx, [pu64Old]
1207 mov [rdx], rax
1208 setz al
1209 mov [fRet], al
1210 }
1211 return fRet;
1212# endif
1213# else /* !RT_ARCH_AMD64 */
1214# if RT_INLINE_ASM_GNU_STYLE
1215 uint64_t u64Ret;
1216# if defined(PIC) || defined(__PIC__)
1217 /* NB: this code uses a memory clobber description, because the clean
1218 * solution with an output value for *pu64 makes gcc run out of registers.
1219 * This will cause suboptimal code, and anyone with a better solution is
1220 * welcome to improve this. */
1221 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1222 "lock; cmpxchg8b %3\n\t"
1223 "xchgl %%ebx, %1\n\t"
1224 : "=A" (u64Ret)
1225 : "DS" ((uint32_t)u64New),
1226 "c" ((uint32_t)(u64New >> 32)),
1227 "m" (*pu64),
1228 "0" (u64Old)
1229 : "memory" );
1230# else /* !PIC */
1231 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1232 : "=A" (u64Ret),
1233 "=m" (*pu64)
1234 : "b" ((uint32_t)u64New),
1235 "c" ((uint32_t)(u64New >> 32)),
1236 "m" (*pu64),
1237 "0" (u64Old));
1238# endif
1239 *pu64Old = u64Ret;
1240 return u64Ret == u64Old;
1241# else
1242 uint32_t u32Ret;
1243 __asm
1244 {
1245 mov ebx, dword ptr [u64New]
1246 mov ecx, dword ptr [u64New + 4]
1247 mov edi, [pu64]
1248 mov eax, dword ptr [u64Old]
1249 mov edx, dword ptr [u64Old + 4]
1250 lock cmpxchg8b [edi]
1251 mov ebx, [pu64Old]
1252 mov [ebx], eax
1253 setz al
1254 movzx eax, al
1255 add ebx, 4
1256 mov [ebx], edx
1257 mov dword ptr [u32Ret], eax
1258 }
1259 return !!u32Ret;
1260# endif
1261# endif /* !RT_ARCH_AMD64 */
1262}
1263#endif
1264
1265
1266/**
1267 * Atomically Compare and exchange a signed 64-bit value, additionally
1268 * passing back old value, ordered.
1269 *
1270 * @returns true if xchg was done.
1271 * @returns false if xchg wasn't done.
1272 *
1273 * @param pi64 Pointer to the 64-bit variable to update.
1274 * @param i64 The 64-bit value to assign to *pu64.
1275 * @param i64Old The value to compare with.
1276 * @param pi64Old Pointer store the old value at.
1277 *
1278 * @remarks x86: Requires a Pentium or later.
1279 */
1280DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1281{
1282 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1283}
1284
1285/** @def ASMAtomicCmpXchgExHandle
1286 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1287 *
1288 * @param ph Pointer to the value to update.
1289 * @param hNew The new value to assigned to *pu.
1290 * @param hOld The old value to *pu compare with.
1291 * @param fRc Where to store the result.
1292 * @param phOldVal Pointer to where to store the old value.
1293 *
1294 * @remarks This doesn't currently work for all handles (like RTFILE).
1295 */
1296#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1297# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1298 do { \
1299 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1300 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1301 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1302 } while (0)
1303#elif HC_ARCH_BITS == 64
1304# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1305 do { \
1306 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1307 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1308 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1309 } while (0)
1310#else
1311# error HC_ARCH_BITS
1312#endif
1313
1314
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Selects ASMAtomicCmpXchgExU32 or ASMAtomicCmpXchgExU64 from sizeof(*(pu)).
 * Any other size triggers an assertion, sets @a fRc to false and zeroes
 * *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            /* The cast makes the variadic argument match the %d conversion (sizeof yields size_t). */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1340
1341
1342/**
1343 * Atomically Compare and Exchange a pointer value, additionally
1344 * passing back old value, ordered.
1345 *
1346 * @returns true if xchg was done.
1347 * @returns false if xchg wasn't done.
1348 *
1349 * @param ppv Pointer to the value to update.
1350 * @param pvNew The new value to assigned to *ppv.
1351 * @param pvOld The old value to *ppv compare with.
1352 * @param ppvOld Pointer store the old value at.
1353 *
1354 * @remarks x86: Requires a 486 or later.
1355 */
1356DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1357 void RT_FAR * RT_FAR *ppvOld)
1358{
1359#if ARCH_BITS == 32 || ARCH_BITS == 16
1360 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1361#elif ARCH_BITS == 64
1362 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1363#else
1364# error "ARCH_BITS is bogus"
1365#endif
1366}
1367
1368
/**
 * Atomically compares and exchanges a pointer value, additionally
 * passing back the old value, ordered.
 *
 * Type-checking front end for ASMAtomicCmpXchgExPtrVoid.  With GCC the
 * arguments are first assigned to locals typed after *ppv, so a mismatching
 * pointer type is diagnosed before everything is cast to void pointers.
 * Other compilers get the plain casts only.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Pointer to where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                  (void *)pvNewTypeChecked, \
                                  (void *)pvOldTypeChecked, \
                                  (void **)ppvOldTypeChecked); \
    })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), \
                              (void RT_FAR *)(pvNew), \
                              (void RT_FAR *)(pvOld), \
                              (void RT_FAR * RT_FAR *)(ppvOld))
#endif
1401
1402
1403/**
1404 * Virtualization unfriendly serializing instruction, always exits.
1405 */
1406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1407RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void);
1408#else
1409DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1410{
1411# if RT_INLINE_ASM_GNU_STYLE
1412 RTCCUINTREG xAX = 0;
1413# ifdef RT_ARCH_AMD64
1414 __asm__ __volatile__ ("cpuid"
1415 : "=a" (xAX)
1416 : "0" (xAX)
1417 : "rbx", "rcx", "rdx", "memory");
1418# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1419 __asm__ __volatile__ ("push %%ebx\n\t"
1420 "cpuid\n\t"
1421 "pop %%ebx\n\t"
1422 : "=a" (xAX)
1423 : "0" (xAX)
1424 : "ecx", "edx", "memory");
1425# else
1426 __asm__ __volatile__ ("cpuid"
1427 : "=a" (xAX)
1428 : "0" (xAX)
1429 : "ebx", "ecx", "edx", "memory");
1430# endif
1431
1432# elif RT_INLINE_ASM_USES_INTRIN
1433 int aInfo[4];
1434 _ReadWriteBarrier();
1435 __cpuid(aInfo, 0);
1436
1437# else
1438 __asm
1439 {
1440 push ebx
1441 xor eax, eax
1442 cpuid
1443 pop ebx
1444 }
1445# endif
1446}
1447#endif
1448
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * Builds an interrupt return frame on the stack which targets the
 * instruction right after the IRET and then executes the IRET.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void);
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* Frame pushed below, in order: SS, the original RSP (kept in r10),
       RFLAGS, CS and the address of local label 1. */
    __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
                          "subq $128, %%rsp\n\t" /*redzone*/
                          "mov %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq 1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory");
#  else
    /* Frame pushed below, in order: EFLAGS, CS and the address of local label 1. */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    /* Same frame as the 32-bit GNU variant: EFLAGS, CS, return address. */
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
1493
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * Executes RDTSCP and discards whatever it returns.
 */
#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void);
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-( */
    /* The instruction is therefore emitted as raw bytes; the mnemonic form is
       kept next to it as a comment for reference. */
#  ifdef RT_ARCH_AMD64
    /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= 15
    uint32_t uIgnore;
    _ReadWriteBarrier();
    (void)__rdtscp(&uIgnore);
    (void)uIgnore; /* only here to mark the output as deliberately unused */
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
1526
1527
/**
 * Serialize Instruction.
 *
 * Maps to the IRET based variant for 16-bit x86 code and for IN_GUEST
 * builds, and to the CPUID based variant for other x86 / AMD64 builds.
 * On SPARC64 it is a real function which is only declared here.
 */
#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#elif defined(RT_ARCH_SPARC64)
RTDECL(void) ASMSerializeInstruction(void);
#else
# error "Port me"
#endif
1540
1541
1542/**
1543 * Memory fence, waits for any pending writes and reads to complete.
1544 */
1545DECLINLINE(void) ASMMemoryFence(void)
1546{
1547#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1548# if RT_INLINE_ASM_GNU_STYLE
1549 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1550# elif RT_INLINE_ASM_USES_INTRIN
1551 _mm_mfence();
1552# else
1553 __asm
1554 {
1555 _emit 0x0f
1556 _emit 0xae
1557 _emit 0xf0
1558 }
1559# endif
1560#elif ARCH_BITS == 16
1561 uint16_t volatile u16;
1562 ASMAtomicXchgU16(&u16, 0);
1563#else
1564 uint32_t volatile u32;
1565 ASMAtomicXchgU32(&u32, 0);
1566#endif
1567}
1568
1569
1570/**
1571 * Write fence, waits for any pending writes to complete.
1572 */
1573DECLINLINE(void) ASMWriteFence(void)
1574{
1575#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1576# if RT_INLINE_ASM_GNU_STYLE
1577 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1578# elif RT_INLINE_ASM_USES_INTRIN
1579 _mm_sfence();
1580# else
1581 __asm
1582 {
1583 _emit 0x0f
1584 _emit 0xae
1585 _emit 0xf8
1586 }
1587# endif
1588#else
1589 ASMMemoryFence();
1590#endif
1591}
1592
1593
1594/**
1595 * Read fence, waits for any pending reads to complete.
1596 */
1597DECLINLINE(void) ASMReadFence(void)
1598{
1599#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1600# if RT_INLINE_ASM_GNU_STYLE
1601 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1602# elif RT_INLINE_ASM_USES_INTRIN
1603 _mm_lfence();
1604# else
1605 __asm
1606 {
1607 _emit 0x0f
1608 _emit 0xae
1609 _emit 0xe8
1610 }
1611# endif
1612#else
1613 ASMMemoryFence();
1614#endif
1615}
1616
1617
1618/**
1619 * Atomically reads an unsigned 8-bit value, ordered.
1620 *
1621 * @returns Current *pu8 value
1622 * @param pu8 Pointer to the 8-bit variable to read.
1623 */
1624DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1625{
1626 ASMMemoryFence();
1627 return *pu8; /* byte reads are atomic on x86 */
1628}
1629
1630
1631/**
1632 * Atomically reads an unsigned 8-bit value, unordered.
1633 *
1634 * @returns Current *pu8 value
1635 * @param pu8 Pointer to the 8-bit variable to read.
1636 */
1637DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1638{
1639 return *pu8; /* byte reads are atomic on x86 */
1640}
1641
1642
1643/**
1644 * Atomically reads a signed 8-bit value, ordered.
1645 *
1646 * @returns Current *pi8 value
1647 * @param pi8 Pointer to the 8-bit variable to read.
1648 */
1649DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1650{
1651 ASMMemoryFence();
1652 return *pi8; /* byte reads are atomic on x86 */
1653}
1654
1655
1656/**
1657 * Atomically reads a signed 8-bit value, unordered.
1658 *
1659 * @returns Current *pi8 value
1660 * @param pi8 Pointer to the 8-bit variable to read.
1661 */
1662DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1663{
1664 return *pi8; /* byte reads are atomic on x86 */
1665}
1666
1667
1668/**
1669 * Atomically reads an unsigned 16-bit value, ordered.
1670 *
1671 * @returns Current *pu16 value
1672 * @param pu16 Pointer to the 16-bit variable to read.
1673 */
1674DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1675{
1676 ASMMemoryFence();
1677 Assert(!((uintptr_t)pu16 & 1));
1678 return *pu16;
1679}
1680
1681
1682/**
1683 * Atomically reads an unsigned 16-bit value, unordered.
1684 *
1685 * @returns Current *pu16 value
1686 * @param pu16 Pointer to the 16-bit variable to read.
1687 */
1688DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1689{
1690 Assert(!((uintptr_t)pu16 & 1));
1691 return *pu16;
1692}
1693
1694
1695/**
1696 * Atomically reads a signed 16-bit value, ordered.
1697 *
1698 * @returns Current *pi16 value
1699 * @param pi16 Pointer to the 16-bit variable to read.
1700 */
1701DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1702{
1703 ASMMemoryFence();
1704 Assert(!((uintptr_t)pi16 & 1));
1705 return *pi16;
1706}
1707
1708
1709/**
1710 * Atomically reads a signed 16-bit value, unordered.
1711 *
1712 * @returns Current *pi16 value
1713 * @param pi16 Pointer to the 16-bit variable to read.
1714 */
1715DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1716{
1717 Assert(!((uintptr_t)pi16 & 1));
1718 return *pi16;
1719}
1720
1721
1722/**
1723 * Atomically reads an unsigned 32-bit value, ordered.
1724 *
1725 * @returns Current *pu32 value
1726 * @param pu32 Pointer to the 32-bit variable to read.
1727 */
1728DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1729{
1730 ASMMemoryFence();
1731 Assert(!((uintptr_t)pu32 & 3));
1732#if ARCH_BITS == 16
1733 AssertFailed(); /** @todo 16-bit */
1734#endif
1735 return *pu32;
1736}
1737
1738
1739/**
1740 * Atomically reads an unsigned 32-bit value, unordered.
1741 *
1742 * @returns Current *pu32 value
1743 * @param pu32 Pointer to the 32-bit variable to read.
1744 */
1745DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1746{
1747 Assert(!((uintptr_t)pu32 & 3));
1748#if ARCH_BITS == 16
1749 AssertFailed(); /** @todo 16-bit */
1750#endif
1751 return *pu32;
1752}
1753
1754
1755/**
1756 * Atomically reads a signed 32-bit value, ordered.
1757 *
1758 * @returns Current *pi32 value
1759 * @param pi32 Pointer to the 32-bit variable to read.
1760 */
1761DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1762{
1763 ASMMemoryFence();
1764 Assert(!((uintptr_t)pi32 & 3));
1765#if ARCH_BITS == 16
1766 AssertFailed(); /** @todo 16-bit */
1767#endif
1768 return *pi32;
1769}
1770
1771
1772/**
1773 * Atomically reads a signed 32-bit value, unordered.
1774 *
1775 * @returns Current *pi32 value
1776 * @param pi32 Pointer to the 32-bit variable to read.
1777 */
1778DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1779{
1780 Assert(!((uintptr_t)pi32 & 3));
1781#if ARCH_BITS == 16
1782 AssertFailed(); /** @todo 16-bit */
1783#endif
1784 return *pi32;
1785}
1786
1787
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain 64-bit load.  On
 * 32-bit x86 the value is fetched with a locked CMPXCHG8B that uses zero
 * for both the comparand and the replacement value, so the contents of
 * *pu64 are never changed but the instruction does need write access.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if    (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
    || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* ebx is not passed as an operand here: it is swapped with the zeroed
       u32EBX slot before the cmpxchg8b and reloaded from that slot after. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* edx:eax (comparand) and ecx:ebx (replacement) are all zero. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1867
1868
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain 64-bit load without any fence.  On 32-bit x86
 * the value is fetched with a locked CMPXCHG8B that uses zero for both the
 * comparand and the replacement value, so the contents of *pu64 are never
 * changed but the instruction does need write access.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if !defined(RT_ARCH_AMD64) \
  && (   (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
      || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* eax, ecx and edx are zeroed inside the asm.  ebx is swapped with the
       zeroed u32EBX slot before the cmpxchg8b and reloaded from it after.
       u32Spill only exists to tell the compiler that ecx gets overwritten. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    /* edx:eax (comparand) and ecx:ebx (replacement) are all zero. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1950
1951
1952/**
1953 * Atomically reads a signed 64-bit value, ordered.
1954 *
1955 * @returns Current *pi64 value
1956 * @param pi64 Pointer to the 64-bit variable to read.
1957 * The memory pointed to must be writable.
1958 *
1959 * @remarks This may fault if the memory is read-only!
1960 * @remarks x86: Requires a Pentium or later.
1961 */
1962DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1963{
1964 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1965}
1966
1967
1968/**
1969 * Atomically reads a signed 64-bit value, unordered.
1970 *
1971 * @returns Current *pi64 value
1972 * @param pi64 Pointer to the 64-bit variable to read.
1973 * The memory pointed to must be writable.
1974 *
1975 * @remarks This will fault if the memory is read-only!
1976 * @remarks x86: Requires a Pentium or later.
1977 */
1978DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1979{
1980 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1981}
1982
1983
1984/**
1985 * Atomically reads a size_t value, ordered.
1986 *
1987 * @returns Current *pcb value
1988 * @param pcb Pointer to the size_t variable to read.
1989 */
1990DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1991{
1992#if ARCH_BITS == 64
1993 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1994#elif ARCH_BITS == 32
1995 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1996#elif ARCH_BITS == 16
1997 AssertCompileSize(size_t, 2);
1998 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
1999#else
2000# error "Unsupported ARCH_BITS value"
2001#endif
2002}
2003
2004
2005/**
2006 * Atomically reads a size_t value, unordered.
2007 *
2008 * @returns Current *pcb value
2009 * @param pcb Pointer to the size_t variable to read.
2010 */
2011DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
2012{
2013#if ARCH_BITS == 64 || ARCH_BITS == 16
2014 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2015#elif ARCH_BITS == 32
2016 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2017#elif ARCH_BITS == 16
2018 AssertCompileSize(size_t, 2);
2019 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2020#else
2021# error "Unsupported ARCH_BITS value"
2022#endif
2023}
2024
2025
2026/**
2027 * Atomically reads a pointer value, ordered.
2028 *
2029 * @returns Current *pv value
2030 * @param ppv Pointer to the pointer variable to read.
2031 *
2032 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2033 * requires less typing (no casts).
2034 */
2035DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2036{
2037#if ARCH_BITS == 32 || ARCH_BITS == 16
2038 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2039#elif ARCH_BITS == 64
2040 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2041#else
2042# error "ARCH_BITS is bogus"
2043#endif
2044}
2045
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * With GCC, @a ppv is first assigned to a local typed after *ppv and the
 * value read is assigned to a local of type @a Type, so that mismatching
 * types get diagnosed.  Other compilers get a plain cast to @a Type.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2065
2066
2067/**
2068 * Atomically reads a pointer value, unordered.
2069 *
2070 * @returns Current *pv value
2071 * @param ppv Pointer to the pointer variable to read.
2072 *
2073 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2074 * requires less typing (no casts).
2075 */
2076DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2077{
2078#if ARCH_BITS == 32 || ARCH_BITS == 16
2079 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2080#elif ARCH_BITS == 64
2081 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2082#else
2083# error "ARCH_BITS is bogus"
2084#endif
2085}
2086
2087
/**
 * Typed wrapper around ASMAtomicUoReadPtr that hides the casting.
 *
 * The GCC flavour lets the compiler check @a ppv: it is first assigned to a
 * const pointer-to-volatile of its own pointee type, and the value read is
 * converted back to that pointee type before being stored as @a Type.  Other
 * compilers get a plain cast.
 *
 * @returns The current value of *ppv, as @a Type.
 * @param   ppv     Address of the pointer variable to read.
 * @param   Type    The type of *ppv, without the volatile qualifier.
 */
#ifndef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#else /* GCC 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvSrcChecked = (ppv); \
        Type pvResultChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvSrcChecked); \
        pvResultChecked; \
    })
#endif
2107
2108
2109/**
2110 * Atomically reads a boolean value, ordered.
2111 *
2112 * @returns Current *pf value
2113 * @param pf Pointer to the boolean variable to read.
2114 */
2115DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2116{
2117 ASMMemoryFence();
2118 return *pf; /* byte reads are atomic on x86 */
2119}
2120
2121
2122/**
2123 * Atomically reads a boolean value, unordered.
2124 *
2125 * @returns Current *pf value
2126 * @param pf Pointer to the boolean variable to read.
2127 */
2128DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2129{
2130 return *pf; /* byte reads are atomic on x86 */
2131}
2132
2133
2134/**
2135 * Atomically read a typical IPRT handle value, ordered.
2136 *
2137 * @param ph Pointer to the handle variable to read.
2138 * @param phRes Where to store the result.
2139 *
2140 * @remarks This doesn't currently work for all handles (like RTFILE).
2141 */
2142#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2143# define ASMAtomicReadHandle(ph, phRes) \
2144 do { \
2145 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2146 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2147 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2148 } while (0)
2149#elif HC_ARCH_BITS == 64
2150# define ASMAtomicReadHandle(ph, phRes) \
2151 do { \
2152 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2153 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2154 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2155 } while (0)
2156#else
2157# error HC_ARCH_BITS
2158#endif
2159
2160
2161/**
2162 * Atomically read a typical IPRT handle value, unordered.
2163 *
2164 * @param ph Pointer to the handle variable to read.
2165 * @param phRes Where to store the result.
2166 *
2167 * @remarks This doesn't currently work for all handles (like RTFILE).
2168 */
2169#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2170# define ASMAtomicUoReadHandle(ph, phRes) \
2171 do { \
2172 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2173 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2174 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2175 } while (0)
2176#elif HC_ARCH_BITS == 64
2177# define ASMAtomicUoReadHandle(ph, phRes) \
2178 do { \
2179 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2180 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2181 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2182 } while (0)
2183#else
2184# error HC_ARCH_BITS
2185#endif
2186
2187
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicReadU8/U16/U32/U64.  Any other size
 * trips AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2205
2206
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicUoReadU8/U16/U32/U64.  Any other
 * size trips AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2224
2225
2226/**
2227 * Atomically writes an unsigned 8-bit value, ordered.
2228 *
2229 * @param pu8 Pointer to the 8-bit variable.
2230 * @param u8 The 8-bit value to assign to *pu8.
2231 */
2232DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2233{
2234 ASMAtomicXchgU8(pu8, u8);
2235}
2236
2237
2238/**
2239 * Atomically writes an unsigned 8-bit value, unordered.
2240 *
2241 * @param pu8 Pointer to the 8-bit variable.
2242 * @param u8 The 8-bit value to assign to *pu8.
2243 */
2244DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2245{
2246 *pu8 = u8; /* byte writes are atomic on x86 */
2247}
2248
2249
2250/**
2251 * Atomically writes a signed 8-bit value, ordered.
2252 *
2253 * @param pi8 Pointer to the 8-bit variable to read.
2254 * @param i8 The 8-bit value to assign to *pi8.
2255 */
2256DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2257{
2258 ASMAtomicXchgS8(pi8, i8);
2259}
2260
2261
2262/**
2263 * Atomically writes a signed 8-bit value, unordered.
2264 *
2265 * @param pi8 Pointer to the 8-bit variable to write.
2266 * @param i8 The 8-bit value to assign to *pi8.
2267 */
2268DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2269{
2270 *pi8 = i8; /* byte writes are atomic on x86 */
2271}
2272
2273
2274/**
2275 * Atomically writes an unsigned 16-bit value, ordered.
2276 *
2277 * @param pu16 Pointer to the 16-bit variable to write.
2278 * @param u16 The 16-bit value to assign to *pu16.
2279 */
2280DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2281{
2282 ASMAtomicXchgU16(pu16, u16);
2283}
2284
2285
2286/**
2287 * Atomically writes an unsigned 16-bit value, unordered.
2288 *
2289 * @param pu16 Pointer to the 16-bit variable to write.
2290 * @param u16 The 16-bit value to assign to *pu16.
2291 */
2292DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2293{
2294 Assert(!((uintptr_t)pu16 & 1));
2295 *pu16 = u16;
2296}
2297
2298
2299/**
2300 * Atomically writes a signed 16-bit value, ordered.
2301 *
2302 * @param pi16 Pointer to the 16-bit variable to write.
2303 * @param i16 The 16-bit value to assign to *pi16.
2304 */
2305DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2306{
2307 ASMAtomicXchgS16(pi16, i16);
2308}
2309
2310
2311/**
2312 * Atomically writes a signed 16-bit value, unordered.
2313 *
2314 * @param pi16 Pointer to the 16-bit variable to write.
2315 * @param i16 The 16-bit value to assign to *pi16.
2316 */
2317DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2318{
2319 Assert(!((uintptr_t)pi16 & 1));
2320 *pi16 = i16;
2321}
2322
2323
2324/**
2325 * Atomically writes an unsigned 32-bit value, ordered.
2326 *
2327 * @param pu32 Pointer to the 32-bit variable to write.
2328 * @param u32 The 32-bit value to assign to *pu32.
2329 */
2330DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2331{
2332 ASMAtomicXchgU32(pu32, u32);
2333}
2334
2335
2336/**
2337 * Atomically writes an unsigned 32-bit value, unordered.
2338 *
2339 * @param pu32 Pointer to the 32-bit variable to write.
2340 * @param u32 The 32-bit value to assign to *pu32.
2341 */
2342DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2343{
2344 Assert(!((uintptr_t)pu32 & 3));
2345#if ARCH_BITS >= 32
2346 *pu32 = u32;
2347#else
2348 ASMAtomicXchgU32(pu32, u32);
2349#endif
2350}
2351
2352
2353/**
2354 * Atomically writes a signed 32-bit value, ordered.
2355 *
2356 * @param pi32 Pointer to the 32-bit variable to write.
2357 * @param i32 The 32-bit value to assign to *pi32.
2358 */
2359DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2360{
2361 ASMAtomicXchgS32(pi32, i32);
2362}
2363
2364
2365/**
2366 * Atomically writes a signed 32-bit value, unordered.
2367 *
2368 * @param pi32 Pointer to the 32-bit variable to write.
2369 * @param i32 The 32-bit value to assign to *pi32.
2370 */
2371DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2372{
2373 Assert(!((uintptr_t)pi32 & 3));
2374#if ARCH_BITS >= 32
2375 *pi32 = i32;
2376#else
2377 ASMAtomicXchgS32(pi32, i32);
2378#endif
2379}
2380
2381
2382/**
2383 * Atomically writes an unsigned 64-bit value, ordered.
2384 *
2385 * @param pu64 Pointer to the 64-bit variable to write.
2386 * @param u64 The 64-bit value to assign to *pu64.
2387 */
2388DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2389{
2390 ASMAtomicXchgU64(pu64, u64);
2391}
2392
2393
2394/**
2395 * Atomically writes an unsigned 64-bit value, unordered.
2396 *
2397 * @param pu64 Pointer to the 64-bit variable to write.
2398 * @param u64 The 64-bit value to assign to *pu64.
2399 */
2400DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2401{
2402 Assert(!((uintptr_t)pu64 & 7));
2403#if ARCH_BITS == 64
2404 *pu64 = u64;
2405#else
2406 ASMAtomicXchgU64(pu64, u64);
2407#endif
2408}
2409
2410
2411/**
2412 * Atomically writes a signed 64-bit value, ordered.
2413 *
2414 * @param pi64 Pointer to the 64-bit variable to write.
2415 * @param i64 The 64-bit value to assign to *pi64.
2416 */
2417DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2418{
2419 ASMAtomicXchgS64(pi64, i64);
2420}
2421
2422
2423/**
2424 * Atomically writes a signed 64-bit value, unordered.
2425 *
2426 * @param pi64 Pointer to the 64-bit variable to write.
2427 * @param i64 The 64-bit value to assign to *pi64.
2428 */
2429DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2430{
2431 Assert(!((uintptr_t)pi64 & 7));
2432#if ARCH_BITS == 64
2433 *pi64 = i64;
2434#else
2435 ASMAtomicXchgS64(pi64, i64);
2436#endif
2437}
2438
2439
2440/**
2441 * Atomically writes a size_t value, ordered.
2442 *
2443 * @returns nothing.
2444 * @param pcb Pointer to the size_t variable to write.
2445 * @param cb The value to assign to *pcb.
2446 */
2447DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2448{
2449#if ARCH_BITS == 64
2450 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2451#elif ARCH_BITS == 32
2452 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2453#elif ARCH_BITS == 16
2454 AssertCompileSize(size_t, 2);
2455 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2456#else
2457# error "Unsupported ARCH_BITS value"
2458#endif
2459}
2460
2461
2462/**
2463 * Atomically writes a boolean value, unordered.
2464 *
2465 * @param pf Pointer to the boolean variable to write.
2466 * @param f The boolean value to assign to *pf.
2467 */
2468DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2469{
2470 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2471}
2472
2473
2474/**
2475 * Atomically writes a boolean value, unordered.
2476 *
2477 * @param pf Pointer to the boolean variable to write.
2478 * @param f The boolean value to assign to *pf.
2479 */
2480DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2481{
2482 *pf = f; /* byte writes are atomic on x86 */
2483}
2484
2485
2486/**
2487 * Atomically writes a pointer value, ordered.
2488 *
2489 * @param ppv Pointer to the pointer variable to write.
2490 * @param pv The pointer value to assign to *ppv.
2491 */
2492DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2493{
2494#if ARCH_BITS == 32 || ARCH_BITS == 16
2495 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2496#elif ARCH_BITS == 64
2497 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2498#else
2499# error "ARCH_BITS is bogus"
2500#endif
2501}
2502
2503
/**
 * Atomically writes a pointer value, ordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts in
 * strict builds that the destination is aligned to ARCH_BITS / 8 bytes, and
 * then hands the write to ASMAtomicWritePtrVoid.
 *
 * All macro arguments are parenthesized where they are expanded.  In the GCC
 * flavour the alignment assertion uses the type-checked copy, so @a ppv is
 * evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
2538
2539
2540/**
2541 * Atomically sets a pointer to NULL, ordered.
2542 *
2543 * @param ppv Pointer to the pointer variable that should be set to NULL.
2544 *
2545 * @remarks This is relatively type safe on GCC platforms.
2546 */
2547#if RT_GNUC_PREREQ(4, 2)
2548# define ASMAtomicWriteNullPtr(ppv) \
2549 do \
2550 { \
2551 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2552 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2553 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2554 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2555 } while (0)
2556#else
2557# define ASMAtomicWriteNullPtr(ppv) \
2558 do \
2559 { \
2560 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2561 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2562 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2563 } while (0)
2564#endif
2565
2566
/**
 * Atomically writes a pointer value, unordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts in
 * strict builds that the destination is aligned to ARCH_BITS / 8 bytes, and
 * then performs a plain store.  The macro is a statement and yields no value.
 *
 * All macro arguments are parenthesized where they are expanded.  In the GCC
 * flavour the alignment assertion uses the type-checked copy, so @a ppv is
 * evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2601
2602
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts in strict builds
 * that the destination is aligned to ARCH_BITS / 8 bytes, and then stores
 * NULL with a plain assignment.
 *
 * Macro arguments are parenthesized where they are expanded.  In the GCC
 * flavour the alignment assertion uses the type-checked copy, so @a ppv is
 * evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2628
2629
2630/**
2631 * Atomically write a typical IPRT handle value, ordered.
2632 *
2633 * @param ph Pointer to the variable to update.
2634 * @param hNew The value to assign to *ph.
2635 *
2636 * @remarks This doesn't currently work for all handles (like RTFILE).
2637 */
2638#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2639# define ASMAtomicWriteHandle(ph, hNew) \
2640 do { \
2641 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2642 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2643 } while (0)
2644#elif HC_ARCH_BITS == 64
2645# define ASMAtomicWriteHandle(ph, hNew) \
2646 do { \
2647 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2648 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2649 } while (0)
2650#else
2651# error HC_ARCH_BITS
2652#endif
2653
2654
2655/**
2656 * Atomically write a typical IPRT handle value, unordered.
2657 *
2658 * @param ph Pointer to the variable to update.
2659 * @param hNew The value to assign to *ph.
2660 *
2661 * @remarks This doesn't currently work for all handles (like RTFILE).
2662 */
2663#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2664# define ASMAtomicUoWriteHandle(ph, hNew) \
2665 do { \
2666 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2667 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2668 } while (0)
2669#elif HC_ARCH_BITS == 64
2670# define ASMAtomicUoWriteHandle(ph, hNew) \
2671 do { \
2672 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2673 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2674 } while (0)
2675#else
2676# error HC_ARCH_BITS
2677#endif
2678
2679
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicWriteU8/U16/U32/U64.  Any other
 * size trips AssertMsgFailed and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2697
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicUoWriteU8/U16/U32/U64.  Any other
 * size trips AssertMsgFailed and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2715
2716
2717
2718/**
2719 * Atomically exchanges and adds to a 16-bit value, ordered.
2720 *
2721 * @returns The old value.
2722 * @param pu16 Pointer to the value.
2723 * @param u16 Number to add.
2724 *
2725 * @remarks Currently not implemented, just to make 16-bit code happy.
2726 * @remarks x86: Requires a 486 or later.
2727 */
2728RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2729
2730
2731/**
2732 * Atomically exchanges and adds to a 32-bit value, ordered.
2733 *
2734 * @returns The old value.
2735 * @param pu32 Pointer to the value.
2736 * @param u32 Number to add.
2737 *
2738 * @remarks x86: Requires a 486 or later.
2739 */
2740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2741RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2742#else
2743DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2744{
2745# if RT_INLINE_ASM_USES_INTRIN
2746 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2747 return u32;
2748
2749# elif RT_INLINE_ASM_GNU_STYLE
2750 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2751 : "=r" (u32),
2752 "=m" (*pu32)
2753 : "0" (u32),
2754 "m" (*pu32)
2755 : "memory");
2756 return u32;
2757# else
2758 __asm
2759 {
2760 mov eax, [u32]
2761# ifdef RT_ARCH_AMD64
2762 mov rdx, [pu32]
2763 lock xadd [rdx], eax
2764# else
2765 mov edx, [pu32]
2766 lock xadd [edx], eax
2767# endif
2768 mov [u32], eax
2769 }
2770 return u32;
2771# endif
2772}
2773#endif
2774
2775
2776/**
2777 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2778 *
2779 * @returns The old value.
2780 * @param pi32 Pointer to the value.
2781 * @param i32 Number to add.
2782 *
2783 * @remarks x86: Requires a 486 or later.
2784 */
2785DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2786{
2787 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2788}
2789
2790
2791/**
2792 * Atomically exchanges and adds to a 64-bit value, ordered.
2793 *
2794 * @returns The old value.
2795 * @param pu64 Pointer to the value.
2796 * @param u64 Number to add.
2797 *
2798 * @remarks x86: Requires a Pentium or later.
2799 */
2800#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2801DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2802#else
2803DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2804{
2805# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2806 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2807 return u64;
2808
2809# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2810 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2811 : "=r" (u64),
2812 "=m" (*pu64)
2813 : "0" (u64),
2814 "m" (*pu64)
2815 : "memory");
2816 return u64;
2817# else
2818 uint64_t u64Old;
2819 for (;;)
2820 {
2821 uint64_t u64New;
2822 u64Old = ASMAtomicUoReadU64(pu64);
2823 u64New = u64Old + u64;
2824 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2825 break;
2826 ASMNopPause();
2827 }
2828 return u64Old;
2829# endif
2830}
2831#endif
2832
2833
2834/**
2835 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2836 *
2837 * @returns The old value.
2838 * @param pi64 Pointer to the value.
2839 * @param i64 Number to add.
2840 *
2841 * @remarks x86: Requires a Pentium or later.
2842 */
2843DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2844{
2845 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2846}
2847
2848
2849/**
2850 * Atomically exchanges and adds to a size_t value, ordered.
2851 *
2852 * @returns The old value.
2853 * @param pcb Pointer to the size_t value.
2854 * @param cb Number to add.
2855 */
2856DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2857{
2858#if ARCH_BITS == 64
2859 AssertCompileSize(size_t, 8);
2860 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2861#elif ARCH_BITS == 32
2862 AssertCompileSize(size_t, 4);
2863 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2864#elif ARCH_BITS == 16
2865 AssertCompileSize(size_t, 2);
2866 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2867#else
2868# error "Unsupported ARCH_BITS value"
2869#endif
2870}
2871
2872
/**
 * Atomically exchanges and adds a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicAddU32 or ASMAtomicAddU64; only 4
 * and 8 byte variables are handled.  Any other size trips AssertMsgFailed and
 * leaves *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2889
2890
2891
2892/**
2893 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2894 *
2895 * @returns The old value.
2896 * @param pu16 Pointer to the value.
2897 * @param u16 Number to subtract.
2898 *
2899 * @remarks x86: Requires a 486 or later.
2900 */
2901DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2902{
2903 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2904}
2905
2906
2907/**
2908 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2909 *
2910 * @returns The old value.
2911 * @param pi16 Pointer to the value.
2912 * @param i16 Number to subtract.
2913 *
2914 * @remarks x86: Requires a 486 or later.
2915 */
2916DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2917{
2918 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2919}
2920
2921
2922/**
2923 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2924 *
2925 * @returns The old value.
2926 * @param pu32 Pointer to the value.
2927 * @param u32 Number to subtract.
2928 *
2929 * @remarks x86: Requires a 486 or later.
2930 */
2931DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2932{
2933 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2934}
2935
2936
2937/**
2938 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2939 *
2940 * @returns The old value.
2941 * @param pi32 Pointer to the value.
2942 * @param i32 Number to subtract.
2943 *
2944 * @remarks x86: Requires a 486 or later.
2945 */
2946DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2947{
2948 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2949}
2950
2951
2952/**
2953 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2954 *
2955 * @returns The old value.
2956 * @param pu64 Pointer to the value.
2957 * @param u64 Number to subtract.
2958 *
2959 * @remarks x86: Requires a Pentium or later.
2960 */
2961DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2962{
2963 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2964}
2965
2966
2967/**
2968 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2969 *
2970 * @returns The old value.
2971 * @param pi64 Pointer to the value.
2972 * @param i64 Number to subtract.
2973 *
2974 * @remarks x86: Requires a Pentium or later.
2975 */
2976DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2977{
2978 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2979}
2980
2981
2982/**
2983 * Atomically exchanges and subtracts to a size_t value, ordered.
2984 *
2985 * @returns The old value.
2986 * @param pcb Pointer to the size_t value.
2987 * @param cb Number to subtract.
2988 *
2989 * @remarks x86: Requires a 486 or later.
2990 */
2991DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2992{
2993#if ARCH_BITS == 64
2994 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2995#elif ARCH_BITS == 32
2996 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2997#elif ARCH_BITS == 16
2998 AssertCompileSize(size_t, 2);
2999 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3000#else
3001# error "Unsupported ARCH_BITS value"
3002#endif
3003}
3004
3005
/**
 * Atomically exchanges and subtracts a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicSubU32 or ASMAtomicSubU64; only 4
 * and 8 byte variables are handled.  Any other size trips AssertMsgFailed and
 * leaves *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
3024
3025
3026
3027/**
3028 * Atomically increment a 16-bit value, ordered.
3029 *
3030 * @returns The new value.
3031 * @param pu16 Pointer to the value to increment.
3032 * @remarks Not implemented. Just to make 16-bit code happy.
3033 *
3034 * @remarks x86: Requires a 486 or later.
3035 */
3036RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3037
3038
3039/**
3040 * Atomically increment a 32-bit value, ordered.
3041 *
3042 * @returns The new value.
3043 * @param pu32 Pointer to the value to increment.
3044 *
3045 * @remarks x86: Requires a 486 or later.
3046 */
3047#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3048RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3049#else
3050DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3051{
3052 uint32_t u32;
3053# if RT_INLINE_ASM_USES_INTRIN
3054 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3055 return u32;
3056
3057# elif RT_INLINE_ASM_GNU_STYLE
3058 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3059 : "=r" (u32),
3060 "=m" (*pu32)
3061 : "0" (1),
3062 "m" (*pu32)
3063 : "memory");
3064 return u32+1;
3065# else
3066 __asm
3067 {
3068 mov eax, 1
3069# ifdef RT_ARCH_AMD64
3070 mov rdx, [pu32]
3071 lock xadd [rdx], eax
3072# else
3073 mov edx, [pu32]
3074 lock xadd [edx], eax
3075# endif
3076 mov u32, eax
3077 }
3078 return u32+1;
3079# endif
3080}
3081#endif
3082
3083
3084/**
3085 * Atomically increment a signed 32-bit value, ordered.
3086 *
3087 * @returns The new value.
3088 * @param pi32 Pointer to the value to increment.
3089 *
3090 * @remarks x86: Requires a 486 or later.
3091 */
3092DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3093{
3094 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3095}
3096
3097
3098/**
3099 * Atomically increment a 64-bit value, ordered.
3100 *
3101 * @returns The new value.
3102 * @param pu64 Pointer to the value to increment.
3103 *
3104 * @remarks x86: Requires a Pentium or later.
3105 */
3106#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3107DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3108#else
3109DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3110{
3111# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3112 uint64_t u64;
3113 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3114 return u64;
3115
3116# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3117 uint64_t u64;
3118 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3119 : "=r" (u64),
3120 "=m" (*pu64)
3121 : "0" (1),
3122 "m" (*pu64)
3123 : "memory");
3124 return u64 + 1;
3125# else
3126 return ASMAtomicAddU64(pu64, 1) + 1;
3127# endif
3128}
3129#endif
3130
3131
3132/**
3133 * Atomically increment a signed 64-bit value, ordered.
3134 *
3135 * @returns The new value.
3136 * @param pi64 Pointer to the value to increment.
3137 *
3138 * @remarks x86: Requires a Pentium or later.
3139 */
3140DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3141{
3142 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3143}
3144
3145
3146/**
3147 * Atomically increment a size_t value, ordered.
3148 *
3149 * @returns The new value.
3150 * @param pcb Pointer to the value to increment.
3151 *
3152 * @remarks x86: Requires a 486 or later.
3153 */
3154DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3155{
3156#if ARCH_BITS == 64
3157 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3158#elif ARCH_BITS == 32
3159 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3160#elif ARCH_BITS == 16
3161 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3162#else
3163# error "Unsupported ARCH_BITS value"
3164#endif
3165}
3166
3167
3168
3169/**
3170 * Atomically decrement an unsigned 16-bit value, ordered.
3171 *
3172 * @returns The new value.
 *          NOTE(review): declared as uint32_t although the operand is 16-bit
 *          and ASMAtomicIncU16 returns uint16_t - confirm whether intended.
3173 * @param pu16 Pointer to the value to decrement.
3174 * @remarks Not implemented. Just to make 16-bit code happy.
3175 *
3176 * @remarks x86: Requires a 486 or later.
3177 */
3178RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3179
3180
3181/**
3182 * Atomically decrement an unsigned 32-bit value, ordered.
3183 *
3184 * @returns The new value.
3185 * @param pu32 Pointer to the value to decrement.
3186 *
3187 * @remarks x86: Requires a 486 or later.
3188 */
3189#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3190RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3191#else
3192DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3193{
3194 uint32_t u32;
3195# if RT_INLINE_ASM_USES_INTRIN
3196 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3197 return u32;
3198
3199# elif RT_INLINE_ASM_GNU_STYLE
3200 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3201 : "=r" (u32),
3202 "=m" (*pu32)
3203 : "0" (-1),
3204 "m" (*pu32)
3205 : "memory");
3206 return u32-1;
3207# else
3208 __asm
3209 {
3210 mov eax, -1
3211# ifdef RT_ARCH_AMD64
3212 mov rdx, [pu32]
3213 lock xadd [rdx], eax
3214# else
3215 mov edx, [pu32]
3216 lock xadd [edx], eax
3217# endif
3218 mov u32, eax
3219 }
3220 return u32-1;
3221# endif
3222}
3223#endif
3224
3225
3226/**
3227 * Atomically decrement a signed 32-bit value, ordered.
3228 *
3229 * @returns The new value.
3230 * @param pi32 Pointer to the value to decrement.
3231 *
3232 * @remarks x86: Requires a 486 or later.
3233 */
3234DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3235{
3236 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3237}
3238
3239
3240/**
3241 * Atomically decrement an unsigned 64-bit value, ordered.
3242 *
3243 * @returns The new value.
3244 * @param pu64 Pointer to the value to decrement.
3245 *
3246 * @remarks x86: Requires a Pentium or later.
3247 */
3248#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3249RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3250#else
3251DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3252{
3253# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3254 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3255 return u64;
3256
3257# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3258 uint64_t u64;
3259 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3260 : "=r" (u64),
3261 "=m" (*pu64)
3262 : "0" (~(uint64_t)0),
3263 "m" (*pu64)
3264 : "memory");
3265 return u64-1;
3266# else
3267 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3268# endif
3269}
3270#endif
3271
3272
3273/**
3274 * Atomically decrement a signed 64-bit value, ordered.
3275 *
3276 * @returns The new value.
3277 * @param pi64 Pointer to the value to decrement.
3278 *
3279 * @remarks x86: Requires a Pentium or later.
3280 */
3281DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3282{
3283 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3284}
3285
3286
3287/**
3288 * Atomically decrement a size_t value, ordered.
3289 *
3290 * @returns The new value.
3291 * @param pcb Pointer to the value to decrement.
3292 *
3293 * @remarks x86: Requires a 486 or later.
3294 */
3295DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3296{
3297#if ARCH_BITS == 64
3298 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3299#elif ARCH_BITS == 32
3300 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3301#elif ARCH_BITS == 16
3302 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3303#else
3304# error "Unsupported ARCH_BITS value"
3305#endif
3306}
3307
3308
3309/**
3310 * Atomically Or an unsigned 32-bit value, ordered.
3311 *
3312 * @param pu32 Pointer to the pointer variable to OR u32 with.
3313 * @param u32 The value to OR *pu32 with.
3314 *
3315 * @remarks x86: Requires a 386 or later.
3316 */
3317#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3318RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3319#else
3320DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3321{
3322# if RT_INLINE_ASM_USES_INTRIN
3323 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3324
3325# elif RT_INLINE_ASM_GNU_STYLE
3326 __asm__ __volatile__("lock; orl %1, %0\n\t"
3327 : "=m" (*pu32)
3328 : "ir" (u32),
3329 "m" (*pu32));
3330# else
3331 __asm
3332 {
3333 mov eax, [u32]
3334# ifdef RT_ARCH_AMD64
3335 mov rdx, [pu32]
3336 lock or [rdx], eax
3337# else
3338 mov edx, [pu32]
3339 lock or [edx], eax
3340# endif
3341 }
3342# endif
3343}
3344#endif
3345
3346
3347/**
3348 * Atomically Or a signed 32-bit value, ordered.
3349 *
3350 * @param pi32 Pointer to the pointer variable to OR u32 with.
3351 * @param i32 The value to OR *pu32 with.
3352 *
3353 * @remarks x86: Requires a 386 or later.
3354 */
3355DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3356{
3357 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3358}
3359
3360
3361/**
3362 * Atomically Or an unsigned 64-bit value, ordered.
3363 *
3364 * @param pu64 Pointer to the pointer variable to OR u64 with.
3365 * @param u64 The value to OR *pu64 with.
3366 *
3367 * @remarks x86: Requires a Pentium or later.
3368 */
3369#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3370DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3371#else
3372DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3373{
3374# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3375 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3376
3377# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3378 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3379 : "=m" (*pu64)
3380 : "r" (u64),
3381 "m" (*pu64));
3382# else
3383 for (;;)
3384 {
3385 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3386 uint64_t u64New = u64Old | u64;
3387 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3388 break;
3389 ASMNopPause();
3390 }
3391# endif
3392}
3393#endif
3394
3395
3396/**
3397 * Atomically Or a signed 64-bit value, ordered.
3398 *
3399 * @param pi64 Pointer to the pointer variable to OR u64 with.
3400 * @param i64 The value to OR *pu64 with.
3401 *
3402 * @remarks x86: Requires a Pentium or later.
3403 */
3404DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3405{
3406 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3407}
3408
3409
3410/**
3411 * Atomically And an unsigned 32-bit value, ordered.
3412 *
3413 * @param pu32 Pointer to the pointer variable to AND u32 with.
3414 * @param u32 The value to AND *pu32 with.
3415 *
3416 * @remarks x86: Requires a 386 or later.
3417 */
3418#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3419RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3420#else
3421DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3422{
3423# if RT_INLINE_ASM_USES_INTRIN
3424 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3425
3426# elif RT_INLINE_ASM_GNU_STYLE
3427 __asm__ __volatile__("lock; andl %1, %0\n\t"
3428 : "=m" (*pu32)
3429 : "ir" (u32),
3430 "m" (*pu32));
3431# else
3432 __asm
3433 {
3434 mov eax, [u32]
3435# ifdef RT_ARCH_AMD64
3436 mov rdx, [pu32]
3437 lock and [rdx], eax
3438# else
3439 mov edx, [pu32]
3440 lock and [edx], eax
3441# endif
3442 }
3443# endif
3444}
3445#endif
3446
3447
3448/**
3449 * Atomically And a signed 32-bit value, ordered.
3450 *
3451 * @param pi32 Pointer to the pointer variable to AND i32 with.
3452 * @param i32 The value to AND *pi32 with.
3453 *
3454 * @remarks x86: Requires a 386 or later.
3455 */
3456DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3457{
3458 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3459}
3460
3461
3462/**
3463 * Atomically And an unsigned 64-bit value, ordered.
3464 *
3465 * @param pu64 Pointer to the pointer variable to AND u64 with.
3466 * @param u64 The value to AND *pu64 with.
3467 *
3468 * @remarks x86: Requires a Pentium or later.
3469 */
3470#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3471DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3472#else
3473DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3474{
3475# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3476 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3477
3478# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3479 __asm__ __volatile__("lock; andq %1, %0\n\t"
3480 : "=m" (*pu64)
3481 : "r" (u64),
3482 "m" (*pu64));
3483# else
3484 for (;;)
3485 {
3486 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3487 uint64_t u64New = u64Old & u64;
3488 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3489 break;
3490 ASMNopPause();
3491 }
3492# endif
3493}
3494#endif
3495
3496
3497/**
3498 * Atomically And a signed 64-bit value, ordered.
3499 *
3500 * @param pi64 Pointer to the pointer variable to AND i64 with.
3501 * @param i64 The value to AND *pi64 with.
3502 *
3503 * @remarks x86: Requires a Pentium or later.
3504 */
3505DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3506{
3507 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3508}
3509
3510
3511/**
3512 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3513 *
3514 * @param pu32 Pointer to the pointer variable to OR u32 with.
3515 * @param u32 The value to OR *pu32 with.
3516 *
3517 * @remarks x86: Requires a 386 or later.
3518 */
3519#if RT_INLINE_ASM_EXTERNAL
3520RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3521#else
3522DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3523{
3524# if RT_INLINE_ASM_GNU_STYLE
3525 __asm__ __volatile__("orl %1, %0\n\t"
3526 : "=m" (*pu32)
3527 : "ir" (u32),
3528 "m" (*pu32));
3529# else
3530 __asm
3531 {
3532 mov eax, [u32]
3533# ifdef RT_ARCH_AMD64
3534 mov rdx, [pu32]
3535 or [rdx], eax
3536# else
3537 mov edx, [pu32]
3538 or [edx], eax
3539# endif
3540 }
3541# endif
3542}
3543#endif
3544
3545
3546/**
3547 * Atomically OR a signed 32-bit value, unordered.
3548 *
3549 * @param pi32 Pointer to the pointer variable to OR u32 with.
3550 * @param i32 The value to OR *pu32 with.
3551 *
3552 * @remarks x86: Requires a 386 or later.
3553 */
3554DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3555{
3556 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3557}
3558
3559
3560/**
3561 * Atomically OR an unsigned 64-bit value, unordered.
3562 *
3563 * @param pu64 Pointer to the pointer variable to OR u64 with.
3564 * @param u64 The value to OR *pu64 with.
3565 *
3566 * @remarks x86: Requires a Pentium or later.
3567 */
3568#if RT_INLINE_ASM_EXTERNAL
3569DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3570#else
3571DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3572{
3573# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3574 __asm__ __volatile__("orq %1, %q0\n\t"
3575 : "=m" (*pu64)
3576 : "r" (u64),
3577 "m" (*pu64));
3578# else
3579 for (;;)
3580 {
3581 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3582 uint64_t u64New = u64Old | u64;
3583 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3584 break;
3585 ASMNopPause();
3586 }
3587# endif
3588}
3589#endif
3590
3591
3592/**
3593 * Atomically Or a signed 64-bit value, unordered.
3594 *
3595 * @param pi64 Pointer to the pointer variable to OR u64 with.
3596 * @param i64 The value to OR *pu64 with.
3597 *
3598 * @remarks x86: Requires a Pentium or later.
3599 */
3600DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3601{
3602 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3603}
3604
3605
3606/**
3607 * Atomically And an unsigned 32-bit value, unordered.
3608 *
3609 * @param pu32 Pointer to the pointer variable to AND u32 with.
3610 * @param u32 The value to AND *pu32 with.
3611 *
3612 * @remarks x86: Requires a 386 or later.
3613 */
3614#if RT_INLINE_ASM_EXTERNAL
3615RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3616#else
3617DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3618{
3619# if RT_INLINE_ASM_GNU_STYLE
3620 __asm__ __volatile__("andl %1, %0\n\t"
3621 : "=m" (*pu32)
3622 : "ir" (u32),
3623 "m" (*pu32));
3624# else
3625 __asm
3626 {
3627 mov eax, [u32]
3628# ifdef RT_ARCH_AMD64
3629 mov rdx, [pu32]
3630 and [rdx], eax
3631# else
3632 mov edx, [pu32]
3633 and [edx], eax
3634# endif
3635 }
3636# endif
3637}
3638#endif
3639
3640
3641/**
3642 * Atomically And a signed 32-bit value, unordered.
3643 *
3644 * @param pi32 Pointer to the pointer variable to AND i32 with.
3645 * @param i32 The value to AND *pi32 with.
3646 *
3647 * @remarks x86: Requires a 386 or later.
3648 */
3649DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3650{
3651 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3652}
3653
3654
3655/**
3656 * Atomically And an unsigned 64-bit value, unordered.
3657 *
3658 * @param pu64 Pointer to the pointer variable to AND u64 with.
3659 * @param u64 The value to AND *pu64 with.
3660 *
3661 * @remarks x86: Requires a Pentium or later.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL
3664DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3665#else
3666DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3667{
3668# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3669 __asm__ __volatile__("andq %1, %0\n\t"
3670 : "=m" (*pu64)
3671 : "r" (u64),
3672 "m" (*pu64));
3673# else
3674 for (;;)
3675 {
3676 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3677 uint64_t u64New = u64Old & u64;
3678 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3679 break;
3680 ASMNopPause();
3681 }
3682# endif
3683}
3684#endif
3685
3686
3687/**
3688 * Atomically And a signed 64-bit value, unordered.
3689 *
3690 * @param pi64 Pointer to the pointer variable to AND i64 with.
3691 * @param i64 The value to AND *pi64 with.
3692 *
3693 * @remarks x86: Requires a Pentium or later.
3694 */
3695DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3696{
3697 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3698}
3699
3700
3701/**
3702 * Atomically increment an unsigned 32-bit value, unordered.
3703 *
3704 * @returns the new value.
3705 * @param pu32 Pointer to the variable to increment.
3706 *
3707 * @remarks x86: Requires a 486 or later.
3708 */
3709#if RT_INLINE_ASM_EXTERNAL
3710RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3711#else
3712DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3713{
3714 uint32_t u32;
3715# if RT_INLINE_ASM_GNU_STYLE
3716 __asm__ __volatile__("xaddl %0, %1\n\t"
3717 : "=r" (u32),
3718 "=m" (*pu32)
3719 : "0" (1),
3720 "m" (*pu32)
3721 : "memory");
3722 return u32 + 1;
3723# else
3724 __asm
3725 {
3726 mov eax, 1
3727# ifdef RT_ARCH_AMD64
3728 mov rdx, [pu32]
3729 xadd [rdx], eax
3730# else
3731 mov edx, [pu32]
3732 xadd [edx], eax
3733# endif
3734 mov u32, eax
3735 }
3736 return u32 + 1;
3737# endif
3738}
3739#endif
3740
3741
3742/**
3743 * Atomically decrement an unsigned 32-bit value, unordered.
3744 *
3745 * @returns the new value.
3746 * @param pu32 Pointer to the variable to decrement.
3747 *
3748 * @remarks x86: Requires a 486 or later.
3749 */
3750#if RT_INLINE_ASM_EXTERNAL
3751RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3752#else
3753DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3754{
3755 uint32_t u32;
3756# if RT_INLINE_ASM_GNU_STYLE
3757 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3758 : "=r" (u32),
3759 "=m" (*pu32)
3760 : "0" (-1),
3761 "m" (*pu32)
3762 : "memory");
3763 return u32 - 1;
3764# else
3765 __asm
3766 {
3767 mov eax, -1
3768# ifdef RT_ARCH_AMD64
3769 mov rdx, [pu32]
3770 xadd [rdx], eax
3771# else
3772 mov edx, [pu32]
3773 xadd [edx], eax
3774# endif
3775 mov u32, eax
3776 }
3777 return u32 - 1;
3778# endif
3779}
3780#endif
3781
3782
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the helpers in this header: 0x2000 when RT_ARCH_SPARC64
 * is defined, 0x1000 otherwise.  Defined locally so that iprt/param.h does
 * not have to be dragged in.
 *
 * If a PAGE_SIZE macro is visible (and NT_INCLUDED is not defined) it is
 * checked against this value and the build fails on a mismatch.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
#else
# define RT_ASM_PAGE_SIZE   0x1000
#endif
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if defined(RT_ARCH_SPARC64)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# else
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3802
3803/**
3804 * Zeros a 4K memory page.
3805 *
3806 * @param pv Pointer to the memory block. This must be page aligned.
3807 */
3808#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3809RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3810# else
3811DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3812{
3813# if RT_INLINE_ASM_USES_INTRIN
3814# ifdef RT_ARCH_AMD64
3815 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3816# else
3817 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3818# endif
3819
3820# elif RT_INLINE_ASM_GNU_STYLE
3821 RTCCUINTREG uDummy;
3822# ifdef RT_ARCH_AMD64
3823 __asm__ __volatile__("rep stosq"
3824 : "=D" (pv),
3825 "=c" (uDummy)
3826 : "0" (pv),
3827 "c" (RT_ASM_PAGE_SIZE >> 3),
3828 "a" (0)
3829 : "memory");
3830# else
3831 __asm__ __volatile__("rep stosl"
3832 : "=D" (pv),
3833 "=c" (uDummy)
3834 : "0" (pv),
3835 "c" (RT_ASM_PAGE_SIZE >> 2),
3836 "a" (0)
3837 : "memory");
3838# endif
3839# else
3840 __asm
3841 {
3842# ifdef RT_ARCH_AMD64
3843 xor rax, rax
3844 mov ecx, 0200h
3845 mov rdi, [pv]
3846 rep stosq
3847# else
3848 xor eax, eax
3849 mov ecx, 0400h
3850 mov edi, [pv]
3851 rep stosd
3852# endif
3853 }
3854# endif
3855}
3856# endif
3857
3858
3859/**
3860 * Zeros a memory block with a 32-bit aligned size.
3861 *
3862 * @param pv Pointer to the memory block.
3863 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3864 */
3865#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3866RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3867#else
3868DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3869{
3870# if RT_INLINE_ASM_USES_INTRIN
3871# ifdef RT_ARCH_AMD64
3872 if (!(cb & 7))
3873 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3874 else
3875# endif
3876 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3877
3878# elif RT_INLINE_ASM_GNU_STYLE
3879 __asm__ __volatile__("rep stosl"
3880 : "=D" (pv),
3881 "=c" (cb)
3882 : "0" (pv),
3883 "1" (cb >> 2),
3884 "a" (0)
3885 : "memory");
3886# else
3887 __asm
3888 {
3889 xor eax, eax
3890# ifdef RT_ARCH_AMD64
3891 mov rcx, [cb]
3892 shr rcx, 2
3893 mov rdi, [pv]
3894# else
3895 mov ecx, [cb]
3896 shr ecx, 2
3897 mov edi, [pv]
3898# endif
3899 rep stosd
3900 }
3901# endif
3902}
3903#endif
3904
3905
3906/**
3907 * Fills a memory block with a 32-bit aligned size.
3908 *
3909 * @param pv Pointer to the memory block.
3910 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3911 * @param u32 The value to fill with.
3912 */
3913#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3914RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3915#else
3916DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3917{
3918# if RT_INLINE_ASM_USES_INTRIN
3919# ifdef RT_ARCH_AMD64
3920 if (!(cb & 7))
3921 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3922 else
3923# endif
3924 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3925
3926# elif RT_INLINE_ASM_GNU_STYLE
3927 __asm__ __volatile__("rep stosl"
3928 : "=D" (pv),
3929 "=c" (cb)
3930 : "0" (pv),
3931 "1" (cb >> 2),
3932 "a" (u32)
3933 : "memory");
3934# else
3935 __asm
3936 {
3937# ifdef RT_ARCH_AMD64
3938 mov rcx, [cb]
3939 shr rcx, 2
3940 mov rdi, [pv]
3941# else
3942 mov ecx, [cb]
3943 shr ecx, 2
3944 mov edi, [pv]
3945# endif
3946 mov eax, [u32]
3947 rep stosd
3948 }
3949# endif
3950}
3951#endif
3952
3953
3954/**
3955 * Checks if a memory block is all zeros.
3956 *
3957 * @returns Pointer to the first non-zero byte.
3958 * @returns NULL if all zero.
3959 *
3960 * @param pv Pointer to the memory block.
3961 * @param cb Number of bytes in the block.
3962 *
3963 * @todo Fix name, it is a predicate function but it's not returning boolean!
3964 */
3965#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3966 && !defined(RT_ARCH_SPARC64) \
3967 && !defined(RT_ARCH_SPARC)
3968DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3969#else
3970DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3971{
3972 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3973 for (; cb; cb--, pb++)
3974 if (RT_LIKELY(*pb == 0))
3975 { /* likely */ }
3976 else
3977 return (void RT_FAR *)pb;
3978 return NULL;
3979}
3980#endif
3981
3982
3983/**
3984 * Checks if a memory block is all zeros.
3985 *
3986 * @returns true if zero, false if not.
3987 *
3988 * @param pv Pointer to the memory block.
3989 * @param cb Number of bytes in the block.
3990 *
3991 * @sa ASMMemFirstNonZero
3992 */
3993DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3994{
3995 return ASMMemFirstNonZero(pv, cb) == NULL;
3996}
3997
3998
3999/**
4000 * Checks if a memory page is all zeros.
4001 *
4002 * @returns true / false.
4003 *
4004 * @param pvPage Pointer to the page. Must be aligned on 16 byte
4005 * boundary
4006 */
4007DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
4008{
4009# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4010 union { RTCCUINTREG r; bool f; } uAX;
4011 RTCCUINTREG xCX, xDI;
4012 Assert(!((uintptr_t)pvPage & 15));
4013 __asm__ __volatile__("repe; "
4014# ifdef RT_ARCH_AMD64
4015 "scasq\n\t"
4016# else
4017 "scasl\n\t"
4018# endif
4019 "setnc %%al\n\t"
4020 : "=&c" (xCX),
4021 "=&D" (xDI),
4022 "=&a" (uAX.r)
4023 : "mr" (pvPage),
4024# ifdef RT_ARCH_AMD64
4025 "0" (RT_ASM_PAGE_SIZE/8),
4026# else
4027 "0" (RT_ASM_PAGE_SIZE/4),
4028# endif
4029 "1" (pvPage),
4030 "2" (0));
4031 return uAX.f;
4032# else
4033 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4034 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4035 Assert(!((uintptr_t)pvPage & 15));
4036 for (;;)
4037 {
4038 if (puPtr[0]) return false;
4039 if (puPtr[4]) return false;
4040
4041 if (puPtr[2]) return false;
4042 if (puPtr[6]) return false;
4043
4044 if (puPtr[1]) return false;
4045 if (puPtr[5]) return false;
4046
4047 if (puPtr[3]) return false;
4048 if (puPtr[7]) return false;
4049
4050 if (!--cLeft)
4051 return true;
4052 puPtr += 8;
4053 }
4054# endif
4055}
4056
4057
4058/**
4059 * Checks if a memory block is filled with the specified byte, returning the
4060 * first mismatch.
4061 *
4062 * This is sort of an inverted memchr.
4063 *
4064 * @returns Pointer to the byte which doesn't equal u8.
4065 * @returns NULL if all equal to u8.
4066 *
4067 * @param pv Pointer to the memory block.
4068 * @param cb Number of bytes in the block.
4069 * @param u8 The value it's supposed to be filled with.
4070 *
4071 * @remarks No alignment requirements.
4072 */
4073#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4074 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4075 && !defined(RT_ARCH_SPARC64) \
4076 && !defined(RT_ARCH_SPARC)
4077DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4078#else
4079DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4080{
4081 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4082 for (; cb; cb--, pb++)
4083 if (RT_LIKELY(*pb == u8))
4084 { /* likely */ }
4085 else
4086 return (void *)pb;
4087 return NULL;
4088}
4089#endif
4090
4091
4092/**
4093 * Checks if a memory block is filled with the specified byte.
4094 *
4095 * @returns true if all matching, false if not.
4096 *
4097 * @param pv Pointer to the memory block.
4098 * @param cb Number of bytes in the block.
4099 * @param u8 The value it's supposed to be filled with.
4100 *
4101 * @remarks No alignment requirements.
4102 */
4103DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4104{
4105 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4106}
4107
4108
4109/**
4110 * Checks if a memory block is filled with the specified 32-bit value.
4111 *
4112 * This is a sort of inverted memchr.
4113 *
4114 * @returns Pointer to the first value which doesn't equal u32.
4115 * @returns NULL if all equal to u32.
4116 *
4117 * @param pv Pointer to the memory block.
4118 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4119 * @param u32 The value it's supposed to be filled with.
4120 */
4121DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4122{
4123/** @todo rewrite this in inline assembly? */
4124 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4125 for (; cb; cb -= 4, pu32++)
4126 if (RT_LIKELY(*pu32 == u32))
4127 { /* likely */ }
4128 else
4129 return (uint32_t RT_FAR *)pu32;
4130 return NULL;
4131}
4132
4133
4134/**
4135 * Probes a byte pointer for read access.
4136 *
4137 * While the function will not fault if the byte is not read accessible,
4138 * the idea is to do this in a safe place like before acquiring locks
4139 * and such like.
4140 *
4141 * Also, this functions guarantees that an eager compiler is not going
4142 * to optimize the probing away.
4143 *
4144 * @param pvByte Pointer to the byte.
4145 */
4146#if RT_INLINE_ASM_EXTERNAL
4147RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4148#else
4149DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4150{
4151 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4152 uint8_t u8;
4153# if RT_INLINE_ASM_GNU_STYLE
4154 __asm__ __volatile__("movb (%1), %0\n\t"
4155 : "=r" (u8)
4156 : "r" (pvByte));
4157# else
4158 __asm
4159 {
4160# ifdef RT_ARCH_AMD64
4161 mov rax, [pvByte]
4162 mov al, [rax]
4163# else
4164 mov eax, [pvByte]
4165 mov al, [eax]
4166# endif
4167 mov [u8], al
4168 }
4169# endif
4170 return u8;
4171}
4172#endif
4173
4174/**
4175 * Probes a buffer for read access page by page.
4176 *
4177 * While the function will fault if the buffer is not fully read
4178 * accessible, the idea is to do this in a safe place like before
4179 * acquiring locks and such like.
4180 *
4181 * Also, this functions guarantees that an eager compiler is not going
4182 * to optimize the probing away.
4183 *
4184 * @param pvBuf Pointer to the buffer.
4185 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4186 */
4187DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4188{
4189 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4190 /* the first byte */
4191 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4192 ASMProbeReadByte(pu8);
4193
4194 /* the pages in between pages. */
4195 while (cbBuf > RT_ASM_PAGE_SIZE)
4196 {
4197 ASMProbeReadByte(pu8);
4198 cbBuf -= RT_ASM_PAGE_SIZE;
4199 pu8 += RT_ASM_PAGE_SIZE;
4200 }
4201
4202 /* the last byte */
4203 ASMProbeReadByte(pu8 + cbBuf - 1);
4204}
4205
4206
4207
4208/** @defgroup grp_inline_bits Bit Operations
4209 * @{
4210 */
4211
4212
4213/**
4214 * Sets a bit in a bitmap.
4215 *
4216 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4217 * @param iBit The bit to set.
4218 *
4219 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4220 * However, doing so will yield better performance as well as avoiding
4221 * traps accessing the last bits in the bitmap.
4222 */
4223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4224RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4225#else
4226DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4227{
4228# if RT_INLINE_ASM_USES_INTRIN
4229 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4230
4231# elif RT_INLINE_ASM_GNU_STYLE
4232 __asm__ __volatile__("btsl %1, %0"
4233 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4234 : "Ir" (iBit),
4235 "m" (*(volatile long RT_FAR *)pvBitmap)
4236 : "memory");
4237# else
4238 __asm
4239 {
4240# ifdef RT_ARCH_AMD64
4241 mov rax, [pvBitmap]
4242 mov edx, [iBit]
4243 bts [rax], edx
4244# else
4245 mov eax, [pvBitmap]
4246 mov edx, [iBit]
4247 bts [eax], edx
4248# endif
4249 }
4250# endif
4251}
4252#endif
4253
4254
4255/**
4256 * Atomically sets a bit in a bitmap, ordered.
4257 *
4258 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4259 * the memory access isn't atomic!
4260 * @param iBit The bit to set.
4261 *
4262 * @remarks x86: Requires a 386 or later.
4263 */
4264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4265RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4266#else
4267DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4268{
4269 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4270# if RT_INLINE_ASM_USES_INTRIN
4271 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4272# elif RT_INLINE_ASM_GNU_STYLE
4273 __asm__ __volatile__("lock; btsl %1, %0"
4274 : "=m" (*(volatile long *)pvBitmap)
4275 : "Ir" (iBit),
4276 "m" (*(volatile long *)pvBitmap)
4277 : "memory");
4278# else
4279 __asm
4280 {
4281# ifdef RT_ARCH_AMD64
4282 mov rax, [pvBitmap]
4283 mov edx, [iBit]
4284 lock bts [rax], edx
4285# else
4286 mov eax, [pvBitmap]
4287 mov edx, [iBit]
4288 lock bts [eax], edx
4289# endif
4290 }
4291# endif
4292}
4293#endif
4294
4295
4296/**
4297 * Clears a bit in a bitmap.
4298 *
4299 * @param pvBitmap Pointer to the bitmap.
4300 * @param iBit The bit to clear.
4301 *
4302 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4303 * However, doing so will yield better performance as well as avoiding
4304 * traps accessing the last bits in the bitmap.
4305 */
4306#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4307RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4308#else
4309DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4310{
4311# if RT_INLINE_ASM_USES_INTRIN
4312 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4313
4314# elif RT_INLINE_ASM_GNU_STYLE
4315 __asm__ __volatile__("btrl %1, %0"
4316 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4317 : "Ir" (iBit),
4318 "m" (*(volatile long RT_FAR *)pvBitmap)
4319 : "memory");
4320# else
4321 __asm
4322 {
4323# ifdef RT_ARCH_AMD64
4324 mov rax, [pvBitmap]
4325 mov edx, [iBit]
4326 btr [rax], edx
4327# else
4328 mov eax, [pvBitmap]
4329 mov edx, [iBit]
4330 btr [eax], edx
4331# endif
4332 }
4333# endif
4334}
4335#endif
4336
4337
4338/**
4339 * Atomically clears a bit in a bitmap, ordered.
4340 *
4341 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4342 * the memory access isn't atomic!
4343 * @param iBit The bit to toggle set.
4344 *
4345 * @remarks No memory barrier, take care on smp.
4346 * @remarks x86: Requires a 386 or later.
4347 */
4348#if RT_INLINE_ASM_EXTERNAL
4349RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4350#else
4351DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4352{
4353 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4354# if RT_INLINE_ASM_GNU_STYLE
4355 __asm__ __volatile__("lock; btrl %1, %0"
4356 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4357 : "Ir" (iBit),
4358 "m" (*(volatile long RT_FAR *)pvBitmap)
4359 : "memory");
4360# else
4361 __asm
4362 {
4363# ifdef RT_ARCH_AMD64
4364 mov rax, [pvBitmap]
4365 mov edx, [iBit]
4366 lock btr [rax], edx
4367# else
4368 mov eax, [pvBitmap]
4369 mov edx, [iBit]
4370 lock btr [eax], edx
4371# endif
4372 }
4373# endif
4374}
4375#endif
4376
4377
4378/**
4379 * Toggles a bit in a bitmap.
4380 *
4381 * @param pvBitmap Pointer to the bitmap.
4382 * @param iBit The bit to toggle.
4383 *
4384 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4385 * However, doing so will yield better performance as well as avoiding
4386 * traps accessing the last bits in the bitmap.
4387 */
4388#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4389RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4390#else
4391DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4392{
4393# if RT_INLINE_ASM_USES_INTRIN
4394 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4395# elif RT_INLINE_ASM_GNU_STYLE
4396 __asm__ __volatile__("btcl %1, %0"
4397 : "=m" (*(volatile long *)pvBitmap)
4398 : "Ir" (iBit),
4399 "m" (*(volatile long *)pvBitmap)
4400 : "memory");
4401# else
4402 __asm
4403 {
4404# ifdef RT_ARCH_AMD64
4405 mov rax, [pvBitmap]
4406 mov edx, [iBit]
4407 btc [rax], edx
4408# else
4409 mov eax, [pvBitmap]
4410 mov edx, [iBit]
4411 btc [eax], edx
4412# endif
4413 }
4414# endif
4415}
4416#endif
4417
4418
4419/**
4420 * Atomically toggles a bit in a bitmap, ordered.
4421 *
4422 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4423 * the memory access isn't atomic!
4424 * @param iBit The bit to test and set.
4425 *
4426 * @remarks x86: Requires a 386 or later.
4427 */
4428#if RT_INLINE_ASM_EXTERNAL
4429RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4430#else
4431DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4432{
4433 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4434# if RT_INLINE_ASM_GNU_STYLE
4435 __asm__ __volatile__("lock; btcl %1, %0"
4436 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4437 : "Ir" (iBit),
4438 "m" (*(volatile long RT_FAR *)pvBitmap)
4439 : "memory");
4440# else
4441 __asm
4442 {
4443# ifdef RT_ARCH_AMD64
4444 mov rax, [pvBitmap]
4445 mov edx, [iBit]
4446 lock btc [rax], edx
4447# else
4448 mov eax, [pvBitmap]
4449 mov edx, [iBit]
4450 lock btc [eax], edx
4451# endif
4452 }
4453# endif
4454}
4455#endif
4456
4457
4458/**
4459 * Tests and sets a bit in a bitmap.
4460 *
4461 * @returns true if the bit was set.
4462 * @returns false if the bit was clear.
4463 *
4464 * @param pvBitmap Pointer to the bitmap.
4465 * @param iBit The bit to test and set.
4466 *
4467 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4468 * However, doing so will yield better performance as well as avoiding
4469 * traps accessing the last bits in the bitmap.
4470 */
4471#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4472RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4473#else
4474DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4475{
4476 union { bool f; uint32_t u32; uint8_t u8; } rc;
4477# if RT_INLINE_ASM_USES_INTRIN
4478 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4479
4480# elif RT_INLINE_ASM_GNU_STYLE
4481 __asm__ __volatile__("btsl %2, %1\n\t"
4482 "setc %b0\n\t"
4483 "andl $1, %0\n\t"
4484 : "=q" (rc.u32),
4485 "=m" (*(volatile long RT_FAR *)pvBitmap)
4486 : "Ir" (iBit),
4487 "m" (*(volatile long RT_FAR *)pvBitmap)
4488 : "memory");
4489# else
4490 __asm
4491 {
4492 mov edx, [iBit]
4493# ifdef RT_ARCH_AMD64
4494 mov rax, [pvBitmap]
4495 bts [rax], edx
4496# else
4497 mov eax, [pvBitmap]
4498 bts [eax], edx
4499# endif
4500 setc al
4501 and eax, 1
4502 mov [rc.u32], eax
4503 }
4504# endif
4505 return rc.f;
4506}
4507#endif
4508
4509
4510/**
4511 * Atomically tests and sets a bit in a bitmap, ordered.
4512 *
4513 * @returns true if the bit was set.
4514 * @returns false if the bit was clear.
4515 *
4516 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4517 * the memory access isn't atomic!
4518 * @param iBit The bit to set.
4519 *
4520 * @remarks x86: Requires a 386 or later.
4521 */
4522#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4523RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4524#else
4525DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4526{
4527 union { bool f; uint32_t u32; uint8_t u8; } rc;
4528 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4529# if RT_INLINE_ASM_USES_INTRIN
4530 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4531# elif RT_INLINE_ASM_GNU_STYLE
4532 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4533 "setc %b0\n\t"
4534 "andl $1, %0\n\t"
4535 : "=q" (rc.u32),
4536 "=m" (*(volatile long RT_FAR *)pvBitmap)
4537 : "Ir" (iBit),
4538 "m" (*(volatile long RT_FAR *)pvBitmap)
4539 : "memory");
4540# else
4541 __asm
4542 {
4543 mov edx, [iBit]
4544# ifdef RT_ARCH_AMD64
4545 mov rax, [pvBitmap]
4546 lock bts [rax], edx
4547# else
4548 mov eax, [pvBitmap]
4549 lock bts [eax], edx
4550# endif
4551 setc al
4552 and eax, 1
4553 mov [rc.u32], eax
4554 }
4555# endif
4556 return rc.f;
4557}
4558#endif
4559
4560
4561/**
4562 * Tests and clears a bit in a bitmap.
4563 *
4564 * @returns true if the bit was set.
4565 * @returns false if the bit was clear.
4566 *
4567 * @param pvBitmap Pointer to the bitmap.
4568 * @param iBit The bit to test and clear.
4569 *
4570 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4571 * However, doing so will yield better performance as well as avoiding
4572 * traps accessing the last bits in the bitmap.
4573 */
4574#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4575RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4576#else
4577DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4578{
4579 union { bool f; uint32_t u32; uint8_t u8; } rc;
4580# if RT_INLINE_ASM_USES_INTRIN
4581 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4582
4583# elif RT_INLINE_ASM_GNU_STYLE
4584 __asm__ __volatile__("btrl %2, %1\n\t"
4585 "setc %b0\n\t"
4586 "andl $1, %0\n\t"
4587 : "=q" (rc.u32),
4588 "=m" (*(volatile long RT_FAR *)pvBitmap)
4589 : "Ir" (iBit),
4590 "m" (*(volatile long RT_FAR *)pvBitmap)
4591 : "memory");
4592# else
4593 __asm
4594 {
4595 mov edx, [iBit]
4596# ifdef RT_ARCH_AMD64
4597 mov rax, [pvBitmap]
4598 btr [rax], edx
4599# else
4600 mov eax, [pvBitmap]
4601 btr [eax], edx
4602# endif
4603 setc al
4604 and eax, 1
4605 mov [rc.u32], eax
4606 }
4607# endif
4608 return rc.f;
4609}
4610#endif
4611
4612
4613/**
4614 * Atomically tests and clears a bit in a bitmap, ordered.
4615 *
4616 * @returns true if the bit was set.
4617 * @returns false if the bit was clear.
4618 *
4619 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4620 * the memory access isn't atomic!
4621 * @param iBit The bit to test and clear.
4622 *
4623 * @remarks No memory barrier, take care on smp.
4624 * @remarks x86: Requires a 386 or later.
4625 */
4626#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4627RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4628#else
4629DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4630{
4631 union { bool f; uint32_t u32; uint8_t u8; } rc;
4632 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4633# if RT_INLINE_ASM_USES_INTRIN
4634 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4635
4636# elif RT_INLINE_ASM_GNU_STYLE
4637 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4638 "setc %b0\n\t"
4639 "andl $1, %0\n\t"
4640 : "=q" (rc.u32),
4641 "=m" (*(volatile long RT_FAR *)pvBitmap)
4642 : "Ir" (iBit),
4643 "m" (*(volatile long RT_FAR *)pvBitmap)
4644 : "memory");
4645# else
4646 __asm
4647 {
4648 mov edx, [iBit]
4649# ifdef RT_ARCH_AMD64
4650 mov rax, [pvBitmap]
4651 lock btr [rax], edx
4652# else
4653 mov eax, [pvBitmap]
4654 lock btr [eax], edx
4655# endif
4656 setc al
4657 and eax, 1
4658 mov [rc.u32], eax
4659 }
4660# endif
4661 return rc.f;
4662}
4663#endif
4664
4665
4666/**
4667 * Tests and toggles a bit in a bitmap.
4668 *
4669 * @returns true if the bit was set.
4670 * @returns false if the bit was clear.
4671 *
4672 * @param pvBitmap Pointer to the bitmap.
4673 * @param iBit The bit to test and toggle.
4674 *
4675 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4676 * However, doing so will yield better performance as well as avoiding
4677 * traps accessing the last bits in the bitmap.
4678 */
4679#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4680RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4681#else
4682DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4683{
4684 union { bool f; uint32_t u32; uint8_t u8; } rc;
4685# if RT_INLINE_ASM_USES_INTRIN
4686 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4687
4688# elif RT_INLINE_ASM_GNU_STYLE
4689 __asm__ __volatile__("btcl %2, %1\n\t"
4690 "setc %b0\n\t"
4691 "andl $1, %0\n\t"
4692 : "=q" (rc.u32),
4693 "=m" (*(volatile long RT_FAR *)pvBitmap)
4694 : "Ir" (iBit),
4695 "m" (*(volatile long RT_FAR *)pvBitmap)
4696 : "memory");
4697# else
4698 __asm
4699 {
4700 mov edx, [iBit]
4701# ifdef RT_ARCH_AMD64
4702 mov rax, [pvBitmap]
4703 btc [rax], edx
4704# else
4705 mov eax, [pvBitmap]
4706 btc [eax], edx
4707# endif
4708 setc al
4709 and eax, 1
4710 mov [rc.u32], eax
4711 }
4712# endif
4713 return rc.f;
4714}
4715#endif
4716
4717
4718/**
4719 * Atomically tests and toggles a bit in a bitmap, ordered.
4720 *
4721 * @returns true if the bit was set.
4722 * @returns false if the bit was clear.
4723 *
4724 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4725 * the memory access isn't atomic!
4726 * @param iBit The bit to test and toggle.
4727 *
4728 * @remarks x86: Requires a 386 or later.
4729 */
4730#if RT_INLINE_ASM_EXTERNAL
4731RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4732#else
4733DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4734{
4735 union { bool f; uint32_t u32; uint8_t u8; } rc;
4736 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4737# if RT_INLINE_ASM_GNU_STYLE
4738 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4739 "setc %b0\n\t"
4740 "andl $1, %0\n\t"
4741 : "=q" (rc.u32),
4742 "=m" (*(volatile long RT_FAR *)pvBitmap)
4743 : "Ir" (iBit),
4744 "m" (*(volatile long RT_FAR *)pvBitmap)
4745 : "memory");
4746# else
4747 __asm
4748 {
4749 mov edx, [iBit]
4750# ifdef RT_ARCH_AMD64
4751 mov rax, [pvBitmap]
4752 lock btc [rax], edx
4753# else
4754 mov eax, [pvBitmap]
4755 lock btc [eax], edx
4756# endif
4757 setc al
4758 and eax, 1
4759 mov [rc.u32], eax
4760 }
4761# endif
4762 return rc.f;
4763}
4764#endif
4765
4766
4767/**
4768 * Tests if a bit in a bitmap is set.
4769 *
4770 * @returns true if the bit is set.
4771 * @returns false if the bit is clear.
4772 *
4773 * @param pvBitmap Pointer to the bitmap.
4774 * @param iBit The bit to test.
4775 *
4776 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4777 * However, doing so will yield better performance as well as avoiding
4778 * traps accessing the last bits in the bitmap.
4779 */
4780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4781RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4782#else
4783DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4784{
4785 union { bool f; uint32_t u32; uint8_t u8; } rc;
4786# if RT_INLINE_ASM_USES_INTRIN
4787 rc.u32 = _bittest((long *)pvBitmap, iBit);
4788# elif RT_INLINE_ASM_GNU_STYLE
4789
4790 __asm__ __volatile__("btl %2, %1\n\t"
4791 "setc %b0\n\t"
4792 "andl $1, %0\n\t"
4793 : "=q" (rc.u32)
4794 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4795 "Ir" (iBit)
4796 : "memory");
4797# else
4798 __asm
4799 {
4800 mov edx, [iBit]
4801# ifdef RT_ARCH_AMD64
4802 mov rax, [pvBitmap]
4803 bt [rax], edx
4804# else
4805 mov eax, [pvBitmap]
4806 bt [eax], edx
4807# endif
4808 setc al
4809 and eax, 1
4810 mov [rc.u32], eax
4811 }
4812# endif
4813 return rc.f;
4814}
4815#endif
4816
4817
4818/**
4819 * Clears a bit range within a bitmap.
4820 *
4821 * @param pvBitmap Pointer to the bitmap.
4822 * @param iBitStart The First bit to clear.
4823 * @param iBitEnd The first bit not to clear.
4824 */
4825DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4826{
4827 if (iBitStart < iBitEnd)
4828 {
4829 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4830 int32_t iStart = iBitStart & ~31;
4831 int32_t iEnd = iBitEnd & ~31;
4832 if (iStart == iEnd)
4833 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4834 else
4835 {
4836 /* bits in first dword. */
4837 if (iBitStart & 31)
4838 {
4839 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4840 pu32++;
4841 iBitStart = iStart + 32;
4842 }
4843
4844 /* whole dword. */
4845 if (iBitStart != iEnd)
4846 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4847
4848 /* bits in last dword. */
4849 if (iBitEnd & 31)
4850 {
4851 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4852 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4853 }
4854 }
4855 }
4856}
4857
4858
4859/**
4860 * Sets a bit range within a bitmap.
4861 *
4862 * @param pvBitmap Pointer to the bitmap.
4863 * @param iBitStart The First bit to set.
4864 * @param iBitEnd The first bit not to set.
4865 */
4866DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4867{
4868 if (iBitStart < iBitEnd)
4869 {
4870 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4871 int32_t iStart = iBitStart & ~31;
4872 int32_t iEnd = iBitEnd & ~31;
4873 if (iStart == iEnd)
4874 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4875 else
4876 {
4877 /* bits in first dword. */
4878 if (iBitStart & 31)
4879 {
4880 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4881 pu32++;
4882 iBitStart = iStart + 32;
4883 }
4884
4885 /* whole dword. */
4886 if (iBitStart != iEnd)
4887 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4888
4889 /* bits in last dword. */
4890 if (iBitEnd & 31)
4891 {
4892 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4893 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4894 }
4895 }
4896 }
4897}
4898
4899
4900/**
4901 * Finds the first clear bit in a bitmap.
4902 *
4903 * @returns Index of the first zero bit.
4904 * @returns -1 if no clear bit was found.
4905 * @param pvBitmap Pointer to the bitmap.
4906 * @param cBits The number of bits in the bitmap. Multiple of 32.
4907 */
4908#if RT_INLINE_ASM_EXTERNAL
4909DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4910#else
4911DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4912{
4913 if (cBits)
4914 {
4915 int32_t iBit;
4916# if RT_INLINE_ASM_GNU_STYLE
4917 RTCCUINTREG uEAX, uECX, uEDI;
4918 cBits = RT_ALIGN_32(cBits, 32);
4919 __asm__ __volatile__("repe; scasl\n\t"
4920 "je 1f\n\t"
4921# ifdef RT_ARCH_AMD64
4922 "lea -4(%%rdi), %%rdi\n\t"
4923 "xorl (%%rdi), %%eax\n\t"
4924 "subq %5, %%rdi\n\t"
4925# else
4926 "lea -4(%%edi), %%edi\n\t"
4927 "xorl (%%edi), %%eax\n\t"
4928 "subl %5, %%edi\n\t"
4929# endif
4930 "shll $3, %%edi\n\t"
4931 "bsfl %%eax, %%edx\n\t"
4932 "addl %%edi, %%edx\n\t"
4933 "1:\t\n"
4934 : "=d" (iBit),
4935 "=&c" (uECX),
4936 "=&D" (uEDI),
4937 "=&a" (uEAX)
4938 : "0" (0xffffffff),
4939 "mr" (pvBitmap),
4940 "1" (cBits >> 5),
4941 "2" (pvBitmap),
4942 "3" (0xffffffff));
4943# else
4944 cBits = RT_ALIGN_32(cBits, 32);
4945 __asm
4946 {
4947# ifdef RT_ARCH_AMD64
4948 mov rdi, [pvBitmap]
4949 mov rbx, rdi
4950# else
4951 mov edi, [pvBitmap]
4952 mov ebx, edi
4953# endif
4954 mov edx, 0ffffffffh
4955 mov eax, edx
4956 mov ecx, [cBits]
4957 shr ecx, 5
4958 repe scasd
4959 je done
4960
4961# ifdef RT_ARCH_AMD64
4962 lea rdi, [rdi - 4]
4963 xor eax, [rdi]
4964 sub rdi, rbx
4965# else
4966 lea edi, [edi - 4]
4967 xor eax, [edi]
4968 sub edi, ebx
4969# endif
4970 shl edi, 3
4971 bsf edx, eax
4972 add edx, edi
4973 done:
4974 mov [iBit], edx
4975 }
4976# endif
4977 return iBit;
4978 }
4979 return -1;
4980}
4981#endif
4982
4983
4984/**
4985 * Finds the next clear bit in a bitmap.
4986 *
4987 * @returns Index of the first zero bit.
4988 * @returns -1 if no clear bit was found.
4989 * @param pvBitmap Pointer to the bitmap.
4990 * @param cBits The number of bits in the bitmap. Multiple of 32.
4991 * @param iBitPrev The bit returned from the last search.
4992 * The search will start at iBitPrev + 1.
4993 */
4994#if RT_INLINE_ASM_EXTERNAL
4995DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4996#else
4997DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4998{
4999 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5000 int iBit = ++iBitPrev & 31;
5001 if (iBit)
5002 {
5003 /*
5004 * Inspect the 32-bit word containing the unaligned bit.
5005 */
5006 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5007
5008# if RT_INLINE_ASM_USES_INTRIN
5009 unsigned long ulBit = 0;
5010 if (_BitScanForward(&ulBit, u32))
5011 return ulBit + iBitPrev;
5012# else
5013# if RT_INLINE_ASM_GNU_STYLE
5014 __asm__ __volatile__("bsf %1, %0\n\t"
5015 "jnz 1f\n\t"
5016 "movl $-1, %0\n\t"
5017 "1:\n\t"
5018 : "=r" (iBit)
5019 : "r" (u32));
5020# else
5021 __asm
5022 {
5023 mov edx, [u32]
5024 bsf eax, edx
5025 jnz done
5026 mov eax, 0ffffffffh
5027 done:
5028 mov [iBit], eax
5029 }
5030# endif
5031 if (iBit >= 0)
5032 return iBit + iBitPrev;
5033# endif
5034
5035 /*
5036 * Skip ahead and see if there is anything left to search.
5037 */
5038 iBitPrev |= 31;
5039 iBitPrev++;
5040 if (cBits <= (uint32_t)iBitPrev)
5041 return -1;
5042 }
5043
5044 /*
5045 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5046 */
5047 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5048 if (iBit >= 0)
5049 iBit += iBitPrev;
5050 return iBit;
5051}
5052#endif
5053
5054
5055/**
5056 * Finds the first set bit in a bitmap.
5057 *
5058 * @returns Index of the first set bit.
5059 * @returns -1 if no clear bit was found.
5060 * @param pvBitmap Pointer to the bitmap.
5061 * @param cBits The number of bits in the bitmap. Multiple of 32.
5062 */
5063#if RT_INLINE_ASM_EXTERNAL
5064DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5065#else
5066DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5067{
5068 if (cBits)
5069 {
5070 int32_t iBit;
5071# if RT_INLINE_ASM_GNU_STYLE
5072 RTCCUINTREG uEAX, uECX, uEDI;
5073 cBits = RT_ALIGN_32(cBits, 32);
5074 __asm__ __volatile__("repe; scasl\n\t"
5075 "je 1f\n\t"
5076# ifdef RT_ARCH_AMD64
5077 "lea -4(%%rdi), %%rdi\n\t"
5078 "movl (%%rdi), %%eax\n\t"
5079 "subq %5, %%rdi\n\t"
5080# else
5081 "lea -4(%%edi), %%edi\n\t"
5082 "movl (%%edi), %%eax\n\t"
5083 "subl %5, %%edi\n\t"
5084# endif
5085 "shll $3, %%edi\n\t"
5086 "bsfl %%eax, %%edx\n\t"
5087 "addl %%edi, %%edx\n\t"
5088 "1:\t\n"
5089 : "=d" (iBit),
5090 "=&c" (uECX),
5091 "=&D" (uEDI),
5092 "=&a" (uEAX)
5093 : "0" (0xffffffff),
5094 "mr" (pvBitmap),
5095 "1" (cBits >> 5),
5096 "2" (pvBitmap),
5097 "3" (0));
5098# else
5099 cBits = RT_ALIGN_32(cBits, 32);
5100 __asm
5101 {
5102# ifdef RT_ARCH_AMD64
5103 mov rdi, [pvBitmap]
5104 mov rbx, rdi
5105# else
5106 mov edi, [pvBitmap]
5107 mov ebx, edi
5108# endif
5109 mov edx, 0ffffffffh
5110 xor eax, eax
5111 mov ecx, [cBits]
5112 shr ecx, 5
5113 repe scasd
5114 je done
5115# ifdef RT_ARCH_AMD64
5116 lea rdi, [rdi - 4]
5117 mov eax, [rdi]
5118 sub rdi, rbx
5119# else
5120 lea edi, [edi - 4]
5121 mov eax, [edi]
5122 sub edi, ebx
5123# endif
5124 shl edi, 3
5125 bsf edx, eax
5126 add edx, edi
5127 done:
5128 mov [iBit], edx
5129 }
5130# endif
5131 return iBit;
5132 }
5133 return -1;
5134}
5135#endif
5136
5137
5138/**
5139 * Finds the next set bit in a bitmap.
5140 *
5141 * @returns Index of the next set bit.
5142 * @returns -1 if no set bit was found.
5143 * @param pvBitmap Pointer to the bitmap.
5144 * @param cBits The number of bits in the bitmap. Multiple of 32.
5145 * @param iBitPrev The bit returned from the last search.
5146 * The search will start at iBitPrev + 1.
5147 */
5148#if RT_INLINE_ASM_EXTERNAL
5149DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5150#else
5151DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5152{
5153 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5154 int iBit = ++iBitPrev & 31;
5155 if (iBit)
5156 {
5157 /*
5158 * Inspect the 32-bit word containing the unaligned bit.
5159 */
5160 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5161
5162# if RT_INLINE_ASM_USES_INTRIN
5163 unsigned long ulBit = 0;
5164 if (_BitScanForward(&ulBit, u32))
5165 return ulBit + iBitPrev;
5166# else
5167# if RT_INLINE_ASM_GNU_STYLE
5168 __asm__ __volatile__("bsf %1, %0\n\t"
5169 "jnz 1f\n\t"
5170 "movl $-1, %0\n\t"
5171 "1:\n\t"
5172 : "=r" (iBit)
5173 : "r" (u32));
5174# else
5175 __asm
5176 {
5177 mov edx, [u32]
5178 bsf eax, edx
5179 jnz done
5180 mov eax, 0ffffffffh
5181 done:
5182 mov [iBit], eax
5183 }
5184# endif
5185 if (iBit >= 0)
5186 return iBit + iBitPrev;
5187# endif
5188
5189 /*
5190 * Skip ahead and see if there is anything left to search.
5191 */
5192 iBitPrev |= 31;
5193 iBitPrev++;
5194 if (cBits <= (uint32_t)iBitPrev)
5195 return -1;
5196 }
5197
5198 /*
5199 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5200 */
5201 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5202 if (iBit >= 0)
5203 iBit += iBitPrev;
5204 return iBit;
5205}
5206#endif
5207
5208
5209/**
5210 * Finds the first bit which is set in the given 32-bit integer.
5211 * Bits are numbered from 1 (least significant) to 32.
5212 *
5213 * @returns index [1..32] of the first set bit.
5214 * @returns 0 if all bits are cleared.
5215 * @param u32 Integer to search for set bits.
5216 * @remarks Similar to ffs() in BSD.
5217 */
5218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5219RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32);
5220#else
5221DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5222{
5223# if RT_INLINE_ASM_USES_INTRIN
5224 unsigned long iBit;
5225 if (_BitScanForward(&iBit, u32))
5226 iBit++;
5227 else
5228 iBit = 0;
5229# elif RT_INLINE_ASM_GNU_STYLE
5230 uint32_t iBit;
5231 __asm__ __volatile__("bsf %1, %0\n\t"
5232 "jnz 1f\n\t"
5233 "xorl %0, %0\n\t"
5234 "jmp 2f\n"
5235 "1:\n\t"
5236 "incl %0\n"
5237 "2:\n\t"
5238 : "=r" (iBit)
5239 : "rm" (u32));
5240# else
5241 uint32_t iBit;
5242 _asm
5243 {
5244 bsf eax, [u32]
5245 jnz found
5246 xor eax, eax
5247 jmp done
5248 found:
5249 inc eax
5250 done:
5251 mov [iBit], eax
5252 }
5253# endif
5254 return iBit;
5255}
5256#endif
5257
5258
5259/**
5260 * Finds the first bit which is set in the given 32-bit integer.
5261 * Bits are numbered from 1 (least significant) to 32.
5262 *
5263 * @returns index [1..32] of the first set bit.
5264 * @returns 0 if all bits are cleared.
5265 * @param i32 Integer to search for set bits.
5266 * @remark Similar to ffs() in BSD.
5267 */
5268DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5269{
5270 return ASMBitFirstSetU32((uint32_t)i32);
5271}
5272
5273
5274/**
5275 * Finds the first bit which is set in the given 64-bit integer.
5276 *
5277 * Bits are numbered from 1 (least significant) to 64.
5278 *
5279 * @returns index [1..64] of the first set bit.
5280 * @returns 0 if all bits are cleared.
5281 * @param u64 Integer to search for set bits.
5282 * @remarks Similar to ffs() in BSD.
5283 */
5284#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5285RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64);
5286#else
5287DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5288{
5289# if RT_INLINE_ASM_USES_INTRIN
5290 unsigned long iBit;
5291# if ARCH_BITS == 64
5292 if (_BitScanForward64(&iBit, u64))
5293 iBit++;
5294 else
5295 iBit = 0;
5296# else
5297 if (_BitScanForward(&iBit, (uint32_t)u64))
5298 iBit++;
5299 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5300 iBit += 33;
5301 else
5302 iBit = 0;
5303# endif
5304# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5305 uint64_t iBit;
5306 __asm__ __volatile__("bsfq %1, %0\n\t"
5307 "jnz 1f\n\t"
5308 "xorl %k0, %k0\n\t"
5309 "jmp 2f\n"
5310 "1:\n\t"
5311 "incl %k0\n"
5312 "2:\n\t"
5313 : "=r" (iBit)
5314 : "rm" (u64));
5315# else
5316 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5317 if (!iBit)
5318 {
5319 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5320 if (iBit)
5321 iBit += 32;
5322 }
5323# endif
5324 return (unsigned)iBit;
5325}
5326#endif
5327
5328
5329/**
5330 * Finds the first bit which is set in the given 16-bit integer.
5331 *
5332 * Bits are numbered from 1 (least significant) to 16.
5333 *
5334 * @returns index [1..16] of the first set bit.
5335 * @returns 0 if all bits are cleared.
5336 * @param u16 Integer to search for set bits.
5337 * @remarks For 16-bit bs3kit code.
5338 */
5339#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5340RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16);
5341#else
5342DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5343{
5344 return ASMBitFirstSetU32((uint32_t)u16);
5345}
5346#endif
5347
5348
5349/**
5350 * Finds the last bit which is set in the given 32-bit integer.
5351 * Bits are numbered from 1 (least significant) to 32.
5352 *
5353 * @returns index [1..32] of the last set bit.
5354 * @returns 0 if all bits are cleared.
5355 * @param u32 Integer to search for set bits.
5356 * @remark Similar to fls() in BSD.
5357 */
5358#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5359RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32);
5360#else
5361DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5362{
5363# if RT_INLINE_ASM_USES_INTRIN
5364 unsigned long iBit;
5365 if (_BitScanReverse(&iBit, u32))
5366 iBit++;
5367 else
5368 iBit = 0;
5369# elif RT_INLINE_ASM_GNU_STYLE
5370 uint32_t iBit;
5371 __asm__ __volatile__("bsrl %1, %0\n\t"
5372 "jnz 1f\n\t"
5373 "xorl %0, %0\n\t"
5374 "jmp 2f\n"
5375 "1:\n\t"
5376 "incl %0\n"
5377 "2:\n\t"
5378 : "=r" (iBit)
5379 : "rm" (u32));
5380# else
5381 uint32_t iBit;
5382 _asm
5383 {
5384 bsr eax, [u32]
5385 jnz found
5386 xor eax, eax
5387 jmp done
5388 found:
5389 inc eax
5390 done:
5391 mov [iBit], eax
5392 }
5393# endif
5394 return iBit;
5395}
5396#endif
5397
5398
5399/**
5400 * Finds the last bit which is set in the given 32-bit integer.
5401 * Bits are numbered from 1 (least significant) to 32.
5402 *
5403 * @returns index [1..32] of the last set bit.
5404 * @returns 0 if all bits are cleared.
5405 * @param i32 Integer to search for set bits.
5406 * @remark Similar to fls() in BSD.
5407 */
5408DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5409{
5410 return ASMBitLastSetU32((uint32_t)i32);
5411}
5412
5413
5414/**
5415 * Finds the last bit which is set in the given 64-bit integer.
5416 *
5417 * Bits are numbered from 1 (least significant) to 64.
5418 *
5419 * @returns index [1..64] of the last set bit.
5420 * @returns 0 if all bits are cleared.
5421 * @param u64 Integer to search for set bits.
5422 * @remark Similar to fls() in BSD.
5423 */
5424#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5425RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64);
5426#else
5427DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5428{
5429# if RT_INLINE_ASM_USES_INTRIN
5430 unsigned long iBit;
5431# if ARCH_BITS == 64
5432 if (_BitScanReverse64(&iBit, u64))
5433 iBit++;
5434 else
5435 iBit = 0;
5436# else
5437 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5438 iBit += 33;
5439 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5440 iBit++;
5441 else
5442 iBit = 0;
5443# endif
5444# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5445 uint64_t iBit;
5446 __asm__ __volatile__("bsrq %1, %0\n\t"
5447 "jnz 1f\n\t"
5448 "xorl %k0, %k0\n\t"
5449 "jmp 2f\n"
5450 "1:\n\t"
5451 "incl %k0\n"
5452 "2:\n\t"
5453 : "=r" (iBit)
5454 : "rm" (u64));
5455# else
5456 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5457 if (iBit)
5458 iBit += 32;
5459 else
5460 iBit = ASMBitLastSetU32((uint32_t)u64);
5461#endif
5462 return (unsigned)iBit;
5463}
5464#endif
5465
5466
5467/**
5468 * Finds the last bit which is set in the given 16-bit integer.
5469 *
5470 * Bits are numbered from 1 (least significant) to 16.
5471 *
5472 * @returns index [1..16] of the last set bit.
5473 * @returns 0 if all bits are cleared.
5474 * @param u16 Integer to search for set bits.
5475 * @remarks For 16-bit bs3kit code.
5476 */
5477#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5478RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16);
5479#else
5480DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5481{
5482 return ASMBitLastSetU32((uint32_t)u16);
5483}
5484#endif
5485
5486
5487/**
5488 * Reverse the byte order of the given 16-bit integer.
5489 *
5490 * @returns Revert
5491 * @param u16 16-bit integer value.
5492 */
5493#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5494RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16);
5495#else
5496DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5497{
5498# if RT_INLINE_ASM_USES_INTRIN
5499 u16 = _byteswap_ushort(u16);
5500# elif RT_INLINE_ASM_GNU_STYLE
5501 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5502# else
5503 _asm
5504 {
5505 mov ax, [u16]
5506 ror ax, 8
5507 mov [u16], ax
5508 }
5509# endif
5510 return u16;
5511}
5512#endif
5513
5514
5515/**
5516 * Reverse the byte order of the given 32-bit integer.
5517 *
5518 * @returns Revert
5519 * @param u32 32-bit integer value.
5520 */
5521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5522RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32);
5523#else
5524DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5525{
5526# if RT_INLINE_ASM_USES_INTRIN
5527 u32 = _byteswap_ulong(u32);
5528# elif RT_INLINE_ASM_GNU_STYLE
5529 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5530# else
5531 _asm
5532 {
5533 mov eax, [u32]
5534 bswap eax
5535 mov [u32], eax
5536 }
5537# endif
5538 return u32;
5539}
5540#endif
5541
5542
5543/**
5544 * Reverse the byte order of the given 64-bit integer.
5545 *
5546 * @returns Revert
5547 * @param u64 64-bit integer value.
5548 */
5549DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5550{
5551#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5552 u64 = _byteswap_uint64(u64);
5553#else
5554 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5555 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5556#endif
5557 return u64;
5558}
5559
5560
5561/**
5562 * Rotate 32-bit unsigned value to the left by @a cShift.
5563 *
5564 * @returns Rotated value.
5565 * @param u32 The value to rotate.
5566 * @param cShift How many bits to rotate by.
5567 */
5568#ifdef __WATCOMC__
5569RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5570#else
5571DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5572{
5573# if RT_INLINE_ASM_USES_INTRIN
5574 return _rotl(u32, cShift);
5575# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5576 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5577 return u32;
5578# else
5579 cShift &= 31;
5580 return (u32 << cShift) | (u32 >> (32 - cShift));
5581# endif
5582}
5583#endif
5584
5585
5586/**
5587 * Rotate 32-bit unsigned value to the right by @a cShift.
5588 *
5589 * @returns Rotated value.
5590 * @param u32 The value to rotate.
5591 * @param cShift How many bits to rotate by.
5592 */
5593#ifdef __WATCOMC__
5594RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5595#else
5596DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5597{
5598# if RT_INLINE_ASM_USES_INTRIN
5599 return _rotr(u32, cShift);
5600# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5601 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5602 return u32;
5603# else
5604 cShift &= 31;
5605 return (u32 >> cShift) | (u32 << (32 - cShift));
5606# endif
5607}
5608#endif
5609
5610
5611/**
5612 * Rotate 64-bit unsigned value to the left by @a cShift.
5613 *
5614 * @returns Rotated value.
5615 * @param u64 The value to rotate.
5616 * @param cShift How many bits to rotate by.
5617 */
5618DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5619{
5620#if RT_INLINE_ASM_USES_INTRIN
5621 return _rotl64(u64, cShift);
5622#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5623 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5624 return u64;
5625#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5626 uint32_t uSpill;
5627 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5628 "jz 1f\n\t"
5629 "xchgl %%eax, %%edx\n\t"
5630 "1:\n\t"
5631 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5632 "jz 2f\n\t"
5633 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5634 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5635 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5636 "2:\n\t" /* } */
5637 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5638 : "0" (u64),
5639 "1" (cShift));
5640 return u64;
5641#else
5642 cShift &= 63;
5643 return (u64 << cShift) | (u64 >> (64 - cShift));
5644#endif
5645}
5646
5647
5648/**
5649 * Rotate 64-bit unsigned value to the right by @a cShift.
5650 *
5651 * @returns Rotated value.
5652 * @param u64 The value to rotate.
5653 * @param cShift How many bits to rotate by.
5654 */
5655DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5656{
5657#if RT_INLINE_ASM_USES_INTRIN
5658 return _rotr64(u64, cShift);
5659#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5660 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5661 return u64;
5662#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5663 uint32_t uSpill;
5664 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5665 "jz 1f\n\t"
5666 "xchgl %%eax, %%edx\n\t"
5667 "1:\n\t"
5668 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5669 "jz 2f\n\t"
5670 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5671 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5672 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5673 "2:\n\t" /* } */
5674 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5675 : "0" (u64),
5676 "1" (cShift));
5677 return u64;
5678#else
5679 cShift &= 63;
5680 return (u64 >> cShift) | (u64 << (64 - cShift));
5681#endif
5682}
5683
5684/** @} */
5685
5686
5687/** @} */
5688
5689/*
5690 * Include #pragma aux definitions for Watcom C/C++.
5691 */
5692#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
5693# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
5694# undef ___iprt_asm_watcom_x86_16_h
5695# include "asm-watcom-x86-16.h"
5696#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
5697# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
5698# undef ___iprt_asm_watcom_x86_32_h
5699# include "asm-watcom-x86-32.h"
5700#endif
5701
5702#endif
5703
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette