VirtualBox

source: vbox/trunk/include/iprt/asm.h@62634

Last change on this file: r62634, checked in by vboxsync, 8 years ago:

includes: Workaround for C4668 & C4255 warnings in MSC and WDK headers.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2016 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the intrinsics of Visual C++ 2005 or later (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is that
103 * the former will complete outstanding reads and writes before continuing,
104 * while the latter make no promises about the order. Ordered operations
105 * don't, it seems, make any 100% promise with respect to whether the
106 * operation will complete before any subsequent memory access.
107 * (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
118 * static inline uint32_t rdmsr_low(int idx)
119 * {
120 * uint32_t low;
121 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
122 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
135 * @{
136 */
137
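/* Editor's illustration (not part of the original header): a minimal sketch of the
 * ordered vs. unordered distinction described above, using routines declared further
 * down in this file.  The names g_fReady, g_uPayload, Producer and Consumer are
 * hypothetical.  The producer publishes its payload with an ordered exchange so the
 * payload store cannot drift past the flag update; the consumer may poll with the
 * cheaper unordered read and does one ordered read before touching the payload.
 *
 * @code
 * static uint32_t volatile g_fReady;
 * static uint32_t          g_uPayload;
 *
 * void Producer(uint32_t uValue)
 * {
 *     g_uPayload = uValue;
 *     ASMAtomicXchgU32(&g_fReady, 1);          // ordered: the payload store completes first
 * }
 *
 * uint32_t Consumer(void)
 * {
 *     while (!ASMAtomicUoReadU32(&g_fReady))   // unordered polling
 *         ASMNopPause();
 *     ASMAtomicReadU32(&g_fReady);             // ordered read before reading the payload
 *     return g_uPayload;
 * }
 * @endcode
 */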
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#define RT_INLINE_ASM_GCC_4_3_X_X86 (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
145# define RT_INLINE_ASM_GCC_4_3_X_X86 0
146#endif
147
148/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
149 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
150 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
151 * (PIC mode, x86).
152 *
153 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
154 * when in PIC mode on x86.
155 */
156#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
157# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
158# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
159# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
160# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
161# else
162# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
163 ( (defined(PIC) || defined(__PIC__)) \
164 && defined(RT_ARCH_X86) \
165 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
166 || defined(RT_OS_DARWIN)) )
167# endif
168#endif
169
170
171/** @def ASMReturnAddress
172 * Gets the return address of the current (or calling if you like) function or method.
173 */
174#ifdef _MSC_VER
175# ifdef __cplusplus
176extern "C"
177# endif
178void * _ReturnAddress(void);
179# pragma intrinsic(_ReturnAddress)
180# define ASMReturnAddress() _ReturnAddress()
181#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
182# define ASMReturnAddress() __builtin_return_address(0)
183#elif defined(__WATCOMC__)
184# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
185#else
186# error "Unsupported compiler."
187#endif
188
189
190/**
191 * Compiler memory barrier.
192 *
193 * Ensure that the compiler does not use any cached (register/tmp stack) memory
194 * values or any outstanding writes when returning from this function.
195 *
196 * This function must be used if non-volatile data is modified by a
197 * device or the VMM. Typical cases are port access, MMIO access,
198 * trapping instruction, etc.
199 */
200#if RT_INLINE_ASM_GNU_STYLE
201# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
202#elif RT_INLINE_ASM_USES_INTRIN
203# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
204#elif defined(__WATCOMC__)
205void ASMCompilerBarrier(void);
206#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
207DECLINLINE(void) ASMCompilerBarrier(void)
208{
209 __asm
210 {
211 }
212}
213#endif
214
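/* Editor's illustration (not part of the original header): a sketch of when
 * ASMCompilerBarrier() matters.  Without the barrier the compiler may keep
 * pStatus->fDone cached in a register across the loop; the barrier forces a fresh
 * load on every iteration.  MYDEVSTATUS and WaitForDevice are hypothetical names.
 *
 * @code
 * typedef struct MYDEVSTATUS { uint32_t fDone; } MYDEVSTATUS;
 *
 * void WaitForDevice(MYDEVSTATUS *pStatus)
 * {
 *     while (!pStatus->fDone)    // updated by a device / the VMM, not by this code
 *         ASMCompilerBarrier();  // don't let the compiler cache the field
 * }
 * @endcode
 */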
215
216/** @def ASMBreakpoint
217 * Debugger Breakpoint.
218 * @deprecated Use RT_BREAKPOINT instead.
219 * @internal
220 */
221#define ASMBreakpoint() RT_BREAKPOINT()
222
223
224/**
225 * Spinloop hint for platforms that have one; empty function on the other
226 * platforms.
227 *
228 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
229 * spin locks.
230 */
231#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
232DECLASM(void) ASMNopPause(void);
233#else
234DECLINLINE(void) ASMNopPause(void)
235{
236# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
237# if RT_INLINE_ASM_GNU_STYLE
238 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
239# else
240 __asm {
241 _emit 0f3h
242 _emit 090h
243 }
244# endif
245# else
246 /* dummy */
247# endif
248}
249#endif
250
251
252/**
253 * Atomically Exchange an unsigned 8-bit value, ordered.
254 *
255 * @returns Current *pu8 value
256 * @param pu8 Pointer to the 8-bit variable to update.
257 * @param u8 The 8-bit value to assign to *pu8.
258 */
259#if RT_INLINE_ASM_EXTERNAL
260DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
261#else
262DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
263{
264# if RT_INLINE_ASM_GNU_STYLE
265 __asm__ __volatile__("xchgb %0, %1\n\t"
266 : "=m" (*pu8),
267 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
268 : "1" (u8),
269 "m" (*pu8));
270# else
271 __asm
272 {
273# ifdef RT_ARCH_AMD64
274 mov rdx, [pu8]
275 mov al, [u8]
276 xchg [rdx], al
277 mov [u8], al
278# else
279 mov edx, [pu8]
280 mov al, [u8]
281 xchg [edx], al
282 mov [u8], al
283# endif
284 }
285# endif
286 return u8;
287}
288#endif
289
290
291/**
292 * Atomically Exchange a signed 8-bit value, ordered.
293 *
294 * @returns Current *pi8 value
295 * @param pi8 Pointer to the 8-bit variable to update.
296 * @param i8 The 8-bit value to assign to *pi8.
297 */
298DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
299{
300 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
301}
302
303
304/**
305 * Atomically Exchange a bool value, ordered.
306 *
307 * @returns Current *pf value
308 * @param pf Pointer to the 8-bit variable to update.
309 * @param f The boolean value to assign to *pf.
310 */
311DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
312{
313#ifdef _MSC_VER
314 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
315#else
316 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
317#endif
318}
319
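/* Editor's illustration (not part of the original header): a minimal test-and-set
 * spin lock built on ASMAtomicXchgBool and ASMNopPause.  MyLockAcquire, MyLockRelease
 * and the zero-initialized flag are hypothetical; real IPRT code would normally use
 * the RTSpinLock or RTCritSect APIs instead.
 *
 * @code
 * static bool volatile g_fMyLock; // false = free
 *
 * void MyLockAcquire(void)
 * {
 *     while (ASMAtomicXchgBool(&g_fMyLock, true))  // old value 'true' means somebody holds it
 *         ASMNopPause();
 * }
 *
 * void MyLockRelease(void)
 * {
 *     ASMAtomicXchgBool(&g_fMyLock, false);        // ordered, so protected stores are visible first
 * }
 * @endcode
 */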
320
321/**
322 * Atomically Exchange an unsigned 16-bit value, ordered.
323 *
324 * @returns Current *pu16 value
325 * @param pu16 Pointer to the 16-bit variable to update.
326 * @param u16 The 16-bit value to assign to *pu16.
327 */
328#if RT_INLINE_ASM_EXTERNAL
329DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
330#else
331DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
332{
333# if RT_INLINE_ASM_GNU_STYLE
334 __asm__ __volatile__("xchgw %0, %1\n\t"
335 : "=m" (*pu16),
336 "=r" (u16)
337 : "1" (u16),
338 "m" (*pu16));
339# else
340 __asm
341 {
342# ifdef RT_ARCH_AMD64
343 mov rdx, [pu16]
344 mov ax, [u16]
345 xchg [rdx], ax
346 mov [u16], ax
347# else
348 mov edx, [pu16]
349 mov ax, [u16]
350 xchg [edx], ax
351 mov [u16], ax
352# endif
353 }
354# endif
355 return u16;
356}
357#endif
358
359
360/**
361 * Atomically Exchange a signed 16-bit value, ordered.
362 *
363 * @returns Current *pi16 value
364 * @param pi16 Pointer to the 16-bit variable to update.
365 * @param i16 The 16-bit value to assign to *pi16.
366 */
367DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
368{
369 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
370}
371
372
373/**
374 * Atomically Exchange an unsigned 32-bit value, ordered.
375 *
376 * @returns Current *pu32 value
377 * @param pu32 Pointer to the 32-bit variable to update.
378 * @param u32 The 32-bit value to assign to *pu32.
379 *
380 * @remarks Does not work on 286 and earlier.
381 */
382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
383DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
384#else
385DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
386{
387# if RT_INLINE_ASM_GNU_STYLE
388 __asm__ __volatile__("xchgl %0, %1\n\t"
389 : "=m" (*pu32),
390 "=r" (u32)
391 : "1" (u32),
392 "m" (*pu32));
393
394# elif RT_INLINE_ASM_USES_INTRIN
395 u32 = _InterlockedExchange((long *)pu32, u32);
396
397# else
398 __asm
399 {
400# ifdef RT_ARCH_AMD64
401 mov rdx, [pu32]
402 mov eax, u32
403 xchg [rdx], eax
404 mov [u32], eax
405# else
406 mov edx, [pu32]
407 mov eax, u32
408 xchg [edx], eax
409 mov [u32], eax
410# endif
411 }
412# endif
413 return u32;
414}
415#endif
416
417
418/**
419 * Atomically Exchange a signed 32-bit value, ordered.
420 *
421 * @returns Current *pi32 value
422 * @param pi32 Pointer to the 32-bit variable to update.
423 * @param i32 The 32-bit value to assign to *pi32.
424 */
425DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
426{
427 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
428}
429
430
431/**
432 * Atomically Exchange an unsigned 64-bit value, ordered.
433 *
434 * @returns Current *pu64 value
435 * @param pu64 Pointer to the 64-bit variable to update.
436 * @param u64 The 64-bit value to assign to *pu64.
437 *
438 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
439 */
440#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
441 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
442DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
443#else
444DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
445{
446# if defined(RT_ARCH_AMD64)
447# if RT_INLINE_ASM_USES_INTRIN
448 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
449
450# elif RT_INLINE_ASM_GNU_STYLE
451 __asm__ __volatile__("xchgq %0, %1\n\t"
452 : "=m" (*pu64),
453 "=r" (u64)
454 : "1" (u64),
455 "m" (*pu64));
456# else
457 __asm
458 {
459 mov rdx, [pu64]
460 mov rax, [u64]
461 xchg [rdx], rax
462 mov [u64], rax
463 }
464# endif
465# else /* !RT_ARCH_AMD64 */
466# if RT_INLINE_ASM_GNU_STYLE
467# if defined(PIC) || defined(__PIC__)
468 uint32_t u32EBX = (uint32_t)u64;
469 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
470 "xchgl %%ebx, %3\n\t"
471 "1:\n\t"
472 "lock; cmpxchg8b (%5)\n\t"
473 "jnz 1b\n\t"
474 "movl %3, %%ebx\n\t"
475 /*"xchgl %%esi, %5\n\t"*/
476 : "=A" (u64),
477 "=m" (*pu64)
478 : "0" (*pu64),
479 "m" ( u32EBX ),
480 "c" ( (uint32_t)(u64 >> 32) ),
481 "S" (pu64));
482# else /* !PIC */
483 __asm__ __volatile__("1:\n\t"
484 "lock; cmpxchg8b %1\n\t"
485 "jnz 1b\n\t"
486 : "=A" (u64),
487 "=m" (*pu64)
488 : "0" (*pu64),
489 "b" ( (uint32_t)u64 ),
490 "c" ( (uint32_t)(u64 >> 32) ));
491# endif
492# else
493 __asm
494 {
495 mov ebx, dword ptr [u64]
496 mov ecx, dword ptr [u64 + 4]
497 mov edi, pu64
498 mov eax, dword ptr [edi]
499 mov edx, dword ptr [edi + 4]
500 retry:
501 lock cmpxchg8b [edi]
502 jnz retry
503 mov dword ptr [u64], eax
504 mov dword ptr [u64 + 4], edx
505 }
506# endif
507# endif /* !RT_ARCH_AMD64 */
508 return u64;
509}
510#endif
511
512
513/**
514 * Atomically Exchange a signed 64-bit value, ordered.
515 *
516 * @returns Current *pi64 value
517 * @param pi64 Pointer to the 64-bit variable to update.
518 * @param i64 The 64-bit value to assign to *pi64.
519 */
520DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
521{
522 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
523}
524
525
526/**
527 * Atomically Exchange a pointer value, ordered.
528 *
529 * @returns Current *ppv value
530 * @param ppv Pointer to the pointer variable to update.
531 * @param pv The pointer value to assign to *ppv.
532 */
533DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
534{
535#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
536 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
537#elif ARCH_BITS == 64
538 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
539#else
540# error "ARCH_BITS is bogus"
541#endif
542}
543
544
545/**
546 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
547 *
548 * @returns Current *ppv value
549 * @param ppv Pointer to the pointer variable to update.
550 * @param pv The pointer value to assign to *ppv.
551 * @param Type The type of *ppv, sans volatile.
552 */
553#ifdef __GNUC__
554# define ASMAtomicXchgPtrT(ppv, pv, Type) \
555 __extension__ \
556 ({\
557 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
558 Type const pvTypeChecked = (pv); \
559 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
560 pvTypeCheckedRet; \
561 })
562#else
563# define ASMAtomicXchgPtrT(ppv, pv, Type) \
564 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
565#endif
566
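/* Editor's note: a usage sketch for ASMAtomicXchgPtrT (not part of the original
 * header).  MYITEM, g_pMyPendingItem, MyQueueItem and MyItemRelease are hypothetical.
 * The macro returns the previous pointer with the right type, so no casts are needed
 * at the call site.
 *
 * @code
 * typedef struct MYITEM MYITEM;
 * static MYITEM * volatile g_pMyPendingItem;
 *
 * static void MyQueueItem(MYITEM *pNewItem)
 * {
 *     MYITEM *pPrev = ASMAtomicXchgPtrT(&g_pMyPendingItem, pNewItem, MYITEM *);
 *     if (pPrev)
 *         MyItemRelease(pPrev); // we took ownership of whatever was queued before
 * }
 * @endcode
 */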
567
568/**
569 * Atomically Exchange a raw-mode context pointer value, ordered.
570 *
571 * @returns Current *ppvRC value
572 * @param ppvRC Pointer to the pointer variable to update.
573 * @param pvRC The pointer value to assign to *ppvRC.
574 */
575DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
576{
577 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
578}
579
580
581/**
582 * Atomically Exchange a ring-0 pointer value, ordered.
583 *
584 * @returns Current *ppvR0 value
585 * @param ppvR0 Pointer to the pointer variable to update.
586 * @param pvR0 The pointer value to assign to *ppvR0.
587 */
588DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
589{
590#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
591 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
592#elif R0_ARCH_BITS == 64
593 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
594#else
595# error "R0_ARCH_BITS is bogus"
596#endif
597}
598
599
600/**
601 * Atomically Exchange a ring-3 pointer value, ordered.
602 *
603 * @returns Current *ppvR3 value
604 * @param ppvR3 Pointer to the pointer variable to update.
605 * @param pvR3 The pointer value to assign to *ppvR3.
606 */
607DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
608{
609#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
610 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
611#elif R3_ARCH_BITS == 64
612 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
613#else
614# error "R3_ARCH_BITS is bogus"
615#endif
616}
617
618
619/** @def ASMAtomicXchgHandle
620 * Atomically Exchange a typical IPRT handle value, ordered.
621 *
622 * @param ph Pointer to the value to update.
623 * @param hNew The new value to assign to *ph.
624 * @param phRes Where to store the current *ph value.
625 *
626 * @remarks This doesn't currently work for all handles (like RTFILE).
627 */
628#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
629# define ASMAtomicXchgHandle(ph, hNew, phRes) \
630 do { \
631 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
632 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
633 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
634 } while (0)
635#elif HC_ARCH_BITS == 64
636# define ASMAtomicXchgHandle(ph, hNew, phRes) \
637 do { \
638 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
639 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
640 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
641 } while (0)
642#else
643# error HC_ARCH_BITS
644#endif
645
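/* Editor's note: a usage sketch for ASMAtomicXchgHandle (not part of the original
 * header).  MYHANDLE, g_hMyWorker and MySwapWorker are hypothetical stand-ins for a
 * pointer-sized IPRT handle; as remarked above, handle types that aren't
 * pointer/register sized (like RTFILE) won't work here.
 *
 * @code
 * typedef void *MYHANDLE;
 * static MYHANDLE volatile g_hMyWorker;
 *
 * static MYHANDLE MySwapWorker(MYHANDLE hNewWorker)
 * {
 *     MYHANDLE hOld;
 *     ASMAtomicXchgHandle(&g_hMyWorker, hNewWorker, &hOld);
 *     return hOld; // previous handle, now owned by the caller
 * }
 * @endcode
 */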
646
647/**
648 * Atomically Exchange a value whose size might differ
649 * between platforms or compilers, ordered.
650 *
651 * @param pu Pointer to the variable to update.
652 * @param uNew The value to assign to *pu.
653 * @todo This is busted as it's missing the result argument.
654 */
655#define ASMAtomicXchgSize(pu, uNew) \
656 do { \
657 switch (sizeof(*(pu))) { \
658 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
659 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
660 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
661 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
662 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
663 } \
664 } while (0)
665
666/**
667 * Atomically Exchange a value whose size might differ
668 * between platforms or compilers, ordered.
669 *
670 * @param pu Pointer to the variable to update.
671 * @param uNew The value to assign to *pu.
672 * @param puRes Where to store the current *pu value.
673 */
674#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
675 do { \
676 switch (sizeof(*(pu))) { \
677 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
678 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
679 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
680 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
681 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
682 } \
683 } while (0)
684
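/* Editor's note: a usage sketch for ASMAtomicXchgSizeCorrect (not part of the
 * original header).  RTHCUINTREG is 32-bit or 64-bit depending on the host, which is
 * exactly the situation these size-dispatching macros are meant for; g_uMyState and
 * uOld are hypothetical.
 *
 * @code
 * static RTHCUINTREG volatile g_uMyState;
 *
 * RTHCUINTREG uOld;
 * ASMAtomicXchgSizeCorrect(&g_uMyState, 0, &uOld); // atomically clear, fetching the old value
 * @endcode
 */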
685
686
687/**
688 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
689 *
690 * @returns true if xchg was done.
691 * @returns false if xchg wasn't done.
692 *
693 * @param pu8 Pointer to the value to update.
694 * @param u8New The new value to assign to *pu8.
695 * @param u8Old The old value to compare *pu8 with.
696 *
697 * @remarks x86: Requires a 486 or later.
698 */
699#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
700DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
701#else
702DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
703{
704 uint8_t u8Ret;
705 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
706 "setz %1\n\t"
707 : "=m" (*pu8),
708 "=qm" (u8Ret),
709 "=a" (u8Old)
710 : "q" (u8New),
711 "2" (u8Old),
712 "m" (*pu8));
713 return (bool)u8Ret;
714}
715#endif
716
717
718/**
719 * Atomically Compare and Exchange a signed 8-bit value, ordered.
720 *
721 * @returns true if xchg was done.
722 * @returns false if xchg wasn't done.
723 *
724 * @param pi8 Pointer to the value to update.
725 * @param i8New The new value to assign to *pi8.
726 * @param i8Old The old value to compare *pi8 with.
727 *
728 * @remarks x86: Requires a 486 or later.
729 */
730DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
731{
732 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
733}
734
735
736/**
737 * Atomically Compare and Exchange a bool value, ordered.
738 *
739 * @returns true if xchg was done.
740 * @returns false if xchg wasn't done.
741 *
742 * @param pf Pointer to the value to update.
743 * @param fNew The new value to assign to *pf.
744 * @param fOld The old value to compare *pf with.
745 *
746 * @remarks x86: Requires a 486 or later.
747 */
748DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
749{
750 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
751}
752
753
754/**
755 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
756 *
757 * @returns true if xchg was done.
758 * @returns false if xchg wasn't done.
759 *
760 * @param pu32 Pointer to the value to update.
761 * @param u32New The new value to assign to *pu32.
762 * @param u32Old The old value to compare *pu32 with.
763 *
764 * @remarks x86: Requires a 486 or later.
765 */
766#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
767DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
768#else
769DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
770{
771# if RT_INLINE_ASM_GNU_STYLE
772 uint8_t u8Ret;
773 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
774 "setz %1\n\t"
775 : "=m" (*pu32),
776 "=qm" (u8Ret),
777 "=a" (u32Old)
778 : "r" (u32New),
779 "2" (u32Old),
780 "m" (*pu32));
781 return (bool)u8Ret;
782
783# elif RT_INLINE_ASM_USES_INTRIN
784 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
785
786# else
787 uint32_t u32Ret;
788 __asm
789 {
790# ifdef RT_ARCH_AMD64
791 mov rdx, [pu32]
792# else
793 mov edx, [pu32]
794# endif
795 mov eax, [u32Old]
796 mov ecx, [u32New]
797# ifdef RT_ARCH_AMD64
798 lock cmpxchg [rdx], ecx
799# else
800 lock cmpxchg [edx], ecx
801# endif
802 setz al
803 movzx eax, al
804 mov [u32Ret], eax
805 }
806 return !!u32Ret;
807# endif
808}
809#endif
810
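/* Editor's illustration (not part of the original header): the canonical
 * compare-exchange retry loop, here computing an atomic maximum.  MyAtomicMaxU32 is
 * a hypothetical helper; ASMAtomicUoReadU32 is declared further down in this file.
 *
 * @code
 * static uint32_t MyAtomicMaxU32(uint32_t volatile *pu32, uint32_t uNew)
 * {
 *     for (;;)
 *     {
 *         uint32_t uOld = ASMAtomicUoReadU32(pu32);
 *         if (uOld >= uNew)
 *             return uOld;                            // already large enough
 *         if (ASMAtomicCmpXchgU32(pu32, uNew, uOld))
 *             return uNew;                            // we installed the new maximum
 *         // somebody else changed *pu32 in the meantime - retry
 *     }
 * }
 * @endcode
 */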
811
812/**
813 * Atomically Compare and Exchange a signed 32-bit value, ordered.
814 *
815 * @returns true if xchg was done.
816 * @returns false if xchg wasn't done.
817 *
818 * @param pi32 Pointer to the value to update.
819 * @param i32New The new value to assign to *pi32.
820 * @param i32Old The old value to compare *pi32 with.
821 *
822 * @remarks x86: Requires a 486 or later.
823 */
824DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
825{
826 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
827}
828
829
830/**
831 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
832 *
833 * @returns true if xchg was done.
834 * @returns false if xchg wasn't done.
835 *
836 * @param pu64 Pointer to the 64-bit variable to update.
837 * @param u64New The 64-bit value to assign to *pu64.
838 * @param u64Old The value to compare with.
839 *
840 * @remarks x86: Requires a Pentium or later.
841 */
842#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
843 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
844DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
845#else
846DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
847{
848# if RT_INLINE_ASM_USES_INTRIN
849 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
850
851# elif defined(RT_ARCH_AMD64)
852# if RT_INLINE_ASM_GNU_STYLE
853 uint8_t u8Ret;
854 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
855 "setz %1\n\t"
856 : "=m" (*pu64),
857 "=qm" (u8Ret),
858 "=a" (u64Old)
859 : "r" (u64New),
860 "2" (u64Old),
861 "m" (*pu64));
862 return (bool)u8Ret;
863# else
864 bool fRet;
865 __asm
866 {
867 mov rdx, [pu64]
868 mov rax, [u64Old]
869 mov rcx, [u64New]
870 lock cmpxchg [rdx], rcx
871 setz al
872 mov [fRet], al
873 }
874 return fRet;
875# endif
876# else /* !RT_ARCH_AMD64 */
877 uint32_t u32Ret;
878# if RT_INLINE_ASM_GNU_STYLE
879# if defined(PIC) || defined(__PIC__)
880 uint32_t u32EBX = (uint32_t)u64New;
881 uint32_t u32Spill;
882 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
883 "lock; cmpxchg8b (%6)\n\t"
884 "setz %%al\n\t"
885 "movl %4, %%ebx\n\t"
886 "movzbl %%al, %%eax\n\t"
887 : "=a" (u32Ret),
888 "=d" (u32Spill),
889# if RT_GNUC_PREREQ(4, 3)
890 "+m" (*pu64)
891# else
892 "=m" (*pu64)
893# endif
894 : "A" (u64Old),
895 "m" ( u32EBX ),
896 "c" ( (uint32_t)(u64New >> 32) ),
897 "S" (pu64));
898# else /* !PIC */
899 uint32_t u32Spill;
900 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
901 "setz %%al\n\t"
902 "movzbl %%al, %%eax\n\t"
903 : "=a" (u32Ret),
904 "=d" (u32Spill),
905 "+m" (*pu64)
906 : "A" (u64Old),
907 "b" ( (uint32_t)u64New ),
908 "c" ( (uint32_t)(u64New >> 32) ));
909# endif
910 return (bool)u32Ret;
911# else
912 __asm
913 {
914 mov ebx, dword ptr [u64New]
915 mov ecx, dword ptr [u64New + 4]
916 mov edi, [pu64]
917 mov eax, dword ptr [u64Old]
918 mov edx, dword ptr [u64Old + 4]
919 lock cmpxchg8b [edi]
920 setz al
921 movzx eax, al
922 mov dword ptr [u32Ret], eax
923 }
924 return !!u32Ret;
925# endif
926# endif /* !RT_ARCH_AMD64 */
927}
928#endif
929
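/* Editor's illustration (not part of the original header): a common use of the
 * 64-bit compare-exchange is updating two related 32-bit fields as one unit, which
 * also works on 32-bit x86 hosts via cmpxchg8b.  The packing scheme and
 * MyBumpGeneration are hypothetical; ASMAtomicReadU64 is declared further down.
 *
 * @code
 * // low 32 bits: counter, high 32 bits: generation
 * static void MyBumpGeneration(uint64_t volatile *pu64State)
 * {
 *     uint64_t u64Old, u64New;
 *     do
 *     {
 *         u64Old = ASMAtomicReadU64(pu64State);
 *         u64New = (u64Old + RT_BIT_64(32)) & UINT64_C(0xffffffff00000000); // new generation, counter reset
 *     } while (!ASMAtomicCmpXchgU64(pu64State, u64New, u64Old));
 * }
 * @endcode
 */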
930
931/**
932 * Atomically Compare and exchange a signed 64-bit value, ordered.
933 *
934 * @returns true if xchg was done.
935 * @returns false if xchg wasn't done.
936 *
937 * @param pi64 Pointer to the 64-bit variable to update.
938 * @param i64 The 64-bit value to assign to *pi64.
939 * @param i64Old The value to compare with.
940 *
941 * @remarks x86: Requires a Pentium or later.
942 */
943DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
944{
945 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
946}
947
948
949/**
950 * Atomically Compare and Exchange a pointer value, ordered.
951 *
952 * @returns true if xchg was done.
953 * @returns false if xchg wasn't done.
954 *
955 * @param ppv Pointer to the value to update.
956 * @param pvNew The new value to assign to *ppv.
957 * @param pvOld The old value to compare *ppv with.
958 *
959 * @remarks x86: Requires a 486 or later.
960 */
961DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
962{
963#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
964 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
965#elif ARCH_BITS == 64
966 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
967#else
968# error "ARCH_BITS is bogus"
969#endif
970}
971
972
973/**
974 * Atomically Compare and Exchange a pointer value, ordered.
975 *
976 * @returns true if xchg was done.
977 * @returns false if xchg wasn't done.
978 *
979 * @param ppv Pointer to the value to update.
980 * @param pvNew The new value to assign to *ppv.
981 * @param pvOld The old value to compare *ppv with.
982 *
983 * @remarks This is relatively type safe on GCC platforms.
984 * @remarks x86: Requires a 486 or later.
985 */
986#ifdef __GNUC__
987# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
988 __extension__ \
989 ({\
990 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
991 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
992 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
993 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
994 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
995 fMacroRet; \
996 })
997#else
998# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
999 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
1000#endif
1001
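/* Editor's illustration (not part of the original header): a lock-free LIFO push
 * using the type-checked pointer compare-exchange.  MYNODE, MyLocklessPush and the
 * list head are hypothetical; ASMAtomicReadPtrT is declared further down in this
 * file.  (A matching pop would additionally have to deal with the ABA problem.)
 *
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *
 * static void MyLocklessPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 * {
 *     MYNODE *pHead;
 *     do
 *     {
 *         pHead = ASMAtomicReadPtrT(ppHead, MYNODE *);
 *         pNode->pNext = pHead;
 *     } while (!ASMAtomicCmpXchgPtr(ppHead, pNode, pHead));
 * }
 * @endcode
 */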
1002
1003/** @def ASMAtomicCmpXchgHandle
1004 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1005 *
1006 * @param ph Pointer to the value to update.
1007 * @param hNew The new value to assign to *ph.
1008 * @param hOld The old value to compare *ph with.
1009 * @param fRc Where to store the result.
1010 *
1011 * @remarks This doesn't currently work for all handles (like RTFILE).
1012 * @remarks x86: Requires a 486 or later.
1013 */
1014#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1015# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1016 do { \
1017 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1018 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1019 } while (0)
1020#elif HC_ARCH_BITS == 64
1021# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1022 do { \
1023 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1024 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1025 } while (0)
1026#else
1027# error HC_ARCH_BITS
1028#endif
1029
1030
1031/** @def ASMAtomicCmpXchgSize
1032 * Atomically Compare and Exchange a value whose size might differ
1033 * between platforms or compilers, ordered.
1034 *
1035 * @param pu Pointer to the value to update.
1036 * @param uNew The new value to assign to *pu.
1037 * @param uOld The old value to compare *pu with.
1038 * @param fRc Where to store the result.
1039 *
1040 * @remarks x86: Requires a 486 or later.
1041 */
1042#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1043 do { \
1044 switch (sizeof(*(pu))) { \
1045 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1046 break; \
1047 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1048 break; \
1049 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1050 (fRc) = false; \
1051 break; \
1052 } \
1053 } while (0)
1054
1055
1056/**
1057 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1058 * passing back the old value, ordered.
1059 *
1060 * @returns true if xchg was done.
1061 * @returns false if xchg wasn't done.
1062 *
1063 * @param pu32 Pointer to the value to update.
1064 * @param u32New The new value to assign to *pu32.
1065 * @param u32Old The old value to compare *pu32 with.
1066 * @param pu32Old Pointer to store the old value at.
1067 *
1068 * @remarks x86: Requires a 486 or later.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1072#else
1073DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1074{
1075# if RT_INLINE_ASM_GNU_STYLE
1076 uint8_t u8Ret;
1077 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1078 "setz %1\n\t"
1079 : "=m" (*pu32),
1080 "=qm" (u8Ret),
1081 "=a" (*pu32Old)
1082 : "r" (u32New),
1083 "a" (u32Old),
1084 "m" (*pu32));
1085 return (bool)u8Ret;
1086
1087# elif RT_INLINE_ASM_USES_INTRIN
1088 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1089
1090# else
1091 uint32_t u32Ret;
1092 __asm
1093 {
1094# ifdef RT_ARCH_AMD64
1095 mov rdx, [pu32]
1096# else
1097 mov edx, [pu32]
1098# endif
1099 mov eax, [u32Old]
1100 mov ecx, [u32New]
1101# ifdef RT_ARCH_AMD64
1102 lock cmpxchg [rdx], ecx
1103 mov rdx, [pu32Old]
1104 mov [rdx], eax
1105# else
1106 lock cmpxchg [edx], ecx
1107 mov edx, [pu32Old]
1108 mov [edx], eax
1109# endif
1110 setz al
1111 movzx eax, al
1112 mov [u32Ret], eax
1113 }
1114 return !!u32Ret;
1115# endif
1116}
1117#endif
1118
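/* Editor's illustration (not part of the original header): the "Ex" variant hands
 * back the value the compare-exchange actually saw, which saves a re-read in retry
 * loops.  MyTryIncBelowLimit is a hypothetical bounded-increment helper.
 *
 * @code
 * static bool MyTryIncBelowLimit(uint32_t volatile *pu32, uint32_t uLimit)
 * {
 *     uint32_t uOld = ASMAtomicUoReadU32(pu32);
 *     for (;;)
 *     {
 *         uint32_t uSeen;
 *         if (uOld >= uLimit)
 *             return false;
 *         if (ASMAtomicCmpXchgExU32(pu32, uOld + 1, uOld, &uSeen))
 *             return true;
 *         uOld = uSeen; // retry with the value that was actually found
 *     }
 * }
 * @endcode
 */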
1119
1120/**
1121 * Atomically Compare and Exchange a signed 32-bit value, additionally
1122 * passing back the old value, ordered.
1123 *
1124 * @returns true if xchg was done.
1125 * @returns false if xchg wasn't done.
1126 *
1127 * @param pi32 Pointer to the value to update.
1128 * @param i32New The new value to assign to *pi32.
1129 * @param i32Old The old value to compare *pi32 with.
1130 * @param pi32Old Pointer to store the old value at.
1131 *
1132 * @remarks x86: Requires a 486 or later.
1133 */
1134DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1135{
1136 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1137}
1138
1139
1140/**
1141 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1142 * passing back old value, ordered.
1143 *
1144 * @returns true if xchg was done.
1145 * @returns false if xchg wasn't done.
1146 *
1147 * @param pu64 Pointer to the 64-bit variable to update.
1148 * @param u64New The 64-bit value to assign to *pu64.
1149 * @param u64Old The value to compare with.
1150 * @param pu64Old Pointer to store the old value at.
1151 *
1152 * @remarks x86: Requires a Pentium or later.
1153 */
1154#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1155 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1156DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1157#else
1158DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1159{
1160# if RT_INLINE_ASM_USES_INTRIN
1161 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1162
1163# elif defined(RT_ARCH_AMD64)
1164# if RT_INLINE_ASM_GNU_STYLE
1165 uint8_t u8Ret;
1166 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1167 "setz %1\n\t"
1168 : "=m" (*pu64),
1169 "=qm" (u8Ret),
1170 "=a" (*pu64Old)
1171 : "r" (u64New),
1172 "a" (u64Old),
1173 "m" (*pu64));
1174 return (bool)u8Ret;
1175# else
1176 bool fRet;
1177 __asm
1178 {
1179 mov rdx, [pu64]
1180 mov rax, [u64Old]
1181 mov rcx, [u64New]
1182 lock cmpxchg [rdx], rcx
1183 mov rdx, [pu64Old]
1184 mov [rdx], rax
1185 setz al
1186 mov [fRet], al
1187 }
1188 return fRet;
1189# endif
1190# else /* !RT_ARCH_AMD64 */
1191# if RT_INLINE_ASM_GNU_STYLE
1192 uint64_t u64Ret;
1193# if defined(PIC) || defined(__PIC__)
1194 /* NB: this code uses a memory clobber description, because the clean
1195 * solution with an output value for *pu64 makes gcc run out of registers.
1196 * This will cause suboptimal code, and anyone with a better solution is
1197 * welcome to improve this. */
1198 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1199 "lock; cmpxchg8b %3\n\t"
1200 "xchgl %%ebx, %1\n\t"
1201 : "=A" (u64Ret)
1202 : "DS" ((uint32_t)u64New),
1203 "c" ((uint32_t)(u64New >> 32)),
1204 "m" (*pu64),
1205 "0" (u64Old)
1206 : "memory" );
1207# else /* !PIC */
1208 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1209 : "=A" (u64Ret),
1210 "=m" (*pu64)
1211 : "b" ((uint32_t)u64New),
1212 "c" ((uint32_t)(u64New >> 32)),
1213 "m" (*pu64),
1214 "0" (u64Old));
1215# endif
1216 *pu64Old = u64Ret;
1217 return u64Ret == u64Old;
1218# else
1219 uint32_t u32Ret;
1220 __asm
1221 {
1222 mov ebx, dword ptr [u64New]
1223 mov ecx, dword ptr [u64New + 4]
1224 mov edi, [pu64]
1225 mov eax, dword ptr [u64Old]
1226 mov edx, dword ptr [u64Old + 4]
1227 lock cmpxchg8b [edi]
1228 mov ebx, [pu64Old]
1229 mov [ebx], eax
1230 setz al
1231 movzx eax, al
1232 add ebx, 4
1233 mov [ebx], edx
1234 mov dword ptr [u32Ret], eax
1235 }
1236 return !!u32Ret;
1237# endif
1238# endif /* !RT_ARCH_AMD64 */
1239}
1240#endif
1241
1242
1243/**
1244 * Atomically Compare and exchange a signed 64-bit value, additionally
1245 * passing back old value, ordered.
1246 *
1247 * @returns true if xchg was done.
1248 * @returns false if xchg wasn't done.
1249 *
1250 * @param pi64 Pointer to the 64-bit variable to update.
1251 * @param i64 The 64-bit value to assign to *pi64.
1252 * @param i64Old The value to compare with.
1253 * @param pi64Old Pointer to store the old value at.
1254 *
1255 * @remarks x86: Requires a Pentium or later.
1256 */
1257DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1258{
1259 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1260}
1261
1262/** @def ASMAtomicCmpXchgExHandle
1263 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1264 *
1265 * @param ph Pointer to the value to update.
1266 * @param hNew The new value to assign to *ph.
1267 * @param hOld The old value to compare *ph with.
1268 * @param fRc Where to store the result.
1269 * @param phOldVal Pointer to where to store the old value.
1270 *
1271 * @remarks This doesn't currently work for all handles (like RTFILE).
1272 */
1273#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1274# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1275 do { \
1276 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1277 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1278 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1279 } while (0)
1280#elif HC_ARCH_BITS == 64
1281# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1282 do { \
1283 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1284 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1285 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1286 } while (0)
1287#else
1288# error HC_ARCH_BITS
1289#endif
1290
1291
1292/** @def ASMAtomicCmpXchgExSize
1293 * Atomically Compare and Exchange a value whose size might differ
1294 * between platforms or compilers. Additionally passes back old value.
1295 *
1296 * @param pu Pointer to the value to update.
1297 * @param uNew The new value to assign to *pu.
1298 * @param uOld The old value to compare *pu with.
1299 * @param fRc Where to store the result.
1300 * @param puOldVal Pointer to where to store the old value.
1301 *
1302 * @remarks x86: Requires a 486 or later.
1303 */
1304#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1305 do { \
1306 switch (sizeof(*(pu))) { \
1307 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1308 break; \
1309 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1310 break; \
1311 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1312 (fRc) = false; \
1313 *(puOldVal) = 0; \
1314 break; \
1315 } \
1316 } while (0)
1317
1318
1319/**
1320 * Atomically Compare and Exchange a pointer value, additionally
1321 * passing back old value, ordered.
1322 *
1323 * @returns true if xchg was done.
1324 * @returns false if xchg wasn't done.
1325 *
1326 * @param ppv Pointer to the value to update.
1327 * @param pvNew The new value to assign to *ppv.
1328 * @param pvOld The old value to compare *ppv with.
1329 * @param ppvOld Pointer to store the old value at.
1330 *
1331 * @remarks x86: Requires a 486 or later.
1332 */
1333DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1334{
1335#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1336 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1337#elif ARCH_BITS == 64
1338 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1339#else
1340# error "ARCH_BITS is bogus"
1341#endif
1342}
1343
1344
1345/**
1346 * Atomically Compare and Exchange a pointer value, additionally
1347 * passing back old value, ordered.
1348 *
1349 * @returns true if xchg was done.
1350 * @returns false if xchg wasn't done.
1351 *
1352 * @param ppv Pointer to the value to update.
1353 * @param pvNew The new value to assign to *ppv.
1354 * @param pvOld The old value to compare *ppv with.
1355 * @param ppvOld Pointer to store the old value at.
1356 *
1357 * @remarks This is relatively type safe on GCC platforms.
1358 * @remarks x86: Requires a 486 or later.
1359 */
1360#ifdef __GNUC__
1361# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1362 __extension__ \
1363 ({\
1364 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1365 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1366 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1367 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1368 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1369 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1370 (void **)ppvOldTypeChecked); \
1371 fMacroRet; \
1372 })
1373#else
1374# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1375 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1376#endif
1377
1378
1379/**
1380 * Virtualization unfriendly serializing instruction, always exits.
1381 */
1382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1383DECLASM(void) ASMSerializeInstructionCpuId(void);
1384#else
1385DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1386{
1387# if RT_INLINE_ASM_GNU_STYLE
1388 RTCCUINTREG xAX = 0;
1389# ifdef RT_ARCH_AMD64
1390 __asm__ __volatile__ ("cpuid"
1391 : "=a" (xAX)
1392 : "0" (xAX)
1393 : "rbx", "rcx", "rdx", "memory");
1394# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1395 __asm__ __volatile__ ("push %%ebx\n\t"
1396 "cpuid\n\t"
1397 "pop %%ebx\n\t"
1398 : "=a" (xAX)
1399 : "0" (xAX)
1400 : "ecx", "edx", "memory");
1401# else
1402 __asm__ __volatile__ ("cpuid"
1403 : "=a" (xAX)
1404 : "0" (xAX)
1405 : "ebx", "ecx", "edx", "memory");
1406# endif
1407
1408# elif RT_INLINE_ASM_USES_INTRIN
1409 int aInfo[4];
1410 _ReadWriteBarrier();
1411 __cpuid(aInfo, 0);
1412
1413# else
1414 __asm
1415 {
1416 push ebx
1417 xor eax, eax
1418 cpuid
1419 pop ebx
1420 }
1421# endif
1422}
1423#endif
1424
1425/**
1426 * Virtualization friendly serializing instruction, though more expensive.
1427 */
1428#if RT_INLINE_ASM_EXTERNAL
1429DECLASM(void) ASMSerializeInstructionIRet(void);
1430#else
1431DECLINLINE(void) ASMSerializeInstructionIRet(void)
1432{
1433# if RT_INLINE_ASM_GNU_STYLE
1434# ifdef RT_ARCH_AMD64
1435 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1436 "subq $128, %%rsp\n\t" /*redzone*/
1437 "mov %%ss, %%eax\n\t"
1438 "pushq %%rax\n\t"
1439 "pushq %%r10\n\t"
1440 "pushfq\n\t"
1441 "movl %%cs, %%eax\n\t"
1442 "pushq %%rax\n\t"
1443 "leaq 1f(%%rip), %%rax\n\t"
1444 "pushq %%rax\n\t"
1445 "iretq\n\t"
1446 "1:\n\t"
1447 ::: "rax", "r10", "memory");
1448# else
1449 __asm__ __volatile__ ("pushfl\n\t"
1450 "pushl %%cs\n\t"
1451 "pushl $1f\n\t"
1452 "iretl\n\t"
1453 "1:\n\t"
1454 ::: "memory");
1455# endif
1456
1457# else
1458 __asm
1459 {
1460 pushfd
1461 push cs
1462 push la_ret
1463 iretd
1464 la_ret:
1465 }
1466# endif
1467}
1468#endif
1469
1470/**
1471 * Virtualization friendlier serializing instruction, may still cause exits.
1472 */
1473#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1474DECLASM(void) ASMSerializeInstructionRdTscp(void);
1475#else
1476DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1477{
1478# if RT_INLINE_ASM_GNU_STYLE
1479 /* rdtscp is not supported by ancient linux build VM of course :-( */
1480# ifdef RT_ARCH_AMD64
1481 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1482 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1483# else
1484 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1485 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1486# endif
1487# else
1488# if RT_INLINE_ASM_USES_INTRIN >= 15
1489 uint32_t uIgnore;
1490 _ReadWriteBarrier();
1491 (void)__rdtscp(&uIgnore);
1492 (void)uIgnore;
1493# else
1494 __asm
1495 {
1496 rdtscp
1497 }
1498# endif
1499# endif
1500}
1501#endif
1502
1503
1504/**
1505 * Serialize Instruction.
1506 */
1507#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1508# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1509#else
1510# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1511#endif
1512
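/* Editor's note (not part of the original header): ASMSerializeInstruction() is
 * typically used when all preceding instructions must have retired before continuing,
 * e.g. around time-stamp measurements.  A hypothetical timing sketch; MyReadTsc and
 * MyWorkloadUnderTest are assumed helpers, not part of this header.
 *
 * @code
 * extern uint64_t MyReadTsc(void);          // hypothetical TSC reader
 * extern void     MyWorkloadUnderTest(void);
 *
 * ASMSerializeInstruction();                // everything before this point has retired
 * uint64_t uTscStart = MyReadTsc();
 * MyWorkloadUnderTest();
 * ASMSerializeInstruction();
 * uint64_t cTicks = MyReadTsc() - uTscStart;
 * @endcode
 */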
1513
1514/**
1515 * Memory fence, waits for any pending writes and reads to complete.
1516 */
1517DECLINLINE(void) ASMMemoryFence(void)
1518{
1519 /** @todo use mfence? check if all cpus we care for support it. */
1520#if ARCH_BITS == 16
1521 uint16_t volatile u16;
1522 ASMAtomicXchgU16(&u16, 0);
1523#else
1524 uint32_t volatile u32;
1525 ASMAtomicXchgU32(&u32, 0);
1526#endif
1527}
1528
1529
1530/**
1531 * Write fence, waits for any pending writes to complete.
1532 */
1533DECLINLINE(void) ASMWriteFence(void)
1534{
1535 /** @todo use sfence? check if all cpus we care for support it. */
1536 ASMMemoryFence();
1537}
1538
1539
1540/**
1541 * Read fence, waits for any pending reads to complete.
1542 */
1543DECLINLINE(void) ASMReadFence(void)
1544{
1545 /** @todo use lfence? check if all cpus we care for support it. */
1546 ASMMemoryFence();
1547}
1548
1549
1550/**
1551 * Atomically reads an unsigned 8-bit value, ordered.
1552 *
1553 * @returns Current *pu8 value
1554 * @param pu8 Pointer to the 8-bit variable to read.
1555 */
1556DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1557{
1558 ASMMemoryFence();
1559 return *pu8; /* byte reads are atomic on x86 */
1560}
1561
1562
1563/**
1564 * Atomically reads an unsigned 8-bit value, unordered.
1565 *
1566 * @returns Current *pu8 value
1567 * @param pu8 Pointer to the 8-bit variable to read.
1568 */
1569DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1570{
1571 return *pu8; /* byte reads are atomic on x86 */
1572}
1573
1574
1575/**
1576 * Atomically reads a signed 8-bit value, ordered.
1577 *
1578 * @returns Current *pi8 value
1579 * @param pi8 Pointer to the 8-bit variable to read.
1580 */
1581DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1582{
1583 ASMMemoryFence();
1584 return *pi8; /* byte reads are atomic on x86 */
1585}
1586
1587
1588/**
1589 * Atomically reads a signed 8-bit value, unordered.
1590 *
1591 * @returns Current *pi8 value
1592 * @param pi8 Pointer to the 8-bit variable to read.
1593 */
1594DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1595{
1596 return *pi8; /* byte reads are atomic on x86 */
1597}
1598
1599
1600/**
1601 * Atomically reads an unsigned 16-bit value, ordered.
1602 *
1603 * @returns Current *pu16 value
1604 * @param pu16 Pointer to the 16-bit variable to read.
1605 */
1606DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1607{
1608 ASMMemoryFence();
1609 Assert(!((uintptr_t)pu16 & 1));
1610 return *pu16;
1611}
1612
1613
1614/**
1615 * Atomically reads an unsigned 16-bit value, unordered.
1616 *
1617 * @returns Current *pu16 value
1618 * @param pu16 Pointer to the 16-bit variable to read.
1619 */
1620DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1621{
1622 Assert(!((uintptr_t)pu16 & 1));
1623 return *pu16;
1624}
1625
1626
1627/**
1628 * Atomically reads a signed 16-bit value, ordered.
1629 *
1630 * @returns Current *pi16 value
1631 * @param pi16 Pointer to the 16-bit variable to read.
1632 */
1633DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1634{
1635 ASMMemoryFence();
1636 Assert(!((uintptr_t)pi16 & 1));
1637 return *pi16;
1638}
1639
1640
1641/**
1642 * Atomically reads a signed 16-bit value, unordered.
1643 *
1644 * @returns Current *pi16 value
1645 * @param pi16 Pointer to the 16-bit variable to read.
1646 */
1647DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1648{
1649 Assert(!((uintptr_t)pi16 & 1));
1650 return *pi16;
1651}
1652
1653
1654/**
1655 * Atomically reads an unsigned 32-bit value, ordered.
1656 *
1657 * @returns Current *pu32 value
1658 * @param pu32 Pointer to the 32-bit variable to read.
1659 */
1660DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1661{
1662 ASMMemoryFence();
1663 Assert(!((uintptr_t)pu32 & 3));
1664#if ARCH_BITS == 16
1665 AssertFailed(); /** @todo 16-bit */
1666#endif
1667 return *pu32;
1668}
1669
1670
1671/**
1672 * Atomically reads an unsigned 32-bit value, unordered.
1673 *
1674 * @returns Current *pu32 value
1675 * @param pu32 Pointer to the 32-bit variable to read.
1676 */
1677DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1678{
1679 Assert(!((uintptr_t)pu32 & 3));
1680#if ARCH_BITS == 16
1681 AssertFailed(); /** @todo 16-bit */
1682#endif
1683 return *pu32;
1684}
1685
1686
1687/**
1688 * Atomically reads a signed 32-bit value, ordered.
1689 *
1690 * @returns Current *pi32 value
1691 * @param pi32 Pointer to the 32-bit variable to read.
1692 */
1693DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1694{
1695 ASMMemoryFence();
1696 Assert(!((uintptr_t)pi32 & 3));
1697#if ARCH_BITS == 16
1698 AssertFailed(); /** @todo 16-bit */
1699#endif
1700 return *pi32;
1701}
1702
1703
1704/**
1705 * Atomically reads a signed 32-bit value, unordered.
1706 *
1707 * @returns Current *pi32 value
1708 * @param pi32 Pointer to the 32-bit variable to read.
1709 */
1710DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1711{
1712 Assert(!((uintptr_t)pi32 & 3));
1713#if ARCH_BITS == 16
1714 AssertFailed(); /** @todo 16-bit */
1715#endif
1716 return *pi32;
1717}
1718
1719
1720/**
1721 * Atomically reads an unsigned 64-bit value, ordered.
1722 *
1723 * @returns Current *pu64 value
1724 * @param pu64 Pointer to the 64-bit variable to read.
1725 * The memory pointed to must be writable.
1726 *
1727 * @remarks This may fault if the memory is read-only!
1728 * @remarks x86: Requires a Pentium or later.
1729 */
1730#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1731 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1732DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1733#else
1734DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1735{
1736 uint64_t u64;
1737# ifdef RT_ARCH_AMD64
1738 Assert(!((uintptr_t)pu64 & 7));
1739/*# if RT_INLINE_ASM_GNU_STYLE
1740 __asm__ __volatile__( "mfence\n\t"
1741 "movq %1, %0\n\t"
1742 : "=r" (u64)
1743 : "m" (*pu64));
1744# else
1745 __asm
1746 {
1747 mfence
1748 mov rdx, [pu64]
1749 mov rax, [rdx]
1750 mov [u64], rax
1751 }
1752# endif*/
1753 ASMMemoryFence();
1754 u64 = *pu64;
1755# else /* !RT_ARCH_AMD64 */
1756# if RT_INLINE_ASM_GNU_STYLE
1757# if defined(PIC) || defined(__PIC__)
1758 uint32_t u32EBX = 0;
1759 Assert(!((uintptr_t)pu64 & 7));
1760 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1761 "lock; cmpxchg8b (%5)\n\t"
1762 "movl %3, %%ebx\n\t"
1763 : "=A" (u64),
1764# if RT_GNUC_PREREQ(4, 3)
1765 "+m" (*pu64)
1766# else
1767 "=m" (*pu64)
1768# endif
1769 : "0" (0ULL),
1770 "m" (u32EBX),
1771 "c" (0),
1772 "S" (pu64));
1773# else /* !PIC */
1774 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1775 : "=A" (u64),
1776 "+m" (*pu64)
1777 : "0" (0ULL),
1778 "b" (0),
1779 "c" (0));
1780# endif
1781# else
1782 Assert(!((uintptr_t)pu64 & 7));
1783 __asm
1784 {
1785 xor eax, eax
1786 xor edx, edx
1787 mov edi, pu64
1788 xor ecx, ecx
1789 xor ebx, ebx
1790 lock cmpxchg8b [edi]
1791 mov dword ptr [u64], eax
1792 mov dword ptr [u64 + 4], edx
1793 }
1794# endif
1795# endif /* !RT_ARCH_AMD64 */
1796 return u64;
1797}
1798#endif
1799
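/* Editor's note (not part of the original header): on 32-bit x86 a plain 64-bit
 * load can tear (two halves from different writes); ASMAtomicReadU64 avoids that by
 * using lock cmpxchg8b there, which is also why the memory must be writable.
 * MySampleCounter and the counter variable are hypothetical.
 *
 * @code
 * static uint64_t volatile g_cMyEvents;
 *
 * static uint64_t MySampleCounter(void)
 * {
 *     return ASMAtomicReadU64(&g_cMyEvents); // safe on both 32-bit and 64-bit hosts
 * }
 * @endcode
 */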
1800
1801/**
1802 * Atomically reads an unsigned 64-bit value, unordered.
1803 *
1804 * @returns Current *pu64 value
1805 * @param pu64 Pointer to the 64-bit variable to read.
1806 * The memory pointed to must be writable.
1807 *
1808 * @remarks This may fault if the memory is read-only!
1809 * @remarks x86: Requires a Pentium or later.
1810 */
1811#if !defined(RT_ARCH_AMD64) \
1812 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1813 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1814DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1815#else
1816DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1817{
1818 uint64_t u64;
1819# ifdef RT_ARCH_AMD64
1820 Assert(!((uintptr_t)pu64 & 7));
1821/*# if RT_INLINE_ASM_GNU_STYLE
1822 Assert(!((uintptr_t)pu64 & 7));
1823 __asm__ __volatile__("movq %1, %0\n\t"
1824 : "=r" (u64)
1825 : "m" (*pu64));
1826# else
1827 __asm
1828 {
1829 mov rdx, [pu64]
1830 mov rax, [rdx]
1831 mov [u64], rax
1832 }
1833# endif */
1834 u64 = *pu64;
1835# else /* !RT_ARCH_AMD64 */
1836# if RT_INLINE_ASM_GNU_STYLE
1837# if defined(PIC) || defined(__PIC__)
1838 uint32_t u32EBX = 0;
1839 uint32_t u32Spill;
1840 Assert(!((uintptr_t)pu64 & 7));
1841 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1842 "xor %%ecx,%%ecx\n\t"
1843 "xor %%edx,%%edx\n\t"
1844 "xchgl %%ebx, %3\n\t"
1845 "lock; cmpxchg8b (%4)\n\t"
1846 "movl %3, %%ebx\n\t"
1847 : "=A" (u64),
1848# if RT_GNUC_PREREQ(4, 3)
1849 "+m" (*pu64),
1850# else
1851 "=m" (*pu64),
1852# endif
1853 "=c" (u32Spill)
1854 : "m" (u32EBX),
1855 "S" (pu64));
1856# else /* !PIC */
1857 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1858 : "=A" (u64),
1859 "+m" (*pu64)
1860 : "0" (0ULL),
1861 "b" (0),
1862 "c" (0));
1863# endif
1864# else
1865 Assert(!((uintptr_t)pu64 & 7));
1866 __asm
1867 {
1868 xor eax, eax
1869 xor edx, edx
1870 mov edi, pu64
1871 xor ecx, ecx
1872 xor ebx, ebx
1873 lock cmpxchg8b [edi]
1874 mov dword ptr [u64], eax
1875 mov dword ptr [u64 + 4], edx
1876 }
1877# endif
1878# endif /* !RT_ARCH_AMD64 */
1879 return u64;
1880}
1881#endif
1882
1883
1884/**
1885 * Atomically reads a signed 64-bit value, ordered.
1886 *
1887 * @returns Current *pi64 value
1888 * @param pi64 Pointer to the 64-bit variable to read.
1889 * The memory pointed to must be writable.
1890 *
1891 * @remarks This may fault if the memory is read-only!
1892 * @remarks x86: Requires a Pentium or later.
1893 */
1894DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1895{
1896 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1897}
1898
1899
1900/**
1901 * Atomically reads a signed 64-bit value, unordered.
1902 *
1903 * @returns Current *pi64 value
1904 * @param pi64 Pointer to the 64-bit variable to read.
1905 * The memory pointed to must be writable.
1906 *
1907 * @remarks This will fault if the memory is read-only!
1908 * @remarks x86: Requires a Pentium or later.
1909 */
1910DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1911{
1912 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1913}
1914
1915
1916/**
1917 * Atomically reads a size_t value, ordered.
1918 *
1919 * @returns Current *pcb value
1920 * @param pcb Pointer to the size_t variable to read.
1921 */
1922DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1923{
1924#if ARCH_BITS == 64
1925 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1926#elif ARCH_BITS == 32
1927 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1928#elif ARCH_BITS == 16
1929 AssertCompileSize(size_t, 2);
1930 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1931#else
1932# error "Unsupported ARCH_BITS value"
1933#endif
1934}
1935
1936
1937/**
1938 * Atomically reads a size_t value, unordered.
1939 *
1940 * @returns Current *pcb value
1941 * @param pcb Pointer to the size_t variable to read.
1942 */
1943DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1944{
1945#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1946 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1947#elif ARCH_BITS == 32
1948 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1949#elif ARCH_BITS == 16
1950 AssertCompileSize(size_t, 2);
1951 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1952#else
1953# error "Unsupported ARCH_BITS value"
1954#endif
1955}
1956
1957
1958/**
1959 * Atomically reads a pointer value, ordered.
1960 *
1961 * @returns Current *pv value
1962 * @param ppv Pointer to the pointer variable to read.
1963 *
1964 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
1965 * requires less typing (no casts).
1966 */
1967DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1968{
1969#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1970 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1971#elif ARCH_BITS == 64
1972 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1973#else
1974# error "ARCH_BITS is bogus"
1975#endif
1976}
1977
1978/**
1979 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1980 *
1981 * @returns Current *pv value
1982 * @param ppv Pointer to the pointer variable to read.
1983 * @param Type The type of *ppv, sans volatile.
1984 */
1985#ifdef __GNUC__
1986# define ASMAtomicReadPtrT(ppv, Type) \
1987 __extension__ \
1988 ({\
1989 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1990 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1991 pvTypeChecked; \
1992 })
1993#else
1994# define ASMAtomicReadPtrT(ppv, Type) \
1995 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1996#endif
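/* Usage sketch for ASMAtomicReadPtrT (illustrative only; the MYNODE type and
 * the g_pHead variable are hypothetical, not part of IPRT):
 *
 *      typedef struct MYNODE { uint32_t uValue; } MYNODE;
 *      MYNODE * volatile g_pHead;
 *
 *      MYNODE  *pHead  = ASMAtomicReadPtrT(&g_pHead, MYNODE *);
 *      uint32_t uValue = pHead ? pHead->uValue : 0;
 *
 * Compared with calling ASMAtomicReadPtr directly, the macro removes the
 * manual casts and, when built with GCC, also checks that g_pHead really is
 * a pointer variable of the expected type.
 */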
1997
1998
1999/**
2000 * Atomically reads a pointer value, unordered.
2001 *
2002 * @returns Current *pv value
2003 * @param ppv Pointer to the pointer variable to read.
2004 *
2005 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
2006 * requires less typing (no casts).
2007 */
2008DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
2009{
2010#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2011 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
2012#elif ARCH_BITS == 64
2013 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
2014#else
2015# error "ARCH_BITS is bogus"
2016#endif
2017}
2018
2019
2020/**
2021 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2022 *
2023 * @returns Current *pv value
2024 * @param ppv Pointer to the pointer variable to read.
2025 * @param Type The type of *ppv, sans volatile.
2026 */
2027#ifdef __GNUC__
2028# define ASMAtomicUoReadPtrT(ppv, Type) \
2029 __extension__ \
2030 ({\
2031 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2032 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2033 pvTypeChecked; \
2034 })
2035#else
2036# define ASMAtomicUoReadPtrT(ppv, Type) \
2037 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2038#endif
2039
2040
2041/**
2042 * Atomically reads a boolean value, ordered.
2043 *
2044 * @returns Current *pf value
2045 * @param pf Pointer to the boolean variable to read.
2046 */
2047DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2048{
2049 ASMMemoryFence();
2050 return *pf; /* byte reads are atomic on x86 */
2051}
2052
2053
2054/**
2055 * Atomically reads a boolean value, unordered.
2056 *
2057 * @returns Current *pf value
2058 * @param pf Pointer to the boolean variable to read.
2059 */
2060DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2061{
2062 return *pf; /* byte reads are atomic on x86 */
2063}
2064
2065
2066/**
2067 * Atomically read a typical IPRT handle value, ordered.
2068 *
2069 * @param ph Pointer to the handle variable to read.
2070 * @param phRes Where to store the result.
2071 *
2072 * @remarks This doesn't currently work for all handles (like RTFILE).
2073 */
2074#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2075# define ASMAtomicReadHandle(ph, phRes) \
2076 do { \
2077 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2078 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2079 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2080 } while (0)
2081#elif HC_ARCH_BITS == 64
2082# define ASMAtomicReadHandle(ph, phRes) \
2083 do { \
2084 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2085 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2086 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2087 } while (0)
2088#else
2089# error HC_ARCH_BITS
2090#endif
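/* Usage sketch (illustrative; g_hEvtShutdown is a made-up global, and
 * RTSEMEVENT / NIL_RTSEMEVENT / RTSemEventSignal are assumed to come from
 * iprt/semaphore.h, which this header does not include):
 *
 *      RTSEMEVENT volatile g_hEvtShutdown;
 *      ...
 *      RTSEMEVENT hEvt;
 *      ASMAtomicReadHandle(&g_hEvtShutdown, &hEvt);
 *      if (hEvt != NIL_RTSEMEVENT)
 *          RTSemEventSignal(hEvt);
 *
 * The AssertCompile checks inside the macro reject handle types that are not
 * pointer sized, which is why handles like RTFILE do not work here.
 */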
2091
2092
2093/**
2094 * Atomically read a typical IPRT handle value, unordered.
2095 *
2096 * @param ph Pointer to the handle variable to read.
2097 * @param phRes Where to store the result.
2098 *
2099 * @remarks This doesn't currently work for all handles (like RTFILE).
2100 */
2101#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2102# define ASMAtomicUoReadHandle(ph, phRes) \
2103 do { \
2104 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2105 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2106 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2107 } while (0)
2108#elif HC_ARCH_BITS == 64
2109# define ASMAtomicUoReadHandle(ph, phRes) \
2110 do { \
2111 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2112 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2113 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2114 } while (0)
2115#else
2116# error HC_ARCH_BITS
2117#endif
2118
2119
2120/**
2121 * Atomically read a value whose size might differ
2122 * between platforms or compilers, ordered.
2123 *
2124 * @param pu Pointer to the variable to read.
2125 * @param puRes Where to store the result.
2126 */
2127#define ASMAtomicReadSize(pu, puRes) \
2128 do { \
2129 switch (sizeof(*(pu))) { \
2130 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2131 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2132 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2133 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2134 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2135 } \
2136 } while (0)
2137
2138
2139/**
2140 * Atomically read a value whose size might differ
2141 * between platforms or compilers, unordered.
2142 *
2143 * @param pu Pointer to the variable to read.
2144 * @param puRes Where to store the result.
2145 */
2146#define ASMAtomicUoReadSize(pu, puRes) \
2147 do { \
2148 switch (sizeof(*(pu))) { \
2149 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2150 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2151 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2152 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2153 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2154 } \
2155 } while (0)
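/* Usage sketch (illustrative; g_GCPhysShared is a made-up variable and
 * RTGCPHYS is assumed to be an integer type whose width depends on the
 * build configuration):
 *
 *      RTGCPHYS volatile g_GCPhysShared;
 *      ...
 *      RTGCPHYS GCPhys;
 *      ASMAtomicReadSize(&g_GCPhysShared, &GCPhys);
 *
 * The switch on sizeof(*(pu)) selects the matching fixed-width read at
 * compile time, so the same source line works for 1, 2, 4 and 8 byte types.
 */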
2156
2157
2158/**
2159 * Atomically writes an unsigned 8-bit value, ordered.
2160 *
2161 * @param pu8 Pointer to the 8-bit variable.
2162 * @param u8 The 8-bit value to assign to *pu8.
2163 */
2164DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2165{
2166 ASMAtomicXchgU8(pu8, u8);
2167}
2168
2169
2170/**
2171 * Atomically writes an unsigned 8-bit value, unordered.
2172 *
2173 * @param pu8 Pointer to the 8-bit variable.
2174 * @param u8 The 8-bit value to assign to *pu8.
2175 */
2176DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2177{
2178 *pu8 = u8; /* byte writes are atomic on x86 */
2179}
2180
2181
2182/**
2183 * Atomically writes a signed 8-bit value, ordered.
2184 *
2185 * @param pi8 Pointer to the 8-bit variable to write.
2186 * @param i8 The 8-bit value to assign to *pi8.
2187 */
2188DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2189{
2190 ASMAtomicXchgS8(pi8, i8);
2191}
2192
2193
2194/**
2195 * Atomically writes a signed 8-bit value, unordered.
2196 *
2197 * @param pi8 Pointer to the 8-bit variable to write.
2198 * @param i8 The 8-bit value to assign to *pi8.
2199 */
2200DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2201{
2202 *pi8 = i8; /* byte writes are atomic on x86 */
2203}
2204
2205
2206/**
2207 * Atomically writes an unsigned 16-bit value, ordered.
2208 *
2209 * @param pu16 Pointer to the 16-bit variable to write.
2210 * @param u16 The 16-bit value to assign to *pu16.
2211 */
2212DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2213{
2214 ASMAtomicXchgU16(pu16, u16);
2215}
2216
2217
2218/**
2219 * Atomically writes an unsigned 16-bit value, unordered.
2220 *
2221 * @param pu16 Pointer to the 16-bit variable to write.
2222 * @param u16 The 16-bit value to assign to *pu16.
2223 */
2224DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2225{
2226 Assert(!((uintptr_t)pu16 & 1));
2227 *pu16 = u16;
2228}
2229
2230
2231/**
2232 * Atomically writes a signed 16-bit value, ordered.
2233 *
2234 * @param pi16 Pointer to the 16-bit variable to write.
2235 * @param i16 The 16-bit value to assign to *pi16.
2236 */
2237DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2238{
2239 ASMAtomicXchgS16(pi16, i16);
2240}
2241
2242
2243/**
2244 * Atomically writes a signed 16-bit value, unordered.
2245 *
2246 * @param pi16 Pointer to the 16-bit variable to write.
2247 * @param i16 The 16-bit value to assign to *pi16.
2248 */
2249DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2250{
2251 Assert(!((uintptr_t)pi16 & 1));
2252 *pi16 = i16;
2253}
2254
2255
2256/**
2257 * Atomically writes an unsigned 32-bit value, ordered.
2258 *
2259 * @param pu32 Pointer to the 32-bit variable to write.
2260 * @param u32 The 32-bit value to assign to *pu32.
2261 */
2262DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2263{
2264 ASMAtomicXchgU32(pu32, u32);
2265}
2266
2267
2268/**
2269 * Atomically writes an unsigned 32-bit value, unordered.
2270 *
2271 * @param pu32 Pointer to the 32-bit variable to write.
2272 * @param u32 The 32-bit value to assign to *pu32.
2273 */
2274DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2275{
2276 Assert(!((uintptr_t)pu32 & 3));
2277#if ARCH_BITS >= 32
2278 *pu32 = u32;
2279#else
2280 ASMAtomicXchgU32(pu32, u32);
2281#endif
2282}
2283
2284
2285/**
2286 * Atomically writes a signed 32-bit value, ordered.
2287 *
2288 * @param pi32 Pointer to the 32-bit variable to write.
2289 * @param i32 The 32-bit value to assign to *pi32.
2290 */
2291DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2292{
2293 ASMAtomicXchgS32(pi32, i32);
2294}
2295
2296
2297/**
2298 * Atomically writes a signed 32-bit value, unordered.
2299 *
2300 * @param pi32 Pointer to the 32-bit variable to write.
2301 * @param i32 The 32-bit value to assign to *pi32.
2302 */
2303DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2304{
2305 Assert(!((uintptr_t)pi32 & 3));
2306#if ARCH_BITS >= 32
2307 *pi32 = i32;
2308#else
2309 ASMAtomicXchgS32(pi32, i32);
2310#endif
2311}
2312
2313
2314/**
2315 * Atomically writes an unsigned 64-bit value, ordered.
2316 *
2317 * @param pu64 Pointer to the 64-bit variable to write.
2318 * @param u64 The 64-bit value to assign to *pu64.
2319 */
2320DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2321{
2322 ASMAtomicXchgU64(pu64, u64);
2323}
2324
2325
2326/**
2327 * Atomically writes an unsigned 64-bit value, unordered.
2328 *
2329 * @param pu64 Pointer to the 64-bit variable to write.
2330 * @param u64 The 64-bit value to assign to *pu64.
2331 */
2332DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2333{
2334 Assert(!((uintptr_t)pu64 & 7));
2335#if ARCH_BITS == 64
2336 *pu64 = u64;
2337#else
2338 ASMAtomicXchgU64(pu64, u64);
2339#endif
2340}
2341
2342
2343/**
2344 * Atomically writes a signed 64-bit value, ordered.
2345 *
2346 * @param pi64 Pointer to the 64-bit variable to write.
2347 * @param i64 The 64-bit value to assign to *pi64.
2348 */
2349DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2350{
2351 ASMAtomicXchgS64(pi64, i64);
2352}
2353
2354
2355/**
2356 * Atomically writes a signed 64-bit value, unordered.
2357 *
2358 * @param pi64 Pointer to the 64-bit variable to write.
2359 * @param i64 The 64-bit value to assign to *pi64.
2360 */
2361DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2362{
2363 Assert(!((uintptr_t)pi64 & 7));
2364#if ARCH_BITS == 64
2365 *pi64 = i64;
2366#else
2367 ASMAtomicXchgS64(pi64, i64);
2368#endif
2369}
2370
2371
2372/**
2373 * Atomically writes a boolean value, ordered.
2374 *
2375 * @param pf Pointer to the boolean variable to write.
2376 * @param f The boolean value to assign to *pf.
2377 */
2378DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2379{
2380 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2381}
2382
2383
2384/**
2385 * Atomically writes a boolean value, unordered.
2386 *
2387 * @param pf Pointer to the boolean variable to write.
2388 * @param f The boolean value to assign to *pf.
2389 */
2390DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2391{
2392 *pf = f; /* byte writes are atomic on x86 */
2393}
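/* Usage sketch contrasting ordered and unordered access (g_uPayload and
 * g_fReady are hypothetical globals):
 *
 *      uint32_t volatile g_uPayload;
 *      bool volatile     g_fReady;
 *
 *      // Producer: publish the payload, then raise the flag with an
 *      // ordered write so the payload is visible before the flag is.
 *      ASMAtomicUoWriteU32(&g_uPayload, 42);
 *      ASMAtomicWriteBool(&g_fReady, true);
 *
 *      // Consumer: the ordered read of the flag pairs with the ordered
 *      // write above, making the unordered payload read safe.
 *      if (ASMAtomicReadBool(&g_fReady))
 *      {
 *          uint32_t uPayload = ASMAtomicUoReadU32(&g_uPayload);
 *          ...
 *      }
 *
 * Only the flag needs ordered access here; the payload itself can use the
 * cheaper unordered accessors because the flag carries the ordering.
 */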
2394
2395
2396/**
2397 * Atomically writes a pointer value, ordered.
2398 *
2399 * @param ppv Pointer to the pointer variable to write.
2400 * @param pv The pointer value to assign to *ppv.
2401 */
2402DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2403{
2404#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2405 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2406#elif ARCH_BITS == 64
2407 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2408#else
2409# error "ARCH_BITS is bogus"
2410#endif
2411}
2412
2413
2414/**
2415 * Atomically writes a pointer value, ordered.
2416 *
2417 * @param ppv Pointer to the pointer variable to write.
2418 * @param pv The pointer value to assign to *ppv. If NULL use
2419 * ASMAtomicWriteNullPtr or you'll land in trouble.
2420 *
2421 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2422 * NULL.
2423 */
2424#ifdef __GNUC__
2425# define ASMAtomicWritePtr(ppv, pv) \
2426 do \
2427 { \
2428 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2429 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2430 \
2431 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2432 AssertCompile(sizeof(pv) == sizeof(void *)); \
2433 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2434 \
2435 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2436 } while (0)
2437#else
2438# define ASMAtomicWritePtr(ppv, pv) \
2439 do \
2440 { \
2441 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2442 AssertCompile(sizeof(pv) == sizeof(void *)); \
2443 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2444 \
2445 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2446 } while (0)
2447#endif
2448
2449
2450/**
2451 * Atomically sets a pointer to NULL, ordered.
2452 *
2453 * @param ppv Pointer to the pointer variable that should be set to NULL.
2454 *
2455 * @remarks This is relatively type safe on GCC platforms.
2456 */
2457#ifdef __GNUC__
2458# define ASMAtomicWriteNullPtr(ppv) \
2459 do \
2460 { \
2461 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2462 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2463 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2464 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2465 } while (0)
2466#else
2467# define ASMAtomicWriteNullPtr(ppv) \
2468 do \
2469 { \
2470 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2471 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2472 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2473 } while (0)
2474#endif
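/* Usage sketch (illustrative; MYNODE, g_pCurrent and pNewNode are made-up
 * names):
 *
 *      MYNODE * volatile g_pCurrent;
 *      ...
 *      // Publish a fully initialized node; readers using ASMAtomicReadPtrT
 *      // will either see NULL or the complete object, never a partial one.
 *      ASMAtomicWritePtr(&g_pCurrent, pNewNode);
 *      ...
 *      // Retract it again without tripping the GCC type checking on NULL.
 *      ASMAtomicWriteNullPtr(&g_pCurrent);
 */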
2475
2476
2477/**
2478 * Atomically writes a pointer value, unordered.
2479 *
2481 * @param ppv Pointer to the pointer variable.
2482 * @param pv The pointer value to assign to *ppv. If NULL use
2483 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2484 *
2485 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2486 * NULL.
2487 */
2488#ifdef __GNUC__
2489# define ASMAtomicUoWritePtr(ppv, pv) \
2490 do \
2491 { \
2492 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2493 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2494 \
2495 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2496 AssertCompile(sizeof(pv) == sizeof(void *)); \
2497 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2498 \
2499 *(ppvTypeChecked) = pvTypeChecked; \
2500 } while (0)
2501#else
2502# define ASMAtomicUoWritePtr(ppv, pv) \
2503 do \
2504 { \
2505 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2506 AssertCompile(sizeof(pv) == sizeof(void *)); \
2507 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2508 *(ppv) = pv; \
2509 } while (0)
2510#endif
2511
2512
2513/**
2514 * Atomically sets a pointer to NULL, unordered.
2515 *
2516 * @param ppv Pointer to the pointer variable that should be set to NULL.
2517 *
2518 * @remarks This is relatively type safe on GCC platforms.
2519 */
2520#ifdef __GNUC__
2521# define ASMAtomicUoWriteNullPtr(ppv) \
2522 do \
2523 { \
2524 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2525 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2526 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2527 *(ppvTypeChecked) = NULL; \
2528 } while (0)
2529#else
2530# define ASMAtomicUoWriteNullPtr(ppv) \
2531 do \
2532 { \
2533 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2534 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2535 *(ppv) = NULL; \
2536 } while (0)
2537#endif
2538
2539
2540/**
2541 * Atomically write a typical IPRT handle value, ordered.
2542 *
2543 * @param ph Pointer to the variable to update.
2544 * @param hNew The value to assign to *ph.
2545 *
2546 * @remarks This doesn't currently work for all handles (like RTFILE).
2547 */
2548#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2549# define ASMAtomicWriteHandle(ph, hNew) \
2550 do { \
2551 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2552 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2553 } while (0)
2554#elif HC_ARCH_BITS == 64
2555# define ASMAtomicWriteHandle(ph, hNew) \
2556 do { \
2557 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2558 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2559 } while (0)
2560#else
2561# error HC_ARCH_BITS
2562#endif
2563
2564
2565/**
2566 * Atomically write a typical IPRT handle value, unordered.
2567 *
2568 * @param ph Pointer to the variable to update.
2569 * @param hNew The value to assign to *ph.
2570 *
2571 * @remarks This doesn't currently work for all handles (like RTFILE).
2572 */
2573#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2574# define ASMAtomicUoWriteHandle(ph, hNew) \
2575 do { \
2576 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2577 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2578 } while (0)
2579#elif HC_ARCH_BITS == 64
2580# define ASMAtomicUoWriteHandle(ph, hNew) \
2581 do { \
2582 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2583 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2584 } while (0)
2585#else
2586# error HC_ARCH_BITS
2587#endif
2588
2589
2590/**
2591 * Atomically write a value whose size might differ
2592 * between platforms or compilers, ordered.
2593 *
2594 * @param pu Pointer to the variable to update.
2595 * @param uNew The value to assign to *pu.
2596 */
2597#define ASMAtomicWriteSize(pu, uNew) \
2598 do { \
2599 switch (sizeof(*(pu))) { \
2600 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2601 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2602 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2603 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2604 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2605 } \
2606 } while (0)
2607
2608/**
2609 * Atomically write a value whose size might differ
2610 * between platforms or compilers, unordered.
2611 *
2612 * @param pu Pointer to the variable to update.
2613 * @param uNew The value to assign to *pu.
2614 */
2615#define ASMAtomicUoWriteSize(pu, uNew) \
2616 do { \
2617 switch (sizeof(*(pu))) { \
2618 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2619 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2620 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2621 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2622 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2623 } \
2624 } while (0)
2625
2626
2627
2628/**
2629 * Atomically exchanges and adds to a 16-bit value, ordered.
2630 *
2631 * @returns The old value.
2632 * @param pu16 Pointer to the value.
2633 * @param u16 Number to add.
2634 *
2635 * @remarks Currently not implemented, just to make 16-bit code happy.
2636 * @remarks x86: Requires a 486 or later.
2637 */
2638DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2639
2640
2641/**
2642 * Atomically exchanges and adds to a 32-bit value, ordered.
2643 *
2644 * @returns The old value.
2645 * @param pu32 Pointer to the value.
2646 * @param u32 Number to add.
2647 *
2648 * @remarks x86: Requires a 486 or later.
2649 */
2650#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2651DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2652#else
2653DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2654{
2655# if RT_INLINE_ASM_USES_INTRIN
2656 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2657 return u32;
2658
2659# elif RT_INLINE_ASM_GNU_STYLE
2660 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2661 : "=r" (u32),
2662 "=m" (*pu32)
2663 : "0" (u32),
2664 "m" (*pu32)
2665 : "memory");
2666 return u32;
2667# else
2668 __asm
2669 {
2670 mov eax, [u32]
2671# ifdef RT_ARCH_AMD64
2672 mov rdx, [pu32]
2673 lock xadd [rdx], eax
2674# else
2675 mov edx, [pu32]
2676 lock xadd [edx], eax
2677# endif
2678 mov [u32], eax
2679 }
2680 return u32;
2681# endif
2682}
2683#endif
2684
2685
2686/**
2687 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2688 *
2689 * @returns The old value.
2690 * @param pi32 Pointer to the value.
2691 * @param i32 Number to add.
2692 *
2693 * @remarks x86: Requires a 486 or later.
2694 */
2695DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2696{
2697 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2698}
2699
2700
2701/**
2702 * Atomically exchanges and adds to a 64-bit value, ordered.
2703 *
2704 * @returns The old value.
2705 * @param pu64 Pointer to the value.
2706 * @param u64 Number to add.
2707 *
2708 * @remarks x86: Requires a Pentium or later.
2709 */
2710#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2711DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2712#else
2713DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2714{
2715# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2716 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2717 return u64;
2718
2719# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2720 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2721 : "=r" (u64),
2722 "=m" (*pu64)
2723 : "0" (u64),
2724 "m" (*pu64)
2725 : "memory");
2726 return u64;
2727# else
2728 uint64_t u64Old;
2729 for (;;)
2730 {
2731 uint64_t u64New;
2732 u64Old = ASMAtomicUoReadU64(pu64);
2733 u64New = u64Old + u64;
2734 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2735 break;
2736 ASMNopPause();
2737 }
2738 return u64Old;
2739# endif
2740}
2741#endif
2742
2743
2744/**
2745 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2746 *
2747 * @returns The old value.
2748 * @param pi64 Pointer to the value.
2749 * @param i64 Number to add.
2750 *
2751 * @remarks x86: Requires a Pentium or later.
2752 */
2753DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2754{
2755 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2756}
2757
2758
2759/**
2760 * Atomically exchanges and adds to a size_t value, ordered.
2761 *
2762 * @returns The old value.
2763 * @param pcb Pointer to the size_t value.
2764 * @param cb Number to add.
2765 */
2766DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2767{
2768#if ARCH_BITS == 64
2769 AssertCompileSize(size_t, 8);
2770 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2771#elif ARCH_BITS == 32
2772 AssertCompileSize(size_t, 4);
2773 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2774#elif ARCH_BITS == 16
2775 AssertCompileSize(size_t, 2);
2776 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2777#else
2778# error "Unsupported ARCH_BITS value"
2779#endif
2780}
2781
2782
2783/**
2784 * Atomically exchanges and adds a value whose size might differ between
2785 * platforms or compilers, ordered.
2786 *
2787 * @param pu Pointer to the variable to update.
2788 * @param uNew The value to add to *pu.
2789 * @param puOld Where to store the old value.
2790 */
2791#define ASMAtomicAddSize(pu, uNew, puOld) \
2792 do { \
2793 switch (sizeof(*(pu))) { \
2794 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2795 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2796 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2797 } \
2798 } while (0)
2799
2800
2801
2802/**
2803 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2804 *
2805 * @returns The old value.
2806 * @param pu16 Pointer to the value.
2807 * @param u16 Number to subtract.
2808 *
2809 * @remarks x86: Requires a 486 or later.
2810 */
2811DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2812{
2813 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2814}
2815
2816
2817/**
2818 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2819 *
2820 * @returns The old value.
2821 * @param pi16 Pointer to the value.
2822 * @param i16 Number to subtract.
2823 *
2824 * @remarks x86: Requires a 486 or later.
2825 */
2826DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2827{
2828 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2829}
2830
2831
2832/**
2833 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2834 *
2835 * @returns The old value.
2836 * @param pu32 Pointer to the value.
2837 * @param u32 Number to subtract.
2838 *
2839 * @remarks x86: Requires a 486 or later.
2840 */
2841DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2842{
2843 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2844}
2845
2846
2847/**
2848 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2849 *
2850 * @returns The old value.
2851 * @param pi32 Pointer to the value.
2852 * @param i32 Number to subtract.
2853 *
2854 * @remarks x86: Requires a 486 or later.
2855 */
2856DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2857{
2858 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2859}
2860
2861
2862/**
2863 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2864 *
2865 * @returns The old value.
2866 * @param pu64 Pointer to the value.
2867 * @param u64 Number to subtract.
2868 *
2869 * @remarks x86: Requires a Pentium or later.
2870 */
2871DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2872{
2873 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2874}
2875
2876
2877/**
2878 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2879 *
2880 * @returns The old value.
2881 * @param pi64 Pointer to the value.
2882 * @param i64 Number to subtract.
2883 *
2884 * @remarks x86: Requires a Pentium or later.
2885 */
2886DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2887{
2888 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2889}
2890
2891
2892/**
2893 * Atomically exchanges and subtracts from a size_t value, ordered.
2894 *
2895 * @returns The old value.
2896 * @param pcb Pointer to the size_t value.
2897 * @param cb Number to subtract.
2898 *
2899 * @remarks x86: Requires a 486 or later.
2900 */
2901DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2902{
2903#if ARCH_BITS == 64
2904 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2905#elif ARCH_BITS == 32
2906 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2907#elif ARCH_BITS == 16
2908 AssertCompileSize(size_t, 2);
2909 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2910#else
2911# error "Unsupported ARCH_BITS value"
2912#endif
2913}
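/* Usage sketch, a simple byte-count tracker (g_cbInUse and the example*
 * helper names are hypothetical):
 *
 *      static size_t volatile g_cbInUse;
 *
 *      static void exampleTrackAlloc(size_t cb) { ASMAtomicAddZ(&g_cbInUse, cb); }
 *      static void exampleTrackFree(size_t cb)  { ASMAtomicSubZ(&g_cbInUse, cb); }
 *
 * Both calls return the value prior to the update; apply cb to the returned
 * value once more when the new total is needed (e.g. for a high-water mark).
 */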
2914
2915
2916/**
2917 * Atomically exchanges and subtracts a value whose size might differ between
2918 * platforms or compilers, ordered.
2919 *
2920 * @param pu Pointer to the variable to update.
2921 * @param uNew The value to subtract from *pu.
2922 * @param puOld Where to store the old value.
2923 *
2924 * @remarks x86: Requires a 486 or later.
2925 */
2926#define ASMAtomicSubSize(pu, uNew, puOld) \
2927 do { \
2928 switch (sizeof(*(pu))) { \
2929 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2930 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2931 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2932 } \
2933 } while (0)
2934
2935
2936
2937/**
2938 * Atomically increment a 16-bit value, ordered.
2939 *
2940 * @returns The new value.
2941 * @param pu16 Pointer to the value to increment.
2942 * @remarks Not implemented. Just to make 16-bit code happy.
2943 *
2944 * @remarks x86: Requires a 486 or later.
2945 */
2946DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2947
2948
2949/**
2950 * Atomically increment a 32-bit value, ordered.
2951 *
2952 * @returns The new value.
2953 * @param pu32 Pointer to the value to increment.
2954 *
2955 * @remarks x86: Requires a 486 or later.
2956 */
2957#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2958DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2959#else
2960DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2961{
2962 uint32_t u32;
2963# if RT_INLINE_ASM_USES_INTRIN
2964 u32 = _InterlockedIncrement((long *)pu32);
2965 return u32;
2966
2967# elif RT_INLINE_ASM_GNU_STYLE
2968 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2969 : "=r" (u32),
2970 "=m" (*pu32)
2971 : "0" (1),
2972 "m" (*pu32)
2973 : "memory");
2974 return u32+1;
2975# else
2976 __asm
2977 {
2978 mov eax, 1
2979# ifdef RT_ARCH_AMD64
2980 mov rdx, [pu32]
2981 lock xadd [rdx], eax
2982# else
2983 mov edx, [pu32]
2984 lock xadd [edx], eax
2985# endif
2986 mov u32, eax
2987 }
2988 return u32+1;
2989# endif
2990}
2991#endif
2992
2993
2994/**
2995 * Atomically increment a signed 32-bit value, ordered.
2996 *
2997 * @returns The new value.
2998 * @param pi32 Pointer to the value to increment.
2999 *
3000 * @remarks x86: Requires a 486 or later.
3001 */
3002DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3003{
3004 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3005}
3006
3007
3008/**
3009 * Atomically increment a 64-bit value, ordered.
3010 *
3011 * @returns The new value.
3012 * @param pu64 Pointer to the value to increment.
3013 *
3014 * @remarks x86: Requires a Pentium or later.
3015 */
3016#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3017DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
3018#else
3019DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
3020{
3021# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3022 uint64_t u64;
3023 u64 = _InterlockedIncrement64((__int64 *)pu64);
3024 return u64;
3025
3026# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3027 uint64_t u64;
3028 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3029 : "=r" (u64),
3030 "=m" (*pu64)
3031 : "0" (1),
3032 "m" (*pu64)
3033 : "memory");
3034 return u64 + 1;
3035# else
3036 return ASMAtomicAddU64(pu64, 1) + 1;
3037# endif
3038}
3039#endif
3040
3041
3042/**
3043 * Atomically increment a signed 64-bit value, ordered.
3044 *
3045 * @returns The new value.
3046 * @param pi64 Pointer to the value to increment.
3047 *
3048 * @remarks x86: Requires a Pentium or later.
3049 */
3050DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3051{
3052 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3053}
3054
3055
3056/**
3057 * Atomically increment a size_t value, ordered.
3058 *
3059 * @returns The new value.
3060 * @param pcb Pointer to the value to increment.
3061 *
3062 * @remarks x86: Requires a 486 or later.
3063 */
3064DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
3065{
3066#if ARCH_BITS == 64
3067 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3068#elif ARCH_BITS == 32
3069 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3070#elif ARCH_BITS == 16
3071 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3072#else
3073# error "Unsupported ARCH_BITS value"
3074#endif
3075}
3076
3077
3078
3079/**
3080 * Atomically decrement an unsigned 16-bit value, ordered.
3081 *
3082 * @returns The new value.
3083 * @param pu16 Pointer to the value to decrement.
3084 * @remarks Not implemented. Just to make 16-bit code happy.
3085 *
3086 * @remarks x86: Requires a 486 or later.
3087 */
3088DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3089
3090
3091/**
3092 * Atomically decrement an unsigned 32-bit value, ordered.
3093 *
3094 * @returns The new value.
3095 * @param pu32 Pointer to the value to decrement.
3096 *
3097 * @remarks x86: Requires a 486 or later.
3098 */
3099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3100DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3101#else
3102DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3103{
3104 uint32_t u32;
3105# if RT_INLINE_ASM_USES_INTRIN
3106 u32 = _InterlockedDecrement((long *)pu32);
3107 return u32;
3108
3109# elif RT_INLINE_ASM_GNU_STYLE
3110 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3111 : "=r" (u32),
3112 "=m" (*pu32)
3113 : "0" (-1),
3114 "m" (*pu32)
3115 : "memory");
3116 return u32-1;
3117# else
3118 __asm
3119 {
3120 mov eax, -1
3121# ifdef RT_ARCH_AMD64
3122 mov rdx, [pu32]
3123 lock xadd [rdx], eax
3124# else
3125 mov edx, [pu32]
3126 lock xadd [edx], eax
3127# endif
3128 mov u32, eax
3129 }
3130 return u32-1;
3131# endif
3132}
3133#endif
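/* Usage sketch, a minimal reference counter built on the increment and
 * decrement helpers (MYOBJ and the example* names are hypothetical):
 *
 *      typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *      static void exampleRetain(MYOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      static void exampleRelease(MYOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              exampleDestroy(pObj); // last reference is gone
 *      }
 *
 * Because both helpers return the new value, a zero return from the
 * decrement unambiguously identifies the final release.
 */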
3134
3135
3136/**
3137 * Atomically decrement a signed 32-bit value, ordered.
3138 *
3139 * @returns The new value.
3140 * @param pi32 Pointer to the value to decrement.
3141 *
3142 * @remarks x86: Requires a 486 or later.
3143 */
3144DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3145{
3146 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3147}
3148
3149
3150/**
3151 * Atomically decrement an unsigned 64-bit value, ordered.
3152 *
3153 * @returns The new value.
3154 * @param pu64 Pointer to the value to decrement.
3155 *
3156 * @remarks x86: Requires a Pentium or later.
3157 */
3158#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3159DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3160#else
3161DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3162{
3163# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3164 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3165 return u64;
3166
3167# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3168 uint64_t u64;
3169 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3170 : "=r" (u64),
3171 "=m" (*pu64)
3172 : "0" (~(uint64_t)0),
3173 "m" (*pu64)
3174 : "memory");
3175 return u64-1;
3176# else
3177 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3178# endif
3179}
3180#endif
3181
3182
3183/**
3184 * Atomically decrement a signed 64-bit value, ordered.
3185 *
3186 * @returns The new value.
3187 * @param pi64 Pointer to the value to decrement.
3188 *
3189 * @remarks x86: Requires a Pentium or later.
3190 */
3191DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3192{
3193 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3194}
3195
3196
3197/**
3198 * Atomically decrement a size_t value, ordered.
3199 *
3200 * @returns The new value.
3201 * @param pcb Pointer to the value to decrement.
3202 *
3203 * @remarks x86: Requires a 486 or later.
3204 */
3205DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
3206{
3207#if ARCH_BITS == 64
3208 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3209#elif ARCH_BITS == 32
3210 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3211#elif ARCH_BITS == 16
3212 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3213#else
3214# error "Unsupported ARCH_BITS value"
3215#endif
3216}
3217
3218
3219/**
3220 * Atomically Or an unsigned 32-bit value, ordered.
3221 *
3222 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3223 * @param u32 The value to OR *pu32 with.
3224 *
3225 * @remarks x86: Requires a 386 or later.
3226 */
3227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3228DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3229#else
3230DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3231{
3232# if RT_INLINE_ASM_USES_INTRIN
3233 _InterlockedOr((long volatile *)pu32, (long)u32);
3234
3235# elif RT_INLINE_ASM_GNU_STYLE
3236 __asm__ __volatile__("lock; orl %1, %0\n\t"
3237 : "=m" (*pu32)
3238 : "ir" (u32),
3239 "m" (*pu32));
3240# else
3241 __asm
3242 {
3243 mov eax, [u32]
3244# ifdef RT_ARCH_AMD64
3245 mov rdx, [pu32]
3246 lock or [rdx], eax
3247# else
3248 mov edx, [pu32]
3249 lock or [edx], eax
3250# endif
3251 }
3252# endif
3253}
3254#endif
3255
3256
3257/**
3258 * Atomically Or a signed 32-bit value, ordered.
3259 *
3260 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3261 * @param i32 The value to OR *pi32 with.
3262 *
3263 * @remarks x86: Requires a 386 or later.
3264 */
3265DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3266{
3267 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3268}
3269
3270
3271/**
3272 * Atomically Or an unsigned 64-bit value, ordered.
3273 *
3274 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
3275 * @param u64 The value to OR *pu64 with.
3276 *
3277 * @remarks x86: Requires a Pentium or later.
3278 */
3279#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3280DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3281#else
3282DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3283{
3284# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3285 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3286
3287# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3288 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3289 : "=m" (*pu64)
3290 : "r" (u64),
3291 "m" (*pu64));
3292# else
3293 for (;;)
3294 {
3295 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3296 uint64_t u64New = u64Old | u64;
3297 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3298 break;
3299 ASMNopPause();
3300 }
3301# endif
3302}
3303#endif
3304
3305
3306/**
3307 * Atomically Or a signed 64-bit value, ordered.
3308 *
3309 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
3310 * @param i64 The value to OR *pi64 with.
3311 *
3312 * @remarks x86: Requires a Pentium or later.
3313 */
3314DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3315{
3316 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3317}
3318
3319
3320/**
3321 * Atomically And an unsigned 32-bit value, ordered.
3322 *
3323 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3324 * @param u32 The value to AND *pu32 with.
3325 *
3326 * @remarks x86: Requires a 386 or later.
3327 */
3328#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3329DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3330#else
3331DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3332{
3333# if RT_INLINE_ASM_USES_INTRIN
3334 _InterlockedAnd((long volatile *)pu32, u32);
3335
3336# elif RT_INLINE_ASM_GNU_STYLE
3337 __asm__ __volatile__("lock; andl %1, %0\n\t"
3338 : "=m" (*pu32)
3339 : "ir" (u32),
3340 "m" (*pu32));
3341# else
3342 __asm
3343 {
3344 mov eax, [u32]
3345# ifdef RT_ARCH_AMD64
3346 mov rdx, [pu32]
3347 lock and [rdx], eax
3348# else
3349 mov edx, [pu32]
3350 lock and [edx], eax
3351# endif
3352 }
3353# endif
3354}
3355#endif
3356
3357
3358/**
3359 * Atomically And a signed 32-bit value, ordered.
3360 *
3361 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3362 * @param i32 The value to AND *pi32 with.
3363 *
3364 * @remarks x86: Requires a 386 or later.
3365 */
3366DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3367{
3368 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3369}
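/* Usage sketch, setting and clearing flag bits (the MYFLAG_* values and
 * g_fStatus are made up for this example):
 *
 *      #define MYFLAG_BUSY     UINT32_C(0x00000001)
 *      #define MYFLAG_PENDING  UINT32_C(0x00000002)
 *      uint32_t volatile g_fStatus;
 *
 *      ASMAtomicOrU32(&g_fStatus, MYFLAG_BUSY);        // set a bit
 *      ASMAtomicAndU32(&g_fStatus, ~MYFLAG_PENDING);   // clear a bit
 *
 * Neither helper returns the previous value; when the old state matters,
 * use ASMAtomicCmpXchgU32 or the atomic bit test-and-set/reset functions
 * found elsewhere in this header.
 */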
3370
3371
3372/**
3373 * Atomically And an unsigned 64-bit value, ordered.
3374 *
3375 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3376 * @param u64 The value to AND *pu64 with.
3377 *
3378 * @remarks x86: Requires a Pentium or later.
3379 */
3380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3381DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3382#else
3383DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3384{
3385# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3386 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3387
3388# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3389 __asm__ __volatile__("lock; andq %1, %0\n\t"
3390 : "=m" (*pu64)
3391 : "r" (u64),
3392 "m" (*pu64));
3393# else
3394 for (;;)
3395 {
3396 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3397 uint64_t u64New = u64Old & u64;
3398 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3399 break;
3400 ASMNopPause();
3401 }
3402# endif
3403}
3404#endif
3405
3406
3407/**
3408 * Atomically And a signed 64-bit value, ordered.
3409 *
3410 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3411 * @param i64 The value to AND *pi64 with.
3412 *
3413 * @remarks x86: Requires a Pentium or later.
3414 */
3415DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3416{
3417 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3418}
3419
3420
3421/**
3422 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3423 *
3424 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3425 * @param u32 The value to OR *pu32 with.
3426 *
3427 * @remarks x86: Requires a 386 or later.
3428 */
3429#if RT_INLINE_ASM_EXTERNAL
3430DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3431#else
3432DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3433{
3434# if RT_INLINE_ASM_GNU_STYLE
3435 __asm__ __volatile__("orl %1, %0\n\t"
3436 : "=m" (*pu32)
3437 : "ir" (u32),
3438 "m" (*pu32));
3439# else
3440 __asm
3441 {
3442 mov eax, [u32]
3443# ifdef RT_ARCH_AMD64
3444 mov rdx, [pu32]
3445 or [rdx], eax
3446# else
3447 mov edx, [pu32]
3448 or [edx], eax
3449# endif
3450 }
3451# endif
3452}
3453#endif
3454
3455
3456/**
3457 * Atomically OR a signed 32-bit value, unordered.
3458 *
3459 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3460 * @param i32 The value to OR *pi32 with.
3461 *
3462 * @remarks x86: Requires a 386 or later.
3463 */
3464DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3465{
3466 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3467}
3468
3469
3470/**
3471 * Atomically OR an unsigned 64-bit value, unordered.
3472 *
3473 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
3474 * @param u64 The value to OR *pu64 with.
3475 *
3476 * @remarks x86: Requires a Pentium or later.
3477 */
3478#if RT_INLINE_ASM_EXTERNAL
3479DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3480#else
3481DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3482{
3483# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3484 __asm__ __volatile__("orq %1, %q0\n\t"
3485 : "=m" (*pu64)
3486 : "r" (u64),
3487 "m" (*pu64));
3488# else
3489 for (;;)
3490 {
3491 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3492 uint64_t u64New = u64Old | u64;
3493 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3494 break;
3495 ASMNopPause();
3496 }
3497# endif
3498}
3499#endif
3500
3501
3502/**
3503 * Atomically Or a signed 64-bit value, unordered.
3504 *
3505 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
3506 * @param i64 The value to OR *pi64 with.
3507 *
3508 * @remarks x86: Requires a Pentium or later.
3509 */
3510DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3511{
3512 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3513}
3514
3515
3516/**
3517 * Atomically And an unsigned 32-bit value, unordered.
3518 *
3519 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3520 * @param u32 The value to AND *pu32 with.
3521 *
3522 * @remarks x86: Requires a 386 or later.
3523 */
3524#if RT_INLINE_ASM_EXTERNAL
3525DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3526#else
3527DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3528{
3529# if RT_INLINE_ASM_GNU_STYLE
3530 __asm__ __volatile__("andl %1, %0\n\t"
3531 : "=m" (*pu32)
3532 : "ir" (u32),
3533 "m" (*pu32));
3534# else
3535 __asm
3536 {
3537 mov eax, [u32]
3538# ifdef RT_ARCH_AMD64
3539 mov rdx, [pu32]
3540 and [rdx], eax
3541# else
3542 mov edx, [pu32]
3543 and [edx], eax
3544# endif
3545 }
3546# endif
3547}
3548#endif
3549
3550
3551/**
3552 * Atomically And a signed 32-bit value, unordered.
3553 *
3554 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3555 * @param i32 The value to AND *pi32 with.
3556 *
3557 * @remarks x86: Requires a 386 or later.
3558 */
3559DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3560{
3561 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3562}
3563
3564
3565/**
3566 * Atomically And an unsigned 64-bit value, unordered.
3567 *
3568 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3569 * @param u64 The value to AND *pu64 with.
3570 *
3571 * @remarks x86: Requires a Pentium or later.
3572 */
3573#if RT_INLINE_ASM_EXTERNAL
3574DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3575#else
3576DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3577{
3578# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3579 __asm__ __volatile__("andq %1, %0\n\t"
3580 : "=m" (*pu64)
3581 : "r" (u64),
3582 "m" (*pu64));
3583# else
3584 for (;;)
3585 {
3586 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3587 uint64_t u64New = u64Old & u64;
3588 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3589 break;
3590 ASMNopPause();
3591 }
3592# endif
3593}
3594#endif
3595
3596
3597/**
3598 * Atomically And a signed 64-bit value, unordered.
3599 *
3600 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3601 * @param i64 The value to AND *pi64 with.
3602 *
3603 * @remarks x86: Requires a Pentium or later.
3604 */
3605DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3606{
3607 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3608}
3609
3610
3611/**
3612 * Atomically increment an unsigned 32-bit value, unordered.
3613 *
3614 * @returns the new value.
3615 * @param pu32 Pointer to the variable to increment.
3616 *
3617 * @remarks x86: Requires a 486 or later.
3618 */
3619#if RT_INLINE_ASM_EXTERNAL
3620DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3621#else
3622DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3623{
3624 uint32_t u32;
3625# if RT_INLINE_ASM_GNU_STYLE
3626 __asm__ __volatile__("xaddl %0, %1\n\t"
3627 : "=r" (u32),
3628 "=m" (*pu32)
3629 : "0" (1),
3630 "m" (*pu32)
3631 : "memory");
3632 return u32 + 1;
3633# else
3634 __asm
3635 {
3636 mov eax, 1
3637# ifdef RT_ARCH_AMD64
3638 mov rdx, [pu32]
3639 xadd [rdx], eax
3640# else
3641 mov edx, [pu32]
3642 xadd [edx], eax
3643# endif
3644 mov u32, eax
3645 }
3646 return u32 + 1;
3647# endif
3648}
3649#endif
3650
3651
3652/**
3653 * Atomically decrement an unsigned 32-bit value, unordered.
3654 *
3655 * @returns the new value.
3656 * @param pu32 Pointer to the variable to decrement.
3657 *
3658 * @remarks x86: Requires a 486 or later.
3659 */
3660#if RT_INLINE_ASM_EXTERNAL
3661DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3662#else
3663DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3664{
3665 uint32_t u32;
3666# if RT_INLINE_ASM_GNU_STYLE
3667 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3668 : "=r" (u32),
3669 "=m" (*pu32)
3670 : "0" (-1),
3671 "m" (*pu32)
3672 : "memory");
3673 return u32 - 1;
3674# else
3675 __asm
3676 {
3677 mov eax, -1
3678# ifdef RT_ARCH_AMD64
3679 mov rdx, [pu32]
3680 xadd [rdx], eax
3681# else
3682 mov edx, [pu32]
3683 xadd [edx], eax
3684# endif
3685 mov u32, eax
3686 }
3687 return u32 - 1;
3688# endif
3689}
3690#endif
3691
3692
3693/** @def RT_ASM_PAGE_SIZE
3694 * We try to avoid dragging in iprt/param.h here.
3695 * @internal
3696 */
3697#if defined(RT_ARCH_SPARC64)
3698# define RT_ASM_PAGE_SIZE 0x2000
3699# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3700# if PAGE_SIZE != 0x2000
3701# error "PAGE_SIZE is not 0x2000!"
3702# endif
3703# endif
3704#else
3705# define RT_ASM_PAGE_SIZE 0x1000
3706# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3707# if PAGE_SIZE != 0x1000
3708# error "PAGE_SIZE is not 0x1000!"
3709# endif
3710# endif
3711#endif
3712
3713/**
3714 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes, 4K on most targets).
3715 *
3716 * @param pv Pointer to the memory block. This must be page aligned.
3717 */
3718#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3719DECLASM(void) ASMMemZeroPage(volatile void *pv);
3720# else
3721DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3722{
3723# if RT_INLINE_ASM_USES_INTRIN
3724# ifdef RT_ARCH_AMD64
3725 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3726# else
3727 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3728# endif
3729
3730# elif RT_INLINE_ASM_GNU_STYLE
3731 RTCCUINTREG uDummy;
3732# ifdef RT_ARCH_AMD64
3733 __asm__ __volatile__("rep stosq"
3734 : "=D" (pv),
3735 "=c" (uDummy)
3736 : "0" (pv),
3737 "c" (RT_ASM_PAGE_SIZE >> 3),
3738 "a" (0)
3739 : "memory");
3740# else
3741 __asm__ __volatile__("rep stosl"
3742 : "=D" (pv),
3743 "=c" (uDummy)
3744 : "0" (pv),
3745 "c" (RT_ASM_PAGE_SIZE >> 2),
3746 "a" (0)
3747 : "memory");
3748# endif
3749# else
3750 __asm
3751 {
3752# ifdef RT_ARCH_AMD64
3753 xor rax, rax
3754 mov ecx, 0200h
3755 mov rdi, [pv]
3756 rep stosq
3757# else
3758 xor eax, eax
3759 mov ecx, 0400h
3760 mov edi, [pv]
3761 rep stosd
3762# endif
3763 }
3764# endif
3765}
3766# endif
3767
3768
3769/**
3770 * Zeros a memory block with a 32-bit aligned size.
3771 *
3772 * @param pv Pointer to the memory block.
3773 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3774 */
3775#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3776DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3777#else
3778DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3779{
3780# if RT_INLINE_ASM_USES_INTRIN
3781# ifdef RT_ARCH_AMD64
3782 if (!(cb & 7))
3783 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3784 else
3785# endif
3786 __stosd((unsigned long *)pv, 0, cb / 4);
3787
3788# elif RT_INLINE_ASM_GNU_STYLE
3789 __asm__ __volatile__("rep stosl"
3790 : "=D" (pv),
3791 "=c" (cb)
3792 : "0" (pv),
3793 "1" (cb >> 2),
3794 "a" (0)
3795 : "memory");
3796# else
3797 __asm
3798 {
3799 xor eax, eax
3800# ifdef RT_ARCH_AMD64
3801 mov rcx, [cb]
3802 shr rcx, 2
3803 mov rdi, [pv]
3804# else
3805 mov ecx, [cb]
3806 shr ecx, 2
3807 mov edi, [pv]
3808# endif
3809 rep stosd
3810 }
3811# endif
3812}
3813#endif
3814
3815
3816/**
3817 * Fills a memory block with a 32-bit aligned size.
3818 *
3819 * @param pv Pointer to the memory block.
3820 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3821 * @param u32 The value to fill with.
3822 */
3823#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3824DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3825#else
3826DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3827{
3828# if RT_INLINE_ASM_USES_INTRIN
3829# ifdef RT_ARCH_AMD64
3830 if (!(cb & 7))
3831 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3832 else
3833# endif
3834 __stosd((unsigned long *)pv, u32, cb / 4);
3835
3836# elif RT_INLINE_ASM_GNU_STYLE
3837 __asm__ __volatile__("rep stosl"
3838 : "=D" (pv),
3839 "=c" (cb)
3840 : "0" (pv),
3841 "1" (cb >> 2),
3842 "a" (u32)
3843 : "memory");
3844# else
3845 __asm
3846 {
3847# ifdef RT_ARCH_AMD64
3848 mov rcx, [cb]
3849 shr rcx, 2
3850 mov rdi, [pv]
3851# else
3852 mov ecx, [cb]
3853 shr ecx, 2
3854 mov edi, [pv]
3855# endif
3856 mov eax, [u32]
3857 rep stosd
3858 }
3859# endif
3860}
3861#endif
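/* Usage sketch (illustrative; the buffer and fill pattern are made up):
 *
 *      uint32_t au32Buf[256];
 *      ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef)); // poison
 *      ...
 *      ASMMemZero32(au32Buf, sizeof(au32Buf));                       // wipe
 *
 * Both functions require the byte count to be a multiple of four; a sizeof
 * of a uint32_t array satisfies that by construction.
 */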
3862
3863
3864/**
3865 * Checks if a memory block is all zeros.
3866 *
3867 * @returns Pointer to the first non-zero byte.
3868 * @returns NULL if all zero.
3869 *
3870 * @param pv Pointer to the memory block.
3871 * @param cb Number of bytes in the block.
3872 *
3873 * @todo Fix name, it is a predicate function but it's not returning boolean!
3874 */
3875#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3876DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3877#else
3878DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3879{
3880 uint8_t const *pb = (uint8_t const *)pv;
3881 for (; cb; cb--, pb++)
3882 if (RT_LIKELY(*pb == 0))
3883 { /* likely */ }
3884 else
3885 return (void *)pb;
3886 return NULL;
3887}
3888#endif
3889
3890
3891/**
3892 * Checks if a memory block is all zeros.
3893 *
3894 * @returns true if zero, false if not.
3895 *
3896 * @param pv Pointer to the memory block.
3897 * @param cb Number of bytes in the block.
3898 *
3899 * @sa ASMMemFirstNonZero
3900 */
3901DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3902{
3903 return ASMMemFirstNonZero(pv, cb) == NULL;
3904}
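
/* A minimal usage sketch: locating the first dirty byte in a buffer that is
 * expected to be all zeros. The buffer and the modified index are invented
 * for illustration.
 *
 * @code
 *      uint8_t abBuf[128] = {0};
 *      abBuf[100] = 0x42;
 *      uint8_t *pbDirty = (uint8_t *)ASMMemFirstNonZero(abBuf, sizeof(abBuf));
 *      Assert(pbDirty == &abBuf[100]);
 *      Assert(!ASMMemIsZero(abBuf, sizeof(abBuf)));
 * @endcode
 */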
3905
3906
3907/**
3908 * Checks if a memory page is all zeros.
3909 *
3910 * @returns true / false.
3911 *
3912 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3913 * boundary.
3914 */
3915DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3916{
3917# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3918 union { RTCCUINTREG r; bool f; } uAX;
3919 RTCCUINTREG xCX, xDI;
3920 Assert(!((uintptr_t)pvPage & 15));
3921 __asm__ __volatile__("repe; "
3922# ifdef RT_ARCH_AMD64
3923 "scasq\n\t"
3924# else
3925 "scasl\n\t"
3926# endif
3927 "setnc %%al\n\t"
3928 : "=&c" (xCX),
3929 "=&D" (xDI),
3930 "=&a" (uAX.r)
3931 : "mr" (pvPage),
3932# ifdef RT_ARCH_AMD64
3933 "0" (RT_ASM_PAGE_SIZE/8),
3934# else
3935 "0" (RT_ASM_PAGE_SIZE/4),
3936# endif
3937 "1" (pvPage),
3938 "2" (0));
3939 return uAX.f;
3940# else
3941 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3942 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3943 Assert(!((uintptr_t)pvPage & 15));
3944 for (;;)
3945 {
3946 if (puPtr[0]) return false;
3947 if (puPtr[4]) return false;
3948
3949 if (puPtr[2]) return false;
3950 if (puPtr[6]) return false;
3951
3952 if (puPtr[1]) return false;
3953 if (puPtr[5]) return false;
3954
3955 if (puPtr[3]) return false;
3956 if (puPtr[7]) return false;
3957
3958 if (!--cLeft)
3959 return true;
3960 puPtr += 8;
3961 }
3962 return true;
3963# endif
3964}
3965
3966
3967/**
3968 * Checks if a memory block is filled with the specified byte, returning the
3969 * first mismatch.
3970 *
3971 * This is sort of an inverted memchr.
3972 *
3973 * @returns Pointer to the byte which doesn't equal u8.
3974 * @returns NULL if all equal to u8.
3975 *
3976 * @param pv Pointer to the memory block.
3977 * @param cb Number of bytes in the block.
3978 * @param u8 The value it's supposed to be filled with.
3979 *
3980 * @remarks No alignment requirements.
3981 */
3982#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3983 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
3984DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3985#else
3986DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3987{
3988 uint8_t const *pb = (uint8_t const *)pv;
3989 for (; cb; cb--, pb++)
3990 if (RT_LIKELY(*pb == u8))
3991 { /* likely */ }
3992 else
3993 return (void *)pb;
3994 return NULL;
3995}
3996#endif
3997
3998
3999/**
4000 * Checks if a memory block is filled with the specified byte.
4001 *
4002 * @returns true if all matching, false if not.
4003 *
4004 * @param pv Pointer to the memory block.
4005 * @param cb Number of bytes in the block.
4006 * @param u8 The value it's supposed to be filled with.
4007 *
4008 * @remarks No alignment requirements.
4009 */
4010DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
4011{
4012 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4013}
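
/* A minimal usage sketch: verifying that a sector-sized buffer still holds
 * the 0xff fill it is assumed to have received elsewhere, locating the first
 * offending byte on a mismatch. Buffer and fill value are assumptions made
 * for the example.
 *
 * @code
 *      extern uint8_t g_abSector[512];   // assumed to be filled with 0xff elsewhere
 *      if (!ASMMemIsAllU8(g_abSector, sizeof(g_abSector), 0xff))
 *      {
 *          uint8_t *pbBad = (uint8_t *)ASMMemFirstMismatchingU8(g_abSector, sizeof(g_abSector), 0xff);
 *          // pbBad points at the first byte differing from 0xff.
 *      }
 * @endcode
 */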
4014
4015
4016/**
4017 * Checks if a memory block is filled with the specified 32-bit value.
4018 *
4019 * This is a sort of inverted memchr.
4020 *
4021 * @returns Pointer to the first value which doesn't equal u32.
4022 * @returns NULL if all equal to u32.
4023 *
4024 * @param pv Pointer to the memory block.
4025 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4026 * @param u32 The value it's supposed to be filled with.
4027 */
4028DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
4029{
4030/** @todo rewrite this in inline assembly? */
4031 uint32_t const *pu32 = (uint32_t const *)pv;
4032 for (; cb; cb -= 4, pu32++)
4033 if (RT_LIKELY(*pu32 == u32))
4034 { /* likely */ }
4035 else
4036 return (uint32_t *)pu32;
4037 return NULL;
4038}
4039
4040
4041/**
4042 * Probes a byte pointer for read access.
4043 *
4044 * While the function will fault if the byte is not read accessible,
4045 * the idea is to do this in a safe place like before acquiring locks
4046 * and such like.
4047 *
4048 * Also, this function guarantees that an eager compiler is not going
4049 * to optimize the probing away.
4050 *
4051 * @param pvByte Pointer to the byte.
4052 */
4053#if RT_INLINE_ASM_EXTERNAL
4054DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4055#else
4056DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4057{
4058 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4059 uint8_t u8;
4060# if RT_INLINE_ASM_GNU_STYLE
4061 __asm__ __volatile__("movb (%1), %0\n\t"
4062 : "=r" (u8)
4063 : "r" (pvByte));
4064# else
4065 __asm
4066 {
4067# ifdef RT_ARCH_AMD64
4068 mov rax, [pvByte]
4069 mov al, [rax]
4070# else
4071 mov eax, [pvByte]
4072 mov al, [eax]
4073# endif
4074 mov [u8], al
4075 }
4076# endif
4077 return u8;
4078}
4079#endif
4080
4081/**
4082 * Probes a buffer for read access page by page.
4083 *
4084 * While the function will fault if the buffer is not fully read
4085 * accessible, the idea is to do this in a safe place like before
4086 * acquiring locks and such like.
4087 *
4088 * Also, this function guarantees that an eager compiler is not going
4089 * to optimize the probing away.
4090 *
4091 * @param pvBuf Pointer to the buffer.
4092 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4093 */
4094DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4095{
4096 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4097 /* the first byte */
4098 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4099 ASMProbeReadByte(pu8);
4100
4101 /* the pages in between. */
4102 while (cbBuf > RT_ASM_PAGE_SIZE)
4103 {
4104 ASMProbeReadByte(pu8);
4105 cbBuf -= RT_ASM_PAGE_SIZE;
4106 pu8 += RT_ASM_PAGE_SIZE;
4107 }
4108
4109 /* the last byte */
4110 ASMProbeReadByte(pu8 + cbBuf - 1);
4111}
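
/* A minimal usage sketch: touching every page of a caller-supplied buffer
 * before entering a section where a page fault would be awkward. The function
 * name and parameters are hypothetical.
 *
 * @code
 *      void exampleHandleRequest(const void *pvReq, size_t cbReq)
 *      {
 *          ASMProbeReadBuffer(pvReq, cbReq);  // fault here, not while holding locks
 *          // ... acquire locks and process the request ...
 *      }
 * @endcode
 */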
4112
4113
4114
4115/** @defgroup grp_inline_bits Bit Operations
4116 * @{
4117 */
4118
4119
4120/**
4121 * Sets a bit in a bitmap.
4122 *
4123 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4124 * @param iBit The bit to set.
4125 *
4126 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4127 * However, doing so will yield better performance as well as avoiding
4128 * traps accessing the last bits in the bitmap.
4129 */
4130#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4131DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4132#else
4133DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4134{
4135# if RT_INLINE_ASM_USES_INTRIN
4136 _bittestandset((long *)pvBitmap, iBit);
4137
4138# elif RT_INLINE_ASM_GNU_STYLE
4139 __asm__ __volatile__("btsl %1, %0"
4140 : "=m" (*(volatile long *)pvBitmap)
4141 : "Ir" (iBit),
4142 "m" (*(volatile long *)pvBitmap)
4143 : "memory");
4144# else
4145 __asm
4146 {
4147# ifdef RT_ARCH_AMD64
4148 mov rax, [pvBitmap]
4149 mov edx, [iBit]
4150 bts [rax], edx
4151# else
4152 mov eax, [pvBitmap]
4153 mov edx, [iBit]
4154 bts [eax], edx
4155# endif
4156 }
4157# endif
4158}
4159#endif
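
/* A minimal usage sketch: a small, 32-bit aligned bitmap tracking 1024
 * resources. The array name and the bit index are invented for illustration;
 * ASMBitTest and ASMBitClear are defined further down in this file.
 *
 * @code
 *      uint32_t au32Bitmap[1024 / 32] = {0};
 *      ASMBitSet(au32Bitmap, 42);             // mark resource 42 as used
 *      Assert(ASMBitTest(au32Bitmap, 42));
 *      ASMBitClear(au32Bitmap, 42);           // and release it again
 * @endcode
 */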
4160
4161
4162/**
4163 * Atomically sets a bit in a bitmap, ordered.
4164 *
4165 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4166 * the memory access isn't atomic!
4167 * @param iBit The bit to set.
4168 *
4169 * @remarks x86: Requires a 386 or later.
4170 */
4171#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4172DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4173#else
4174DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4175{
4176 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4177# if RT_INLINE_ASM_USES_INTRIN
4178 _interlockedbittestandset((long *)pvBitmap, iBit);
4179# elif RT_INLINE_ASM_GNU_STYLE
4180 __asm__ __volatile__("lock; btsl %1, %0"
4181 : "=m" (*(volatile long *)pvBitmap)
4182 : "Ir" (iBit),
4183 "m" (*(volatile long *)pvBitmap)
4184 : "memory");
4185# else
4186 __asm
4187 {
4188# ifdef RT_ARCH_AMD64
4189 mov rax, [pvBitmap]
4190 mov edx, [iBit]
4191 lock bts [rax], edx
4192# else
4193 mov eax, [pvBitmap]
4194 mov edx, [iBit]
4195 lock bts [eax], edx
4196# endif
4197 }
4198# endif
4199}
4200#endif
4201
4202
4203/**
4204 * Clears a bit in a bitmap.
4205 *
4206 * @param pvBitmap Pointer to the bitmap.
4207 * @param iBit The bit to clear.
4208 *
4209 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4210 * However, doing so will yield better performance as well as avoiding
4211 * traps accessing the last bits in the bitmap.
4212 */
4213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4214DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4215#else
4216DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4217{
4218# if RT_INLINE_ASM_USES_INTRIN
4219 _bittestandreset((long *)pvBitmap, iBit);
4220
4221# elif RT_INLINE_ASM_GNU_STYLE
4222 __asm__ __volatile__("btrl %1, %0"
4223 : "=m" (*(volatile long *)pvBitmap)
4224 : "Ir" (iBit),
4225 "m" (*(volatile long *)pvBitmap)
4226 : "memory");
4227# else
4228 __asm
4229 {
4230# ifdef RT_ARCH_AMD64
4231 mov rax, [pvBitmap]
4232 mov edx, [iBit]
4233 btr [rax], edx
4234# else
4235 mov eax, [pvBitmap]
4236 mov edx, [iBit]
4237 btr [eax], edx
4238# endif
4239 }
4240# endif
4241}
4242#endif
4243
4244
4245/**
4246 * Atomically clears a bit in a bitmap, ordered.
4247 *
4248 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4249 * the memory access isn't atomic!
4250 * @param iBit The bit to clear.
4251 *
4252 * @remarks No memory barrier, take care on smp.
4253 * @remarks x86: Requires a 386 or later.
4254 */
4255#if RT_INLINE_ASM_EXTERNAL
4256DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4257#else
4258DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4259{
4260 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4261# if RT_INLINE_ASM_GNU_STYLE
4262 __asm__ __volatile__("lock; btrl %1, %0"
4263 : "=m" (*(volatile long *)pvBitmap)
4264 : "Ir" (iBit),
4265 "m" (*(volatile long *)pvBitmap)
4266 : "memory");
4267# else
4268 __asm
4269 {
4270# ifdef RT_ARCH_AMD64
4271 mov rax, [pvBitmap]
4272 mov edx, [iBit]
4273 lock btr [rax], edx
4274# else
4275 mov eax, [pvBitmap]
4276 mov edx, [iBit]
4277 lock btr [eax], edx
4278# endif
4279 }
4280# endif
4281}
4282#endif
4283
4284
4285/**
4286 * Toggles a bit in a bitmap.
4287 *
4288 * @param pvBitmap Pointer to the bitmap.
4289 * @param iBit The bit to toggle.
4290 *
4291 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4292 * However, doing so will yield better performance as well as avoiding
4293 * traps accessing the last bits in the bitmap.
4294 */
4295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4296DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4297#else
4298DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4299{
4300# if RT_INLINE_ASM_USES_INTRIN
4301 _bittestandcomplement((long *)pvBitmap, iBit);
4302# elif RT_INLINE_ASM_GNU_STYLE
4303 __asm__ __volatile__("btcl %1, %0"
4304 : "=m" (*(volatile long *)pvBitmap)
4305 : "Ir" (iBit),
4306 "m" (*(volatile long *)pvBitmap)
4307 : "memory");
4308# else
4309 __asm
4310 {
4311# ifdef RT_ARCH_AMD64
4312 mov rax, [pvBitmap]
4313 mov edx, [iBit]
4314 btc [rax], edx
4315# else
4316 mov eax, [pvBitmap]
4317 mov edx, [iBit]
4318 btc [eax], edx
4319# endif
4320 }
4321# endif
4322}
4323#endif
4324
4325
4326/**
4327 * Atomically toggles a bit in a bitmap, ordered.
4328 *
4329 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4330 * the memory access isn't atomic!
4331 * @param iBit The bit to toggle.
4332 *
4333 * @remarks x86: Requires a 386 or later.
4334 */
4335#if RT_INLINE_ASM_EXTERNAL
4336DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4337#else
4338DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4339{
4340 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4341# if RT_INLINE_ASM_GNU_STYLE
4342 __asm__ __volatile__("lock; btcl %1, %0"
4343 : "=m" (*(volatile long *)pvBitmap)
4344 : "Ir" (iBit),
4345 "m" (*(volatile long *)pvBitmap)
4346 : "memory");
4347# else
4348 __asm
4349 {
4350# ifdef RT_ARCH_AMD64
4351 mov rax, [pvBitmap]
4352 mov edx, [iBit]
4353 lock btc [rax], edx
4354# else
4355 mov eax, [pvBitmap]
4356 mov edx, [iBit]
4357 lock btc [eax], edx
4358# endif
4359 }
4360# endif
4361}
4362#endif
4363
4364
4365/**
4366 * Tests and sets a bit in a bitmap.
4367 *
4368 * @returns true if the bit was set.
4369 * @returns false if the bit was clear.
4370 *
4371 * @param pvBitmap Pointer to the bitmap.
4372 * @param iBit The bit to test and set.
4373 *
4374 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4375 * However, doing so will yield better performance as well as avoiding
4376 * traps accessing the last bits in the bitmap.
4377 */
4378#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4379DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4380#else
4381DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4382{
4383 union { bool f; uint32_t u32; uint8_t u8; } rc;
4384# if RT_INLINE_ASM_USES_INTRIN
4385 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4386
4387# elif RT_INLINE_ASM_GNU_STYLE
4388 __asm__ __volatile__("btsl %2, %1\n\t"
4389 "setc %b0\n\t"
4390 "andl $1, %0\n\t"
4391 : "=q" (rc.u32),
4392 "=m" (*(volatile long *)pvBitmap)
4393 : "Ir" (iBit),
4394 "m" (*(volatile long *)pvBitmap)
4395 : "memory");
4396# else
4397 __asm
4398 {
4399 mov edx, [iBit]
4400# ifdef RT_ARCH_AMD64
4401 mov rax, [pvBitmap]
4402 bts [rax], edx
4403# else
4404 mov eax, [pvBitmap]
4405 bts [eax], edx
4406# endif
4407 setc al
4408 and eax, 1
4409 mov [rc.u32], eax
4410 }
4411# endif
4412 return rc.f;
4413}
4414#endif
4415
4416
4417/**
4418 * Atomically tests and sets a bit in a bitmap, ordered.
4419 *
4420 * @returns true if the bit was set.
4421 * @returns false if the bit was clear.
4422 *
4423 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4424 * the memory access isn't atomic!
4425 * @param iBit The bit to test and set.
4426 *
4427 * @remarks x86: Requires a 386 or later.
4428 */
4429#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4430DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4431#else
4432DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4433{
4434 union { bool f; uint32_t u32; uint8_t u8; } rc;
4435 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4436# if RT_INLINE_ASM_USES_INTRIN
4437 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4438# elif RT_INLINE_ASM_GNU_STYLE
4439 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4440 "setc %b0\n\t"
4441 "andl $1, %0\n\t"
4442 : "=q" (rc.u32),
4443 "=m" (*(volatile long *)pvBitmap)
4444 : "Ir" (iBit),
4445 "m" (*(volatile long *)pvBitmap)
4446 : "memory");
4447# else
4448 __asm
4449 {
4450 mov edx, [iBit]
4451# ifdef RT_ARCH_AMD64
4452 mov rax, [pvBitmap]
4453 lock bts [rax], edx
4454# else
4455 mov eax, [pvBitmap]
4456 lock bts [eax], edx
4457# endif
4458 setc al
4459 and eax, 1
4460 mov [rc.u32], eax
4461 }
4462# endif
4463 return rc.f;
4464}
4465#endif
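
/* A minimal usage sketch: claiming a slot from a shared allocation bitmap.
 * Because the test-and-set is atomic, two threads racing for the same index
 * cannot both observe 'false'. The bitmap and helper are hypothetical.
 *
 * @code
 *      static uint32_t s_au32Allocated[8];    // 256 slots, 32-bit aligned
 *      bool exampleClaimSlot(uint32_t iSlot)
 *      {
 *          return !ASMAtomicBitTestAndSet(s_au32Allocated, (int32_t)iSlot);
 *      }
 * @endcode
 */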
4466
4467
4468/**
4469 * Tests and clears a bit in a bitmap.
4470 *
4471 * @returns true if the bit was set.
4472 * @returns false if the bit was clear.
4473 *
4474 * @param pvBitmap Pointer to the bitmap.
4475 * @param iBit The bit to test and clear.
4476 *
4477 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4478 * However, doing so will yield better performance as well as avoiding
4479 * traps accessing the last bits in the bitmap.
4480 */
4481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4482DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4483#else
4484DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4485{
4486 union { bool f; uint32_t u32; uint8_t u8; } rc;
4487# if RT_INLINE_ASM_USES_INTRIN
4488 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4489
4490# elif RT_INLINE_ASM_GNU_STYLE
4491 __asm__ __volatile__("btrl %2, %1\n\t"
4492 "setc %b0\n\t"
4493 "andl $1, %0\n\t"
4494 : "=q" (rc.u32),
4495 "=m" (*(volatile long *)pvBitmap)
4496 : "Ir" (iBit),
4497 "m" (*(volatile long *)pvBitmap)
4498 : "memory");
4499# else
4500 __asm
4501 {
4502 mov edx, [iBit]
4503# ifdef RT_ARCH_AMD64
4504 mov rax, [pvBitmap]
4505 btr [rax], edx
4506# else
4507 mov eax, [pvBitmap]
4508 btr [eax], edx
4509# endif
4510 setc al
4511 and eax, 1
4512 mov [rc.u32], eax
4513 }
4514# endif
4515 return rc.f;
4516}
4517#endif
4518
4519
4520/**
4521 * Atomically tests and clears a bit in a bitmap, ordered.
4522 *
4523 * @returns true if the bit was set.
4524 * @returns false if the bit was clear.
4525 *
4526 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4527 * the memory access isn't atomic!
4528 * @param iBit The bit to test and clear.
4529 *
4530 * @remarks No memory barrier, take care on smp.
4531 * @remarks x86: Requires a 386 or later.
4532 */
4533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4534DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4535#else
4536DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4537{
4538 union { bool f; uint32_t u32; uint8_t u8; } rc;
4539 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4540# if RT_INLINE_ASM_USES_INTRIN
4541 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4542
4543# elif RT_INLINE_ASM_GNU_STYLE
4544 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4545 "setc %b0\n\t"
4546 "andl $1, %0\n\t"
4547 : "=q" (rc.u32),
4548 "=m" (*(volatile long *)pvBitmap)
4549 : "Ir" (iBit),
4550 "m" (*(volatile long *)pvBitmap)
4551 : "memory");
4552# else
4553 __asm
4554 {
4555 mov edx, [iBit]
4556# ifdef RT_ARCH_AMD64
4557 mov rax, [pvBitmap]
4558 lock btr [rax], edx
4559# else
4560 mov eax, [pvBitmap]
4561 lock btr [eax], edx
4562# endif
4563 setc al
4564 and eax, 1
4565 mov [rc.u32], eax
4566 }
4567# endif
4568 return rc.f;
4569}
4570#endif
4571
4572
4573/**
4574 * Tests and toggles a bit in a bitmap.
4575 *
4576 * @returns true if the bit was set.
4577 * @returns false if the bit was clear.
4578 *
4579 * @param pvBitmap Pointer to the bitmap.
4580 * @param iBit The bit to test and toggle.
4581 *
4582 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4583 * However, doing so will yield better performance as well as avoiding
4584 * traps accessing the last bits in the bitmap.
4585 */
4586#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4587DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4588#else
4589DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4590{
4591 union { bool f; uint32_t u32; uint8_t u8; } rc;
4592# if RT_INLINE_ASM_USES_INTRIN
4593 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4594
4595# elif RT_INLINE_ASM_GNU_STYLE
4596 __asm__ __volatile__("btcl %2, %1\n\t"
4597 "setc %b0\n\t"
4598 "andl $1, %0\n\t"
4599 : "=q" (rc.u32),
4600 "=m" (*(volatile long *)pvBitmap)
4601 : "Ir" (iBit),
4602 "m" (*(volatile long *)pvBitmap)
4603 : "memory");
4604# else
4605 __asm
4606 {
4607 mov edx, [iBit]
4608# ifdef RT_ARCH_AMD64
4609 mov rax, [pvBitmap]
4610 btc [rax], edx
4611# else
4612 mov eax, [pvBitmap]
4613 btc [eax], edx
4614# endif
4615 setc al
4616 and eax, 1
4617 mov [rc.u32], eax
4618 }
4619# endif
4620 return rc.f;
4621}
4622#endif
4623
4624
4625/**
4626 * Atomically tests and toggles a bit in a bitmap, ordered.
4627 *
4628 * @returns true if the bit was set.
4629 * @returns false if the bit was clear.
4630 *
4631 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4632 * the memory access isn't atomic!
4633 * @param iBit The bit to test and toggle.
4634 *
4635 * @remarks x86: Requires a 386 or later.
4636 */
4637#if RT_INLINE_ASM_EXTERNAL
4638DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4639#else
4640DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4641{
4642 union { bool f; uint32_t u32; uint8_t u8; } rc;
4643 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4644# if RT_INLINE_ASM_GNU_STYLE
4645 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4646 "setc %b0\n\t"
4647 "andl $1, %0\n\t"
4648 : "=q" (rc.u32),
4649 "=m" (*(volatile long *)pvBitmap)
4650 : "Ir" (iBit),
4651 "m" (*(volatile long *)pvBitmap)
4652 : "memory");
4653# else
4654 __asm
4655 {
4656 mov edx, [iBit]
4657# ifdef RT_ARCH_AMD64
4658 mov rax, [pvBitmap]
4659 lock btc [rax], edx
4660# else
4661 mov eax, [pvBitmap]
4662 lock btc [eax], edx
4663# endif
4664 setc al
4665 and eax, 1
4666 mov [rc.u32], eax
4667 }
4668# endif
4669 return rc.f;
4670}
4671#endif
4672
4673
4674/**
4675 * Tests if a bit in a bitmap is set.
4676 *
4677 * @returns true if the bit is set.
4678 * @returns false if the bit is clear.
4679 *
4680 * @param pvBitmap Pointer to the bitmap.
4681 * @param iBit The bit to test.
4682 *
4683 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4684 * However, doing so will yield better performance as well as avoiding
4685 * traps accessing the last bits in the bitmap.
4686 */
4687#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4688DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4689#else
4690DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4691{
4692 union { bool f; uint32_t u32; uint8_t u8; } rc;
4693# if RT_INLINE_ASM_USES_INTRIN
4694 rc.u32 = _bittest((long *)pvBitmap, iBit);
4695# elif RT_INLINE_ASM_GNU_STYLE
4696
4697 __asm__ __volatile__("btl %2, %1\n\t"
4698 "setc %b0\n\t"
4699 "andl $1, %0\n\t"
4700 : "=q" (rc.u32)
4701 : "m" (*(const volatile long *)pvBitmap),
4702 "Ir" (iBit)
4703 : "memory");
4704# else
4705 __asm
4706 {
4707 mov edx, [iBit]
4708# ifdef RT_ARCH_AMD64
4709 mov rax, [pvBitmap]
4710 bt [rax], edx
4711# else
4712 mov eax, [pvBitmap]
4713 bt [eax], edx
4714# endif
4715 setc al
4716 and eax, 1
4717 mov [rc.u32], eax
4718 }
4719# endif
4720 return rc.f;
4721}
4722#endif
4723
4724
4725/**
4726 * Clears a bit range within a bitmap.
4727 *
4728 * @param pvBitmap Pointer to the bitmap.
4729 * @param iBitStart The first bit to clear.
4730 * @param iBitEnd The first bit not to clear.
4731 */
4732DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4733{
4734 if (iBitStart < iBitEnd)
4735 {
4736 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4737 int32_t iStart = iBitStart & ~31;
4738 int32_t iEnd = iBitEnd & ~31;
4739 if (iStart == iEnd)
4740 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4741 else
4742 {
4743 /* bits in first dword. */
4744 if (iBitStart & 31)
4745 {
4746 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4747 pu32++;
4748 iBitStart = iStart + 32;
4749 }
4750
4751 /* whole dword. */
4752 if (iBitStart != iEnd)
4753 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4754
4755 /* bits in last dword. */
4756 if (iBitEnd & 31)
4757 {
4758 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4759 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4760 }
4761 }
4762 }
4763}
4764
4765
4766/**
4767 * Sets a bit range within a bitmap.
4768 *
4769 * @param pvBitmap Pointer to the bitmap.
4770 * @param iBitStart The first bit to set.
4771 * @param iBitEnd The first bit not to set.
4772 */
4773DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4774{
4775 if (iBitStart < iBitEnd)
4776 {
4777 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4778 int32_t iStart = iBitStart & ~31;
4779 int32_t iEnd = iBitEnd & ~31;
4780 if (iStart == iEnd)
4781 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4782 else
4783 {
4784 /* bits in first dword. */
4785 if (iBitStart & 31)
4786 {
4787 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4788 pu32++;
4789 iBitStart = iStart + 32;
4790 }
4791
4792 /* whole dword. */
4793 if (iBitStart != iEnd)
4794 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4795
4796 /* bits in last dword. */
4797 if (iBitEnd & 31)
4798 {
4799 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4800 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4801 }
4802 }
4803 }
4804}
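
/* A minimal usage sketch: marking a run of bits as set and punching a hole
 * back out of it. Note that iBitEnd is exclusive in both range functions.
 * The bitmap and the ranges are invented for illustration.
 *
 * @code
 *      uint32_t au32Present[4] = {0};         // 128 bits
 *      ASMBitSetRange(au32Present, 16, 48);   // sets bits 16..47
 *      ASMBitClearRange(au32Present, 20, 24); // clears bits 20..23 again
 * @endcode
 */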
4805
4806
4807/**
4808 * Finds the first clear bit in a bitmap.
4809 *
4810 * @returns Index of the first zero bit.
4811 * @returns -1 if no clear bit was found.
4812 * @param pvBitmap Pointer to the bitmap.
4813 * @param cBits The number of bits in the bitmap. Multiple of 32.
4814 */
4815#if RT_INLINE_ASM_EXTERNAL
4816DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4817#else
4818DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4819{
4820 if (cBits)
4821 {
4822 int32_t iBit;
4823# if RT_INLINE_ASM_GNU_STYLE
4824 RTCCUINTREG uEAX, uECX, uEDI;
4825 cBits = RT_ALIGN_32(cBits, 32);
4826 __asm__ __volatile__("repe; scasl\n\t"
4827 "je 1f\n\t"
4828# ifdef RT_ARCH_AMD64
4829 "lea -4(%%rdi), %%rdi\n\t"
4830 "xorl (%%rdi), %%eax\n\t"
4831 "subq %5, %%rdi\n\t"
4832# else
4833 "lea -4(%%edi), %%edi\n\t"
4834 "xorl (%%edi), %%eax\n\t"
4835 "subl %5, %%edi\n\t"
4836# endif
4837 "shll $3, %%edi\n\t"
4838 "bsfl %%eax, %%edx\n\t"
4839 "addl %%edi, %%edx\n\t"
4840 "1:\t\n"
4841 : "=d" (iBit),
4842 "=&c" (uECX),
4843 "=&D" (uEDI),
4844 "=&a" (uEAX)
4845 : "0" (0xffffffff),
4846 "mr" (pvBitmap),
4847 "1" (cBits >> 5),
4848 "2" (pvBitmap),
4849 "3" (0xffffffff));
4850# else
4851 cBits = RT_ALIGN_32(cBits, 32);
4852 __asm
4853 {
4854# ifdef RT_ARCH_AMD64
4855 mov rdi, [pvBitmap]
4856 mov rbx, rdi
4857# else
4858 mov edi, [pvBitmap]
4859 mov ebx, edi
4860# endif
4861 mov edx, 0ffffffffh
4862 mov eax, edx
4863 mov ecx, [cBits]
4864 shr ecx, 5
4865 repe scasd
4866 je done
4867
4868# ifdef RT_ARCH_AMD64
4869 lea rdi, [rdi - 4]
4870 xor eax, [rdi]
4871 sub rdi, rbx
4872# else
4873 lea edi, [edi - 4]
4874 xor eax, [edi]
4875 sub edi, ebx
4876# endif
4877 shl edi, 3
4878 bsf edx, eax
4879 add edx, edi
4880 done:
4881 mov [iBit], edx
4882 }
4883# endif
4884 return iBit;
4885 }
4886 return -1;
4887}
4888#endif
4889
4890
4891/**
4892 * Finds the next clear bit in a bitmap.
4893 *
4894 * @returns Index of the next clear bit.
4895 * @returns -1 if no clear bit was found.
4896 * @param pvBitmap Pointer to the bitmap.
4897 * @param cBits The number of bits in the bitmap. Multiple of 32.
4898 * @param iBitPrev The bit returned from the last search.
4899 * The search will start at iBitPrev + 1.
4900 */
4901#if RT_INLINE_ASM_EXTERNAL
4902DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4903#else
4904DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4905{
4906 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4907 int iBit = ++iBitPrev & 31;
4908 if (iBit)
4909 {
4910 /*
4911 * Inspect the 32-bit word containing the unaligned bit.
4912 */
4913 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4914
4915# if RT_INLINE_ASM_USES_INTRIN
4916 unsigned long ulBit = 0;
4917 if (_BitScanForward(&ulBit, u32))
4918 return ulBit + iBitPrev;
4919# else
4920# if RT_INLINE_ASM_GNU_STYLE
4921 __asm__ __volatile__("bsf %1, %0\n\t"
4922 "jnz 1f\n\t"
4923 "movl $-1, %0\n\t"
4924 "1:\n\t"
4925 : "=r" (iBit)
4926 : "r" (u32));
4927# else
4928 __asm
4929 {
4930 mov edx, [u32]
4931 bsf eax, edx
4932 jnz done
4933 mov eax, 0ffffffffh
4934 done:
4935 mov [iBit], eax
4936 }
4937# endif
4938 if (iBit >= 0)
4939 return iBit + iBitPrev;
4940# endif
4941
4942 /*
4943 * Skip ahead and see if there is anything left to search.
4944 */
4945 iBitPrev |= 31;
4946 iBitPrev++;
4947 if (cBits <= (uint32_t)iBitPrev)
4948 return -1;
4949 }
4950
4951 /*
4952 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4953 */
4954 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4955 if (iBit >= 0)
4956 iBit += iBitPrev;
4957 return iBit;
4958}
4959#endif
4960
4961
4962/**
4963 * Finds the first set bit in a bitmap.
4964 *
4965 * @returns Index of the first set bit.
4966 * @returns -1 if no set bit was found.
4967 * @param pvBitmap Pointer to the bitmap.
4968 * @param cBits The number of bits in the bitmap. Multiple of 32.
4969 */
4970#if RT_INLINE_ASM_EXTERNAL
4971DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4972#else
4973DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4974{
4975 if (cBits)
4976 {
4977 int32_t iBit;
4978# if RT_INLINE_ASM_GNU_STYLE
4979 RTCCUINTREG uEAX, uECX, uEDI;
4980 cBits = RT_ALIGN_32(cBits, 32);
4981 __asm__ __volatile__("repe; scasl\n\t"
4982 "je 1f\n\t"
4983# ifdef RT_ARCH_AMD64
4984 "lea -4(%%rdi), %%rdi\n\t"
4985 "movl (%%rdi), %%eax\n\t"
4986 "subq %5, %%rdi\n\t"
4987# else
4988 "lea -4(%%edi), %%edi\n\t"
4989 "movl (%%edi), %%eax\n\t"
4990 "subl %5, %%edi\n\t"
4991# endif
4992 "shll $3, %%edi\n\t"
4993 "bsfl %%eax, %%edx\n\t"
4994 "addl %%edi, %%edx\n\t"
4995 "1:\t\n"
4996 : "=d" (iBit),
4997 "=&c" (uECX),
4998 "=&D" (uEDI),
4999 "=&a" (uEAX)
5000 : "0" (0xffffffff),
5001 "mr" (pvBitmap),
5002 "1" (cBits >> 5),
5003 "2" (pvBitmap),
5004 "3" (0));
5005# else
5006 cBits = RT_ALIGN_32(cBits, 32);
5007 __asm
5008 {
5009# ifdef RT_ARCH_AMD64
5010 mov rdi, [pvBitmap]
5011 mov rbx, rdi
5012# else
5013 mov edi, [pvBitmap]
5014 mov ebx, edi
5015# endif
5016 mov edx, 0ffffffffh
5017 xor eax, eax
5018 mov ecx, [cBits]
5019 shr ecx, 5
5020 repe scasd
5021 je done
5022# ifdef RT_ARCH_AMD64
5023 lea rdi, [rdi - 4]
5024 mov eax, [rdi]
5025 sub rdi, rbx
5026# else
5027 lea edi, [edi - 4]
5028 mov eax, [edi]
5029 sub edi, ebx
5030# endif
5031 shl edi, 3
5032 bsf edx, eax
5033 add edx, edi
5034 done:
5035 mov [iBit], edx
5036 }
5037# endif
5038 return iBit;
5039 }
5040 return -1;
5041}
5042#endif
5043
5044
5045/**
5046 * Finds the next set bit in a bitmap.
5047 *
5048 * @returns Index of the next set bit.
5049 * @returns -1 if no set bit was found.
5050 * @param pvBitmap Pointer to the bitmap.
5051 * @param cBits The number of bits in the bitmap. Multiple of 32.
5052 * @param iBitPrev The bit returned from the last search.
5053 * The search will start at iBitPrev + 1.
5054 */
5055#if RT_INLINE_ASM_EXTERNAL
5056DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5057#else
5058DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5059{
5060 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5061 int iBit = ++iBitPrev & 31;
5062 if (iBit)
5063 {
5064 /*
5065 * Inspect the 32-bit word containing the unaligned bit.
5066 */
5067 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5068
5069# if RT_INLINE_ASM_USES_INTRIN
5070 unsigned long ulBit = 0;
5071 if (_BitScanForward(&ulBit, u32))
5072 return ulBit + iBitPrev;
5073# else
5074# if RT_INLINE_ASM_GNU_STYLE
5075 __asm__ __volatile__("bsf %1, %0\n\t"
5076 "jnz 1f\n\t"
5077 "movl $-1, %0\n\t"
5078 "1:\n\t"
5079 : "=r" (iBit)
5080 : "r" (u32));
5081# else
5082 __asm
5083 {
5084 mov edx, [u32]
5085 bsf eax, edx
5086 jnz done
5087 mov eax, 0ffffffffh
5088 done:
5089 mov [iBit], eax
5090 }
5091# endif
5092 if (iBit >= 0)
5093 return iBit + iBitPrev;
5094# endif
5095
5096 /*
5097 * Skip ahead and see if there is anything left to search.
5098 */
5099 iBitPrev |= 31;
5100 iBitPrev++;
5101 if (cBits <= (uint32_t)iBitPrev)
5102 return -1;
5103 }
5104
5105 /*
5106 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5107 */
5108 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5109 if (iBit >= 0)
5110 iBit += iBitPrev;
5111 return iBit;
5112}
5113#endif
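
/* A minimal usage sketch: walking all set bits in a bitmap with the
 * first/next pair. The bitmap contents are invented for illustration.
 *
 * @code
 *      uint32_t au32Pending[8] = {0};         // 256 bits, multiple of 32
 *      ASMBitSet(au32Pending, 3);
 *      ASMBitSet(au32Pending, 200);
 *      for (int32_t iBit = ASMBitFirstSet(au32Pending, 256);
 *           iBit >= 0;
 *           iBit = ASMBitNextSet(au32Pending, 256, (uint32_t)iBit))
 *      {
 *          // iBit is 3 on the first pass and 200 on the second.
 *      }
 * @endcode
 */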
5114
5115
5116/**
5117 * Finds the first bit which is set in the given 32-bit integer.
5118 * Bits are numbered from 1 (least significant) to 32.
5119 *
5120 * @returns index [1..32] of the first set bit.
5121 * @returns 0 if all bits are cleared.
5122 * @param u32 Integer to search for set bits.
5123 * @remarks Similar to ffs() in BSD.
5124 */
5125#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5126DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5127#else
5128DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5129{
5130# if RT_INLINE_ASM_USES_INTRIN
5131 unsigned long iBit;
5132 if (_BitScanForward(&iBit, u32))
5133 iBit++;
5134 else
5135 iBit = 0;
5136# elif RT_INLINE_ASM_GNU_STYLE
5137 uint32_t iBit;
5138 __asm__ __volatile__("bsf %1, %0\n\t"
5139 "jnz 1f\n\t"
5140 "xorl %0, %0\n\t"
5141 "jmp 2f\n"
5142 "1:\n\t"
5143 "incl %0\n"
5144 "2:\n\t"
5145 : "=r" (iBit)
5146 : "rm" (u32));
5147# else
5148 uint32_t iBit;
5149 _asm
5150 {
5151 bsf eax, [u32]
5152 jnz found
5153 xor eax, eax
5154 jmp done
5155 found:
5156 inc eax
5157 done:
5158 mov [iBit], eax
5159 }
5160# endif
5161 return iBit;
5162}
5163#endif
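
/* A minimal usage sketch illustrating the 1-based, ffs()-like return value.
 * The constants are picked for the example only.
 *
 * @code
 *      Assert(ASMBitFirstSetU32(0) == 0);                      // no bits set
 *      Assert(ASMBitFirstSetU32(UINT32_C(0x00000001)) == 1);   // bit 0 -> index 1
 *      Assert(ASMBitFirstSetU32(UINT32_C(0x00800000)) == 24);  // bit 23 -> index 24
 * @endcode
 */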
5164
5165
5166/**
5167 * Finds the first bit which is set in the given 32-bit integer.
5168 * Bits are numbered from 1 (least significant) to 32.
5169 *
5170 * @returns index [1..32] of the first set bit.
5171 * @returns 0 if all bits are cleared.
5172 * @param i32 Integer to search for set bits.
5173 * @remark Similar to ffs() in BSD.
5174 */
5175DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5176{
5177 return ASMBitFirstSetU32((uint32_t)i32);
5178}
5179
5180
5181/**
5182 * Finds the first bit which is set in the given 64-bit integer.
5183 *
5184 * Bits are numbered from 1 (least significant) to 64.
5185 *
5186 * @returns index [1..64] of the first set bit.
5187 * @returns 0 if all bits are cleared.
5188 * @param u64 Integer to search for set bits.
5189 * @remarks Similar to ffs() in BSD.
5190 */
5191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5192DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5193#else
5194DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5195{
5196# if RT_INLINE_ASM_USES_INTRIN
5197 unsigned long iBit;
5198# if ARCH_BITS == 64
5199 if (_BitScanForward64(&iBit, u64))
5200 iBit++;
5201 else
5202 iBit = 0;
5203# else
5204 if (_BitScanForward(&iBit, (uint32_t)u64))
5205 iBit++;
5206 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5207 iBit += 33;
5208 else
5209 iBit = 0;
5210# endif
5211# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5212 uint64_t iBit;
5213 __asm__ __volatile__("bsfq %1, %0\n\t"
5214 "jnz 1f\n\t"
5215 "xorl %0, %0\n\t"
5216 "jmp 2f\n"
5217 "1:\n\t"
5218 "incl %0\n"
5219 "2:\n\t"
5220 : "=r" (iBit)
5221 : "rm" (u64));
5222# else
5223 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5224 if (!iBit)
5225 {
5226 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5227 if (iBit)
5228 iBit += 32;
5229 }
5230# endif
5231 return (unsigned)iBit;
5232}
5233#endif
5234
5235
5236/**
5237 * Finds the first bit which is set in the given 16-bit integer.
5238 *
5239 * Bits are numbered from 1 (least significant) to 16.
5240 *
5241 * @returns index [1..16] of the first set bit.
5242 * @returns 0 if all bits are cleared.
5243 * @param u16 Integer to search for set bits.
5244 * @remarks For 16-bit bs3kit code.
5245 */
5246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5247DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5248#else
5249DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5250{
5251 return ASMBitFirstSetU32((uint32_t)u16);
5252}
5253#endif
5254
5255
5256/**
5257 * Finds the last bit which is set in the given 32-bit integer.
5258 * Bits are numbered from 1 (least significant) to 32.
5259 *
5260 * @returns index [1..32] of the last set bit.
5261 * @returns 0 if all bits are cleared.
5262 * @param u32 Integer to search for set bits.
5263 * @remark Similar to fls() in BSD.
5264 */
5265#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5266DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5267#else
5268DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5269{
5270# if RT_INLINE_ASM_USES_INTRIN
5271 unsigned long iBit;
5272 if (_BitScanReverse(&iBit, u32))
5273 iBit++;
5274 else
5275 iBit = 0;
5276# elif RT_INLINE_ASM_GNU_STYLE
5277 uint32_t iBit;
5278 __asm__ __volatile__("bsrl %1, %0\n\t"
5279 "jnz 1f\n\t"
5280 "xorl %0, %0\n\t"
5281 "jmp 2f\n"
5282 "1:\n\t"
5283 "incl %0\n"
5284 "2:\n\t"
5285 : "=r" (iBit)
5286 : "rm" (u32));
5287# else
5288 uint32_t iBit;
5289 _asm
5290 {
5291 bsr eax, [u32]
5292 jnz found
5293 xor eax, eax
5294 jmp done
5295 found:
5296 inc eax
5297 done:
5298 mov [iBit], eax
5299 }
5300# endif
5301 return iBit;
5302}
5303#endif
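
/* A minimal usage sketch: deriving the rounded-down log2 of a non-zero value
 * from the 1-based, fls()-like return value. The helper name is hypothetical.
 *
 * @code
 *      DECLINLINE(unsigned) exampleLog2(uint32_t u32)
 *      {
 *          Assert(u32);
 *          return ASMBitLastSetU32(u32) - 1;  // e.g. 0x1000 -> 12
 *      }
 * @endcode
 */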
5304
5305
5306/**
5307 * Finds the last bit which is set in the given 32-bit integer.
5308 * Bits are numbered from 1 (least significant) to 32.
5309 *
5310 * @returns index [1..32] of the last set bit.
5311 * @returns 0 if all bits are cleared.
5312 * @param i32 Integer to search for set bits.
5313 * @remark Similar to fls() in BSD.
5314 */
5315DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5316{
5317 return ASMBitLastSetU32((uint32_t)i32);
5318}
5319
5320
5321/**
5322 * Finds the last bit which is set in the given 64-bit integer.
5323 *
5324 * Bits are numbered from 1 (least significant) to 64.
5325 *
5326 * @returns index [1..64] of the last set bit.
5327 * @returns 0 if all bits are cleared.
5328 * @param u64 Integer to search for set bits.
5329 * @remark Similar to fls() in BSD.
5330 */
5331#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5332DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5333#else
5334DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5335{
5336# if RT_INLINE_ASM_USES_INTRIN
5337 unsigned long iBit;
5338# if ARCH_BITS == 64
5339 if (_BitScanReverse64(&iBit, u64))
5340 iBit++;
5341 else
5342 iBit = 0;
5343# else
5344 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5345 iBit += 33;
5346 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5347 iBit++;
5348 else
5349 iBit = 0;
5350# endif
5351# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5352 uint64_t iBit;
5353 __asm__ __volatile__("bsrq %1, %0\n\t"
5354 "jnz 1f\n\t"
5355 "xorl %0, %0\n\t"
5356 "jmp 2f\n"
5357 "1:\n\t"
5358 "incl %0\n"
5359 "2:\n\t"
5360 : "=r" (iBit)
5361 : "rm" (u64));
5362# else
5363 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5364 if (iBit)
5365 iBit += 32;
5366 else
5367 iBit = ASMBitLastSetU32((uint32_t)u64);
5368# endif
5369 return (unsigned)iBit;
5370}
5371#endif
5372
5373
5374/**
5375 * Finds the last bit which is set in the given 16-bit integer.
5376 *
5377 * Bits are numbered from 1 (least significant) to 16.
5378 *
5379 * @returns index [1..16] of the last set bit.
5380 * @returns 0 if all bits are cleared.
5381 * @param u16 Integer to search for set bits.
5382 * @remarks For 16-bit bs3kit code.
5383 */
5384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5385DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5386#else
5387DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5388{
5389 return ASMBitLastSetU32((uint32_t)u16);
5390}
5391#endif
5392
5393
5394/**
5395 * Reverse the byte order of the given 16-bit integer.
5396 *
5397 * @returns The 16-bit value with the byte order reversed.
5398 * @param u16 16-bit integer value.
5399 */
5400#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5401DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5402#else
5403DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5404{
5405# if RT_INLINE_ASM_USES_INTRIN
5406 u16 = _byteswap_ushort(u16);
5407# elif RT_INLINE_ASM_GNU_STYLE
5408 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5409# else
5410 _asm
5411 {
5412 mov ax, [u16]
5413 ror ax, 8
5414 mov [u16], ax
5415 }
5416# endif
5417 return u16;
5418}
5419#endif
5420
5421
5422/**
5423 * Reverse the byte order of the given 32-bit integer.
5424 *
5425 * @returns The 32-bit value with the byte order reversed.
5426 * @param u32 32-bit integer value.
5427 */
5428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5429DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5430#else
5431DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5432{
5433# if RT_INLINE_ASM_USES_INTRIN
5434 u32 = _byteswap_ulong(u32);
5435# elif RT_INLINE_ASM_GNU_STYLE
5436 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5437# else
5438 _asm
5439 {
5440 mov eax, [u32]
5441 bswap eax
5442 mov [u32], eax
5443 }
5444# endif
5445 return u32;
5446}
5447#endif
5448
5449
5450/**
5451 * Reverse the byte order of the given 64-bit integer.
5452 *
5453 * @returns The 64-bit value with the byte order reversed.
5454 * @param u64 64-bit integer value.
5455 */
5456DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5457{
5458#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5459 u64 = _byteswap_uint64(u64);
5460#else
5461 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5462 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5463#endif
5464 return u64;
5465}
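
/* A minimal usage sketch showing the effect of the byte swappers on a few
 * constants chosen for the example.
 *
 * @code
 *      Assert(ASMByteSwapU16(UINT16_C(0x1234)) == UINT16_C(0x3412));
 *      Assert(ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
 *      Assert(ASMByteSwapU64(UINT64_C(0x0123456789abcdef)) == UINT64_C(0xefcdab8967452301));
 * @endcode
 */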
5466
5467
5468/**
5469 * Rotate 32-bit unsigned value to the left by @a cShift.
5470 *
5471 * @returns Rotated value.
5472 * @param u32 The value to rotate.
5473 * @param cShift How many bits to rotate by.
5474 */
5475#ifdef __WATCOMC__
5476DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5477#else
5478DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5479{
5480# if RT_INLINE_ASM_USES_INTRIN
5481 return _rotl(u32, cShift);
5482# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5483 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5484 return u32;
5485# else
5486 cShift &= 31;
5487 return (u32 << cShift) | (u32 >> (32 - cShift));
5488# endif
5489}
5490#endif
5491
5492
5493/**
5494 * Rotate 32-bit unsigned value to the right by @a cShift.
5495 *
5496 * @returns Rotated value.
5497 * @param u32 The value to rotate.
5498 * @param cShift How many bits to rotate by.
5499 */
5500#ifdef __WATCOMC__
5501DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5502#else
5503DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5504{
5505# if RT_INLINE_ASM_USES_INTRIN
5506 return _rotr(u32, cShift);
5507# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5508 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5509 return u32;
5510# else
5511 cShift &= 31;
5512 return (u32 >> cShift) | (u32 << (32 - cShift));
5513# endif
5514}
5515#endif
5516
5517
5518/**
5519 * Rotate 64-bit unsigned value to the left by @a cShift.
5520 *
5521 * @returns Rotated value.
5522 * @param u64 The value to rotate.
5523 * @param cShift How many bits to rotate by.
5524 */
5525DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5526{
5527#if RT_INLINE_ASM_USES_INTRIN
5528 return _rotl64(u64, cShift);
5529#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5530 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5531 return u64;
5532#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5533 uint32_t uSpill;
5534 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5535 "jz 1f\n\t"
5536 "xchgl %%eax, %%edx\n\t"
5537 "1:\n\t"
5538 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5539 "jz 2f\n\t"
5540 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5541 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5542 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5543 "2:\n\t" /* } */
5544 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5545 : "0" (u64),
5546 "1" (cShift));
5547 return u64;
5548#else
5549 cShift &= 63;
5550 return (u64 << cShift) | (u64 >> (64 - cShift));
5551#endif
5552}
5553
5554
5555/**
5556 * Rotate 64-bit unsigned value to the right by @a cShift.
5557 *
5558 * @returns Rotated value.
5559 * @param u64 The value to rotate.
5560 * @param cShift How many bits to rotate by.
5561 */
5562DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5563{
5564#if RT_INLINE_ASM_USES_INTRIN
5565 return _rotr64(u64, cShift);
5566#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5567 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5568 return u64;
5569#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5570 uint32_t uSpill;
5571 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5572 "jz 1f\n\t"
5573 "xchgl %%eax, %%edx\n\t"
5574 "1:\n\t"
5575 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5576 "jz 2f\n\t"
5577 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5578 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5579 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5580 "2:\n\t" /* } */
5581 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5582 : "0" (u64),
5583 "1" (cShift));
5584 return u64;
5585#else
5586 cShift &= 63;
5587 return (u64 >> cShift) | (u64 << (64 - cShift));
5588#endif
5589}
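
/* A minimal usage sketch: unlike plain shifts, the rotate helpers wrap the
 * bits around. The constants are chosen for the example only.
 *
 * @code
 *      Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 1) == UINT32_C(0x00000003));
 *      Assert(ASMRotateRightU32(UINT32_C(0x00000003), 1) == UINT32_C(0x80000001));
 *      Assert(ASMRotateLeftU64(UINT64_C(1) << 63, 1) == UINT64_C(1));
 * @endcode
 */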
5590
5591/** @} */
5592
5593
5594/** @} */
5595
5596#endif
5597