VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 68572

Last change on this file since 68572 was 68572, checked in by vboxsync, 7 years ago

merging vbglioc r117744: iprt: Introducing RT_FAR and friends to deal with the world of 16-bit compilers. Made iprt/types.h somewhat 16-bit safe (explicit far pointers, int type fixes).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 158.4 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2016 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
/* Solaris 10 header ugliness: undefine any macro named 'u'. */
#if defined(u)
# undef u
#endif
41
#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
/* Include the intrinsics header with two warnings disabled around it. */
# pragma warning(push)
# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
# include <intrin.h>
# pragma warning(pop)

/* Emit the intrinsics at all optimization levels. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# pragma intrinsic(_rotl)
# pragma intrinsic(_rotr)
# pragma intrinsic(_rotl64)
# pragma intrinsic(_rotr64)

/* Intrinsics only requested for AMD64 builds. */
# if defined(RT_ARCH_AMD64)
#  pragma intrinsic(__stosq)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
#  pragma intrinsic(_InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedAnd64)
#  pragma intrinsic(_InterlockedOr64)
#  pragma intrinsic(_InterlockedIncrement64)
#  pragma intrinsic(_InterlockedDecrement64)
# endif
#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
/** @defgroup grp_rt_asm    ASM - Assembly Routines
 * @ingroup grp_rt
 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with respect to whether
 *          the operation will complete before any subsequent memory access.
 *          (please, correct if wrong.)
 *
 *          ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
 *          are unordered (note the Uo).
 *
 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
 *          or even optimize assembler instructions away. For instance, in the following code
 *          the second rdmsr instruction is optimized away because gcc treats that instruction
 *          as deterministic:
 *
 *            @code
 *            static inline uint64_t rdmsr_low(int idx)
 *            {
 *              uint32_t low;
 *              __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *              return low;
 *            }
 *            ...
 *            uint32_t msr1 = rdmsr_low(1);
 *            foo(msr1);
 *            msr1 = rdmsr_low(1);
 *            bar(msr1);
 *            @endcode
 *
 *          The input parameter of rdmsr_low is the same for both calls and therefore gcc will
 *          use the result of the first call as input parameter for bar() as well. For rdmsr this
 *          is not acceptable as this instruction is _not_ deterministic. This applies to reading
 *          machine status information in general.
 *
 * @{
 */
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * When 1, the cmpxchg8b based 64-bit operations in this file are only declared
 * (DECLASM) instead of being inlined.
 *
 * Background: i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493)
 * screws up RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release
 * builds (PIC mode, x86), and some gcc 4.3.x versions may have register
 * allocation issues with cmpxchg8b when in PIC mode on x86.
 *
 * The value can be preset by the build; it is only computed here when not
 * already defined.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# elif (   (defined(PIC) || defined(__PIC__)) \
        && defined(RT_ARCH_X86) \
        && (RT_INLINE_ASM_GCC_4_3_X_X86 || defined(RT_OS_DARWIN)) )
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
171
172
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Visual C++ maps this to the _ReturnAddress intrinsic, GCC (and Doxygen) to
 * __builtin_return_address(0).  For Watcom the macro expands to a call to a
 * descriptively named function that is not declared here (presumably so that
 * any use fails at build time - confirm).
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()

#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)

#elif defined(__WATCOMC__)
# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()

#else
# error "Unsupported compiler."
#endif
190
191
192/**
193 * Compiler memory barrier.
194 *
195 * Ensure that the compiler does not use any cached (register/tmp stack) memory
196 * values or any outstanding writes when returning from this function.
197 *
198 * This function must be used if non-volatile data is modified by a
199 * device or the VMM. Typical cases are port access, MMIO access,
200 * trapping instruction, etc.
201 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
217
/** @def ASMBreakpoint
 * Debugger breakpoint; plain alias for RT_BREAKPOINT().
 * @deprecated Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint()     RT_BREAKPOINT()
224
225
226/**
227 * Spinloop hint for platforms that have these, empty function on the other
228 * platforms.
229 *
230 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
231 * spin locks.
232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
253
254/**
255 * Atomically Exchange an unsigned 8-bit value, ordered.
256 *
257 * @returns Current *pu8 value
258 * @param pu8 Pointer to the 8-bit variable to update.
259 * @param u8 The 8-bit value to assign to *pu8.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pu8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the 8-bit variable to update.
311 * @param f The 8-bit value to assign to *pi8.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
324 * Atomically Exchange an unsigned 16-bit value, ordered.
325 *
326 * @returns Current *pu16 value
327 * @param pu16 Pointer to the 16-bit variable to update.
328 * @param u16 The 16-bit value to assign to *pu16.
329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pu16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
372}
373
374
375/**
376 * Atomically Exchange an unsigned 32-bit value, ordered.
377 *
378 * @returns Current *pu32 value
379 * @param pu32 Pointer to the 32-bit variable to update.
380 * @param u32 The 32-bit value to assign to *pu32.
381 *
382 * @remarks Does not work on 286 and earlier.
383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
415 return u32;
416}
417#endif
418
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pu32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
430}
431
432
433/**
434 * Atomically Exchange an unsigned 64-bit value, ordered.
435 *
436 * @returns Current *pu64 value
437 * @param pu64 Pointer to the 64-bit variable to update.
438 * @param u64 The 64-bit value to assign to *pu64.
439 *
440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange an signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a pointer value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param ppv Pointer to the pointer variable to update.
533 * @param pv The pointer value to assign to *ppv.
534 */
535DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
536{
537#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
538 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
539#elif ARCH_BITS == 64
540 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
541#else
542# error "ARCH_BITS is bogus"
543#endif
544}
545
546
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 *
 * The GCC flavour routes the arguments through typed temporaries inside a
 * statement expression before calling ASMAtomicXchgPtr; other compilers get
 * plain casts.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to assign to *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        Type                         const pvTypeChecked    = (pv); \
        Type                               pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#endif
568
569
570/**
571 * Atomically Exchange a raw-mode context pointer value, ordered.
572 *
573 * @returns Current *ppv value
574 * @param ppvRC Pointer to the pointer variable to update.
575 * @param pvRC The pointer value to assign to *ppv.
576 */
577DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
578{
579 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
580}
581
582
583/**
584 * Atomically Exchange a ring-0 pointer value, ordered.
585 *
586 * @returns Current *ppv value
587 * @param ppvR0 Pointer to the pointer variable to update.
588 * @param pvR0 The pointer value to assign to *ppv.
589 */
590DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
591{
592#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
593 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
594#elif R0_ARCH_BITS == 64
595 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
596#else
597# error "R0_ARCH_BITS is bogus"
598#endif
599}
600
601
602/**
603 * Atomically Exchange a ring-3 pointer value, ordered.
604 *
605 * @returns Current *ppv value
606 * @param ppvR3 Pointer to the pointer variable to update.
607 * @param pvR3 The pointer value to assign to *ppv.
608 */
609DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
610{
611#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
612 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
613#elif R3_ARCH_BITS == 64
614 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
615#else
616# error "R3_ARCH_BITS is bogus"
617#endif
618}
619
620
621/** @def ASMAtomicXchgHandle
622 * Atomically Exchange a typical IPRT handle value, ordered.
623 *
624 * @param ph Pointer to the value to update.
625 * @param hNew The new value to assigned to *pu.
626 * @param phRes Where to store the current *ph value.
627 *
628 * @remarks This doesn't currently work for all handles (like RTFILE).
629 */
630#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
635 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
636 } while (0)
637#elif HC_ARCH_BITS == 64
638# define ASMAtomicXchgHandle(ph, hNew, phRes) \
639 do { \
640 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
641 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
642 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
643 } while (0)
644#else
645# error HC_ARCH_BITS
646#endif
647
648
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicXchgU8, ASMAtomicXchgU16,
 * ASMAtomicXchgU32 or ASMAtomicXchgU64.  Any other size hits AssertMsgFailed
 * and no exchange is performed.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
667
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicXchgU8, ASMAtomicXchgU16,
 * ASMAtomicXchgU32 or ASMAtomicXchgU64 and stores the returned value through
 * @a puRes using the same width.  Any other size hits AssertMsgFailed; neither
 * *pu nor *puRes is written in that case.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
686
687
688
689/**
690 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
691 *
692 * @returns true if xchg was done.
693 * @returns false if xchg wasn't done.
694 *
695 * @param pu8 Pointer to the value to update.
696 * @param u8New The new value to assigned to *pu8.
697 * @param u8Old The old value to *pu8 compare with.
698 *
699 * @remarks x86: Requires a 486 or later.
700 */
701#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
702DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
703#else
704DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
705{
706 uint8_t u8Ret;
707 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
708 "setz %1\n\t"
709 : "=m" (*pu8),
710 "=qm" (u8Ret),
711 "=a" (u8Old)
712 : "q" (u8New),
713 "2" (u8Old),
714 "m" (*pu8));
715 return (bool)u8Ret;
716}
717#endif
718
719
720/**
721 * Atomically Compare and Exchange a signed 8-bit value, ordered.
722 *
723 * @returns true if xchg was done.
724 * @returns false if xchg wasn't done.
725 *
726 * @param pi8 Pointer to the value to update.
727 * @param i8New The new value to assigned to *pi8.
728 * @param i8Old The old value to *pi8 compare with.
729 *
730 * @remarks x86: Requires a 486 or later.
731 */
732DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
733{
734 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
735}
736
737
738/**
739 * Atomically Compare and Exchange a bool value, ordered.
740 *
741 * @returns true if xchg was done.
742 * @returns false if xchg wasn't done.
743 *
744 * @param pf Pointer to the value to update.
745 * @param fNew The new value to assigned to *pf.
746 * @param fOld The old value to *pf compare with.
747 *
748 * @remarks x86: Requires a 486 or later.
749 */
750DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
751{
752 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
753}
754
755
756/**
757 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
758 *
759 * @returns true if xchg was done.
760 * @returns false if xchg wasn't done.
761 *
762 * @param pu32 Pointer to the value to update.
763 * @param u32New The new value to assigned to *pu32.
764 * @param u32Old The old value to *pu32 compare with.
765 *
766 * @remarks x86: Requires a 486 or later.
767 */
768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
769DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
770#else
771DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
772{
773# if RT_INLINE_ASM_GNU_STYLE
774 uint8_t u8Ret;
775 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
776 "setz %1\n\t"
777 : "=m" (*pu32),
778 "=qm" (u8Ret),
779 "=a" (u32Old)
780 : "r" (u32New),
781 "2" (u32Old),
782 "m" (*pu32));
783 return (bool)u8Ret;
784
785# elif RT_INLINE_ASM_USES_INTRIN
786 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
787
788# else
789 uint32_t u32Ret;
790 __asm
791 {
792# ifdef RT_ARCH_AMD64
793 mov rdx, [pu32]
794# else
795 mov edx, [pu32]
796# endif
797 mov eax, [u32Old]
798 mov ecx, [u32New]
799# ifdef RT_ARCH_AMD64
800 lock cmpxchg [rdx], ecx
801# else
802 lock cmpxchg [edx], ecx
803# endif
804 setz al
805 movzx eax, al
806 mov [u32Ret], eax
807 }
808 return !!u32Ret;
809# endif
810}
811#endif
812
813
814/**
815 * Atomically Compare and Exchange a signed 32-bit value, ordered.
816 *
817 * @returns true if xchg was done.
818 * @returns false if xchg wasn't done.
819 *
820 * @param pi32 Pointer to the value to update.
821 * @param i32New The new value to assigned to *pi32.
822 * @param i32Old The old value to *pi32 compare with.
823 *
824 * @remarks x86: Requires a 486 or later.
825 */
826DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
827{
828 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
829}
830
831
832/**
833 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
834 *
835 * @returns true if xchg was done.
836 * @returns false if xchg wasn't done.
837 *
838 * @param pu64 Pointer to the 64-bit variable to update.
839 * @param u64New The 64-bit value to assign to *pu64.
840 * @param u64Old The value to compare with.
841 *
842 * @remarks x86: Requires a Pentium or later.
843 */
844#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
845 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
849{
850# if RT_INLINE_ASM_USES_INTRIN
851 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
852
853# elif defined(RT_ARCH_AMD64)
854# if RT_INLINE_ASM_GNU_STYLE
855 uint8_t u8Ret;
856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
857 "setz %1\n\t"
858 : "=m" (*pu64),
859 "=qm" (u8Ret),
860 "=a" (u64Old)
861 : "r" (u64New),
862 "2" (u64Old),
863 "m" (*pu64));
864 return (bool)u8Ret;
865# else
866 bool fRet;
867 __asm
868 {
869 mov rdx, [pu32]
870 mov rax, [u64Old]
871 mov rcx, [u64New]
872 lock cmpxchg [rdx], rcx
873 setz al
874 mov [fRet], al
875 }
876 return fRet;
877# endif
878# else /* !RT_ARCH_AMD64 */
879 uint32_t u32Ret;
880# if RT_INLINE_ASM_GNU_STYLE
881# if defined(PIC) || defined(__PIC__)
882 uint32_t u32EBX = (uint32_t)u64New;
883 uint32_t u32Spill;
884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
885 "lock; cmpxchg8b (%6)\n\t"
886 "setz %%al\n\t"
887 "movl %4, %%ebx\n\t"
888 "movzbl %%al, %%eax\n\t"
889 : "=a" (u32Ret),
890 "=d" (u32Spill),
891# if RT_GNUC_PREREQ(4, 3)
892 "+m" (*pu64)
893# else
894 "=m" (*pu64)
895# endif
896 : "A" (u64Old),
897 "m" ( u32EBX ),
898 "c" ( (uint32_t)(u64New >> 32) ),
899 "S" (pu64));
900# else /* !PIC */
901 uint32_t u32Spill;
902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
903 "setz %%al\n\t"
904 "movzbl %%al, %%eax\n\t"
905 : "=a" (u32Ret),
906 "=d" (u32Spill),
907 "+m" (*pu64)
908 : "A" (u64Old),
909 "b" ( (uint32_t)u64New ),
910 "c" ( (uint32_t)(u64New >> 32) ));
911# endif
912 return (bool)u32Ret;
913# else
914 __asm
915 {
916 mov ebx, dword ptr [u64New]
917 mov ecx, dword ptr [u64New + 4]
918 mov edi, [pu64]
919 mov eax, dword ptr [u64Old]
920 mov edx, dword ptr [u64Old + 4]
921 lock cmpxchg8b [edi]
922 setz al
923 movzx eax, al
924 mov dword ptr [u32Ret], eax
925 }
926 return !!u32Ret;
927# endif
928# endif /* !RT_ARCH_AMD64 */
929}
930#endif
931
932
933/**
934 * Atomically Compare and exchange a signed 64-bit value, ordered.
935 *
936 * @returns true if xchg was done.
937 * @returns false if xchg wasn't done.
938 *
939 * @param pi64 Pointer to the 64-bit variable to update.
940 * @param i64 The 64-bit value to assign to *pu64.
941 * @param i64Old The value to compare with.
942 *
943 * @remarks x86: Requires a Pentium or later.
944 */
945DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
946{
947 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
948}
949
950
951/**
952 * Atomically Compare and Exchange a pointer value, ordered.
953 *
954 * @returns true if xchg was done.
955 * @returns false if xchg wasn't done.
956 *
957 * @param ppv Pointer to the value to update.
958 * @param pvNew The new value to assigned to *ppv.
959 * @param pvOld The old value to *ppv compare with.
960 *
961 * @remarks x86: Requires a 486 or later.
962 */
963DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
964{
965#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
966 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
967#elif ARCH_BITS == 64
968 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
969#else
970# error "ARCH_BITS is bogus"
971#endif
972}
973
974
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * The GCC flavour first assigns the arguments to temporaries typed after
 * *ppv and then calls ASMAtomicCmpXchgPtrVoid; other compilers get plain
 * casts to void pointers.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#endif
1003
1004
1005/** @def ASMAtomicCmpXchgHandle
1006 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1007 *
1008 * @param ph Pointer to the value to update.
1009 * @param hNew The new value to assigned to *pu.
1010 * @param hOld The old value to *pu compare with.
1011 * @param fRc Where to store the result.
1012 *
1013 * @remarks This doesn't currently work for all handles (like RTFILE).
1014 * @remarks x86: Requires a 486 or later.
1015 */
1016#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
1017# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1018 do { \
1019 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1020 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1021 } while (0)
1022#elif HC_ARCH_BITS == 64
1023# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1024 do { \
1025 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1026 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1027 } while (0)
1028#else
1029# error HC_ARCH_BITS
1030#endif
1031
1032
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu): 4 byte values go to ASMAtomicCmpXchgU32 and
 * 8 byte values to ASMAtomicCmpXchgU64.  Any other size triggers an assertion
 * and sets @a fRc to false without touching *pu.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: \
                (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: \
                (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: \
                /* sizeof yields size_t; cast so the argument matches %d. */ \
                AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1056
1057
1058/**
1059 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1060 * passes back old value, ordered.
1061 *
1062 * @returns true if xchg was done.
1063 * @returns false if xchg wasn't done.
1064 *
1065 * @param pu32 Pointer to the value to update.
1066 * @param u32New The new value to assigned to *pu32.
1067 * @param u32Old The old value to *pu32 compare with.
1068 * @param pu32Old Pointer store the old value at.
1069 *
1070 * @remarks x86: Requires a 486 or later.
1071 */
1072#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1073DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1074#else
1075DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1076{
1077# if RT_INLINE_ASM_GNU_STYLE
1078 uint8_t u8Ret;
1079 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1080 "setz %1\n\t"
1081 : "=m" (*pu32),
1082 "=qm" (u8Ret),
1083 "=a" (*pu32Old)
1084 : "r" (u32New),
1085 "a" (u32Old),
1086 "m" (*pu32));
1087 return (bool)u8Ret;
1088
1089# elif RT_INLINE_ASM_USES_INTRIN
1090 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1091
1092# else
1093 uint32_t u32Ret;
1094 __asm
1095 {
1096# ifdef RT_ARCH_AMD64
1097 mov rdx, [pu32]
1098# else
1099 mov edx, [pu32]
1100# endif
1101 mov eax, [u32Old]
1102 mov ecx, [u32New]
1103# ifdef RT_ARCH_AMD64
1104 lock cmpxchg [rdx], ecx
1105 mov rdx, [pu32Old]
1106 mov [rdx], eax
1107# else
1108 lock cmpxchg [edx], ecx
1109 mov edx, [pu32Old]
1110 mov [edx], eax
1111# endif
1112 setz al
1113 movzx eax, al
1114 mov [u32Ret], eax
1115 }
1116 return !!u32Ret;
1117# endif
1118}
1119#endif
1120
1121
1122/**
1123 * Atomically Compare and Exchange a signed 32-bit value, additionally
1124 * passes back old value, ordered.
1125 *
1126 * @returns true if xchg was done.
1127 * @returns false if xchg wasn't done.
1128 *
1129 * @param pi32 Pointer to the value to update.
1130 * @param i32New The new value to assigned to *pi32.
1131 * @param i32Old The old value to *pi32 compare with.
1132 * @param pi32Old Pointer store the old value at.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 */
1136DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1137{
1138 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1139}
1140
1141
1142/**
1143 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1144 * passing back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pu64 Pointer to the 64-bit variable to update.
1150 * @param u64New The 64-bit value to assign to *pu64.
1151 * @param u64Old The value to compare with.
1152 * @param pu64Old Pointer store the old value at.
1153 *
1154 * @remarks x86: Requires a Pentium or later.
1155 */
1156#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1157 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1158DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1161{
1162# if RT_INLINE_ASM_USES_INTRIN
1163 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1164
1165# elif defined(RT_ARCH_AMD64)
1166# if RT_INLINE_ASM_GNU_STYLE
1167 uint8_t u8Ret;
1168 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1169 "setz %1\n\t"
1170 : "=m" (*pu64),
1171 "=qm" (u8Ret),
1172 "=a" (*pu64Old)
1173 : "r" (u64New),
1174 "a" (u64Old),
1175 "m" (*pu64));
1176 return (bool)u8Ret;
1177# else
1178 bool fRet;
1179 __asm
1180 {
1181 mov rdx, [pu32]
1182 mov rax, [u64Old]
1183 mov rcx, [u64New]
1184 lock cmpxchg [rdx], rcx
1185 mov rdx, [pu64Old]
1186 mov [rdx], rax
1187 setz al
1188 mov [fRet], al
1189 }
1190 return fRet;
1191# endif
1192# else /* !RT_ARCH_AMD64 */
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint64_t u64Ret;
1195# if defined(PIC) || defined(__PIC__)
1196 /* NB: this code uses a memory clobber description, because the clean
1197 * solution with an output value for *pu64 makes gcc run out of registers.
1198 * This will cause suboptimal code, and anyone with a better solution is
1199 * welcome to improve this. */
1200 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1201 "lock; cmpxchg8b %3\n\t"
1202 "xchgl %%ebx, %1\n\t"
1203 : "=A" (u64Ret)
1204 : "DS" ((uint32_t)u64New),
1205 "c" ((uint32_t)(u64New >> 32)),
1206 "m" (*pu64),
1207 "0" (u64Old)
1208 : "memory" );
1209# else /* !PIC */
1210 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1211 : "=A" (u64Ret),
1212 "=m" (*pu64)
1213 : "b" ((uint32_t)u64New),
1214 "c" ((uint32_t)(u64New >> 32)),
1215 "m" (*pu64),
1216 "0" (u64Old));
1217# endif
1218 *pu64Old = u64Ret;
1219 return u64Ret == u64Old;
1220# else
1221 uint32_t u32Ret;
1222 __asm
1223 {
1224 mov ebx, dword ptr [u64New]
1225 mov ecx, dword ptr [u64New + 4]
1226 mov edi, [pu64]
1227 mov eax, dword ptr [u64Old]
1228 mov edx, dword ptr [u64Old + 4]
1229 lock cmpxchg8b [edi]
1230 mov ebx, [pu64Old]
1231 mov [ebx], eax
1232 setz al
1233 movzx eax, al
1234 add ebx, 4
1235 mov [ebx], edx
1236 mov dword ptr [u32Ret], eax
1237 }
1238 return !!u32Ret;
1239# endif
1240# endif /* !RT_ARCH_AMD64 */
1241}
1242#endif
1243
1244
1245/**
1246 * Atomically Compare and exchange a signed 64-bit value, additionally
1247 * passing back old value, ordered.
1248 *
1249 * @returns true if xchg was done.
1250 * @returns false if xchg wasn't done.
1251 *
1252 * @param pi64 Pointer to the 64-bit variable to update.
1253 * @param i64 The 64-bit value to assign to *pu64.
1254 * @param i64Old The value to compare with.
1255 * @param pi64Old Pointer store the old value at.
1256 *
1257 * @remarks x86: Requires a Pentium or later.
1258 */
1259DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1260{
1261 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1262}
1263
1264/** @def ASMAtomicCmpXchgExHandle
1265 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1266 *
1267 * @param ph Pointer to the value to update.
1268 * @param hNew The new value to assigned to *pu.
1269 * @param hOld The old value to *pu compare with.
1270 * @param fRc Where to store the result.
1271 * @param phOldVal Pointer to where to store the old value.
1272 *
1273 * @remarks This doesn't currently work for all handles (like RTFILE).
1274 */
1275#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1279 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1281 } while (0)
1282#elif HC_ARCH_BITS == 64
1283# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1286 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1287 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1288 } while (0)
1289#else
1290# error HC_ARCH_BITS
1291#endif
1292
1293
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers.  Additionally passes back the old value.
 *
 * Dispatches on sizeof(*pu): 4 byte values go to ASMAtomicCmpXchgExU32 and
 * 8 byte values to ASMAtomicCmpXchgExU64.  Any other size triggers an
 * assertion, sets @a fRc to false and stores zero in *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: \
                (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: \
                (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            default: \
                /* sizeof yields size_t; cast so the argument matches %d. */ \
                AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1319
1320
1321/**
1322 * Atomically Compare and Exchange a pointer value, additionally
1323 * passing back old value, ordered.
1324 *
1325 * @returns true if xchg was done.
1326 * @returns false if xchg wasn't done.
1327 *
1328 * @param ppv Pointer to the value to update.
1329 * @param pvNew The new value to assigned to *ppv.
1330 * @param pvOld The old value to *ppv compare with.
1331 * @param ppvOld Pointer store the old value at.
1332 *
1333 * @remarks x86: Requires a 486 or later.
1334 */
1335DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1336{
1337#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
1338 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1339#elif ARCH_BITS == 64
1340 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1341#else
1342# error "ARCH_BITS is bogus"
1343#endif
1344}
1345
1346
/**
 * Atomically compares and exchanges a pointer value and additionally passes
 * back the old value, ordered.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   ppv         Pointer to the pointer variable to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks With GCC the arguments are first copied into temporaries typed
 *          after *ppv, which gives a degree of compile time type checking.
 *          Other compilers just cast everything to void pointers.
 * @remarks x86: Requires a 486 or later.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) *          const ppvOldTypeChecked = (ppvOld); \
        /* The value of the statement expression is the call result. */ \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                  (void *)pvNewTypeChecked, \
                                  (void *)pvOldTypeChecked, \
                                  (void **)ppvOldTypeChecked); \
    })
#endif
1379
1380
1381/**
1382 * Virtualization unfriendly serializing instruction, always exits.
1383 */
1384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1385DECLASM(void) ASMSerializeInstructionCpuId(void);
1386#else
1387DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1388{
1389# if RT_INLINE_ASM_GNU_STYLE
1390 RTCCUINTREG xAX = 0;
1391# ifdef RT_ARCH_AMD64
1392 __asm__ __volatile__ ("cpuid"
1393 : "=a" (xAX)
1394 : "0" (xAX)
1395 : "rbx", "rcx", "rdx", "memory");
1396# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1397 __asm__ __volatile__ ("push %%ebx\n\t"
1398 "cpuid\n\t"
1399 "pop %%ebx\n\t"
1400 : "=a" (xAX)
1401 : "0" (xAX)
1402 : "ecx", "edx", "memory");
1403# else
1404 __asm__ __volatile__ ("cpuid"
1405 : "=a" (xAX)
1406 : "0" (xAX)
1407 : "ebx", "ecx", "edx", "memory");
1408# endif
1409
1410# elif RT_INLINE_ASM_USES_INTRIN
1411 int aInfo[4];
1412 _ReadWriteBarrier();
1413 __cpuid(aInfo, 0);
1414
1415# else
1416 __asm
1417 {
1418 push ebx
1419 xor eax, eax
1420 cpuid
1421 pop ebx
1422 }
1423# endif
1424}
1425#endif
1426
1427/**
1428 * Virtualization friendly serializing instruction, though more expensive.
1429 */
1430#if RT_INLINE_ASM_EXTERNAL
1431DECLASM(void) ASMSerializeInstructionIRet(void);
1432#else
1433DECLINLINE(void) ASMSerializeInstructionIRet(void)
1434{
1435# if RT_INLINE_ASM_GNU_STYLE
1436# ifdef RT_ARCH_AMD64
1437 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1438 "subq $128, %%rsp\n\t" /*redzone*/
1439 "mov %%ss, %%eax\n\t"
1440 "pushq %%rax\n\t"
1441 "pushq %%r10\n\t"
1442 "pushfq\n\t"
1443 "movl %%cs, %%eax\n\t"
1444 "pushq %%rax\n\t"
1445 "leaq 1f(%%rip), %%rax\n\t"
1446 "pushq %%rax\n\t"
1447 "iretq\n\t"
1448 "1:\n\t"
1449 ::: "rax", "r10", "memory");
1450# else
1451 __asm__ __volatile__ ("pushfl\n\t"
1452 "pushl %%cs\n\t"
1453 "pushl $1f\n\t"
1454 "iretl\n\t"
1455 "1:\n\t"
1456 ::: "memory");
1457# endif
1458
1459# else
1460 __asm
1461 {
1462 pushfd
1463 push cs
1464 push la_ret
1465 iretd
1466 la_ret:
1467 }
1468# endif
1469}
1470#endif
1471
1472/**
1473 * Virtualization friendlier serializing instruction, may still cause exits.
1474 */
1475#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1476DECLASM(void) ASMSerializeInstructionRdTscp(void);
1477#else
1478DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1479{
1480# if RT_INLINE_ASM_GNU_STYLE
1481 /* rdtscp is not supported by ancient linux build VM of course :-( */
1482# ifdef RT_ARCH_AMD64
1483 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1484 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1485# else
1486 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1487 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1488# endif
1489# else
1490# if RT_INLINE_ASM_USES_INTRIN >= 15
1491 uint32_t uIgnore;
1492 _ReadWriteBarrier();
1493 (void)__rdtscp(&uIgnore);
1494 (void)uIgnore;
1495# else
1496 __asm
1497 {
1498 rdtscp
1499 }
1500# endif
1501# endif
1502}
1503#endif
1504
1505
/**
 * Serialize Instruction.
 *
 * Maps to the IRET based variant for 16-bit x86 code and for guest code
 * (IN_GUEST), and to the CPUID based variant everywhere else.
 */
#if (!defined(RT_ARCH_X86) || ARCH_BITS != 16) && !defined(IN_GUEST)
# define ASMSerializeInstruction()  ASMSerializeInstructionCpuId()
#else
# define ASMSerializeInstruction()  ASMSerializeInstructionIRet()
#endif
1514
1515
1516/**
1517 * Memory fence, waits for any pending writes and reads to complete.
1518 */
1519DECLINLINE(void) ASMMemoryFence(void)
1520{
1521 /** @todo use mfence? check if all cpus we care for support it. */
1522#if ARCH_BITS == 16
1523 uint16_t volatile u16;
1524 ASMAtomicXchgU16(&u16, 0);
1525#else
1526 uint32_t volatile u32;
1527 ASMAtomicXchgU32(&u32, 0);
1528#endif
1529}
1530
1531
1532/**
1533 * Write fence, waits for any pending writes to complete.
1534 */
1535DECLINLINE(void) ASMWriteFence(void)
1536{
1537 /** @todo use sfence? check if all cpus we care for support it. */
1538 ASMMemoryFence();
1539}
1540
1541
1542/**
1543 * Read fence, waits for any pending reads to complete.
1544 */
1545DECLINLINE(void) ASMReadFence(void)
1546{
1547 /** @todo use lfence? check if all cpus we care for support it. */
1548 ASMMemoryFence();
1549}
1550
1551
1552/**
1553 * Atomically reads an unsigned 8-bit value, ordered.
1554 *
1555 * @returns Current *pu8 value
1556 * @param pu8 Pointer to the 8-bit variable to read.
1557 */
1558DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1559{
1560 ASMMemoryFence();
1561 return *pu8; /* byte reads are atomic on x86 */
1562}
1563
1564
1565/**
1566 * Atomically reads an unsigned 8-bit value, unordered.
1567 *
1568 * @returns Current *pu8 value
1569 * @param pu8 Pointer to the 8-bit variable to read.
1570 */
1571DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1572{
1573 return *pu8; /* byte reads are atomic on x86 */
1574}
1575
1576
1577/**
1578 * Atomically reads a signed 8-bit value, ordered.
1579 *
1580 * @returns Current *pi8 value
1581 * @param pi8 Pointer to the 8-bit variable to read.
1582 */
1583DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1584{
1585 ASMMemoryFence();
1586 return *pi8; /* byte reads are atomic on x86 */
1587}
1588
1589
1590/**
1591 * Atomically reads a signed 8-bit value, unordered.
1592 *
1593 * @returns Current *pi8 value
1594 * @param pi8 Pointer to the 8-bit variable to read.
1595 */
1596DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1597{
1598 return *pi8; /* byte reads are atomic on x86 */
1599}
1600
1601
1602/**
1603 * Atomically reads an unsigned 16-bit value, ordered.
1604 *
1605 * @returns Current *pu16 value
1606 * @param pu16 Pointer to the 16-bit variable to read.
1607 */
1608DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1609{
1610 ASMMemoryFence();
1611 Assert(!((uintptr_t)pu16 & 1));
1612 return *pu16;
1613}
1614
1615
1616/**
1617 * Atomically reads an unsigned 16-bit value, unordered.
1618 *
1619 * @returns Current *pu16 value
1620 * @param pu16 Pointer to the 16-bit variable to read.
1621 */
1622DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1623{
1624 Assert(!((uintptr_t)pu16 & 1));
1625 return *pu16;
1626}
1627
1628
1629/**
1630 * Atomically reads a signed 16-bit value, ordered.
1631 *
1632 * @returns Current *pi16 value
1633 * @param pi16 Pointer to the 16-bit variable to read.
1634 */
1635DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1636{
1637 ASMMemoryFence();
1638 Assert(!((uintptr_t)pi16 & 1));
1639 return *pi16;
1640}
1641
1642
1643/**
1644 * Atomically reads a signed 16-bit value, unordered.
1645 *
1646 * @returns Current *pi16 value
1647 * @param pi16 Pointer to the 16-bit variable to read.
1648 */
1649DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1650{
1651 Assert(!((uintptr_t)pi16 & 1));
1652 return *pi16;
1653}
1654
1655
1656/**
1657 * Atomically reads an unsigned 32-bit value, ordered.
1658 *
1659 * @returns Current *pu32 value
1660 * @param pu32 Pointer to the 32-bit variable to read.
1661 */
1662DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1663{
1664 ASMMemoryFence();
1665 Assert(!((uintptr_t)pu32 & 3));
1666#if ARCH_BITS == 16
1667 AssertFailed(); /** @todo 16-bit */
1668#endif
1669 return *pu32;
1670}
1671
1672
1673/**
1674 * Atomically reads an unsigned 32-bit value, unordered.
1675 *
1676 * @returns Current *pu32 value
1677 * @param pu32 Pointer to the 32-bit variable to read.
1678 */
1679DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1680{
1681 Assert(!((uintptr_t)pu32 & 3));
1682#if ARCH_BITS == 16
1683 AssertFailed(); /** @todo 16-bit */
1684#endif
1685 return *pu32;
1686}
1687
1688
1689/**
1690 * Atomically reads a signed 32-bit value, ordered.
1691 *
1692 * @returns Current *pi32 value
1693 * @param pi32 Pointer to the 32-bit variable to read.
1694 */
1695DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1696{
1697 ASMMemoryFence();
1698 Assert(!((uintptr_t)pi32 & 3));
1699#if ARCH_BITS == 16
1700 AssertFailed(); /** @todo 16-bit */
1701#endif
1702 return *pi32;
1703}
1704
1705
1706/**
1707 * Atomically reads a signed 32-bit value, unordered.
1708 *
1709 * @returns Current *pi32 value
1710 * @param pi32 Pointer to the 32-bit variable to read.
1711 */
1712DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1713{
1714 Assert(!((uintptr_t)pi32 & 3));
1715#if ARCH_BITS == 16
1716 AssertFailed(); /** @todo 16-bit */
1717#endif
1718 return *pi32;
1719}
1720
1721
1722/**
1723 * Atomically reads an unsigned 64-bit value, ordered.
1724 *
1725 * @returns Current *pu64 value
1726 * @param pu64 Pointer to the 64-bit variable to read.
1727 * The memory pointed to must be writable.
1728 *
1729 * @remarks This may fault if the memory is read-only!
1730 * @remarks x86: Requires a Pentium or later.
1731 */
1732#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1733 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1734DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1735#else
1736DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1737{
1738 uint64_t u64;
1739# ifdef RT_ARCH_AMD64
1740 Assert(!((uintptr_t)pu64 & 7));
1741/*# if RT_INLINE_ASM_GNU_STYLE
1742 __asm__ __volatile__( "mfence\n\t"
1743 "movq %1, %0\n\t"
1744 : "=r" (u64)
1745 : "m" (*pu64));
1746# else
1747 __asm
1748 {
1749 mfence
1750 mov rdx, [pu64]
1751 mov rax, [rdx]
1752 mov [u64], rax
1753 }
1754# endif*/
1755 ASMMemoryFence();
1756 u64 = *pu64;
1757# else /* !RT_ARCH_AMD64 */
1758# if RT_INLINE_ASM_GNU_STYLE
1759# if defined(PIC) || defined(__PIC__)
1760 uint32_t u32EBX = 0;
1761 Assert(!((uintptr_t)pu64 & 7));
1762 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1763 "lock; cmpxchg8b (%5)\n\t"
1764 "movl %3, %%ebx\n\t"
1765 : "=A" (u64),
1766# if RT_GNUC_PREREQ(4, 3)
1767 "+m" (*pu64)
1768# else
1769 "=m" (*pu64)
1770# endif
1771 : "0" (0ULL),
1772 "m" (u32EBX),
1773 "c" (0),
1774 "S" (pu64));
1775# else /* !PIC */
1776 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1777 : "=A" (u64),
1778 "+m" (*pu64)
1779 : "0" (0ULL),
1780 "b" (0),
1781 "c" (0));
1782# endif
1783# else
1784 Assert(!((uintptr_t)pu64 & 7));
1785 __asm
1786 {
1787 xor eax, eax
1788 xor edx, edx
1789 mov edi, pu64
1790 xor ecx, ecx
1791 xor ebx, ebx
1792 lock cmpxchg8b [edi]
1793 mov dword ptr [u64], eax
1794 mov dword ptr [u64 + 4], edx
1795 }
1796# endif
1797# endif /* !RT_ARCH_AMD64 */
1798 return u64;
1799}
1800#endif
1801
1802
1803/**
1804 * Atomically reads an unsigned 64-bit value, unordered.
1805 *
1806 * @returns Current *pu64 value
1807 * @param pu64 Pointer to the 64-bit variable to read.
1808 * The memory pointed to must be writable.
1809 *
1810 * @remarks This may fault if the memory is read-only!
1811 * @remarks x86: Requires a Pentium or later.
1812 */
1813#if !defined(RT_ARCH_AMD64) \
1814 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1815 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1816DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1817#else
1818DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1819{
1820 uint64_t u64;
1821# ifdef RT_ARCH_AMD64
1822 Assert(!((uintptr_t)pu64 & 7));
1823/*# if RT_INLINE_ASM_GNU_STYLE
1824 Assert(!((uintptr_t)pu64 & 7));
1825 __asm__ __volatile__("movq %1, %0\n\t"
1826 : "=r" (u64)
1827 : "m" (*pu64));
1828# else
1829 __asm
1830 {
1831 mov rdx, [pu64]
1832 mov rax, [rdx]
1833 mov [u64], rax
1834 }
1835# endif */
1836 u64 = *pu64;
1837# else /* !RT_ARCH_AMD64 */
1838# if RT_INLINE_ASM_GNU_STYLE
1839# if defined(PIC) || defined(__PIC__)
1840 uint32_t u32EBX = 0;
1841 uint32_t u32Spill;
1842 Assert(!((uintptr_t)pu64 & 7));
1843 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1844 "xor %%ecx,%%ecx\n\t"
1845 "xor %%edx,%%edx\n\t"
1846 "xchgl %%ebx, %3\n\t"
1847 "lock; cmpxchg8b (%4)\n\t"
1848 "movl %3, %%ebx\n\t"
1849 : "=A" (u64),
1850# if RT_GNUC_PREREQ(4, 3)
1851 "+m" (*pu64),
1852# else
1853 "=m" (*pu64),
1854# endif
1855 "=c" (u32Spill)
1856 : "m" (u32EBX),
1857 "S" (pu64));
1858# else /* !PIC */
1859 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1860 : "=A" (u64),
1861 "+m" (*pu64)
1862 : "0" (0ULL),
1863 "b" (0),
1864 "c" (0));
1865# endif
1866# else
1867 Assert(!((uintptr_t)pu64 & 7));
1868 __asm
1869 {
1870 xor eax, eax
1871 xor edx, edx
1872 mov edi, pu64
1873 xor ecx, ecx
1874 xor ebx, ebx
1875 lock cmpxchg8b [edi]
1876 mov dword ptr [u64], eax
1877 mov dword ptr [u64 + 4], edx
1878 }
1879# endif
1880# endif /* !RT_ARCH_AMD64 */
1881 return u64;
1882}
1883#endif
1884
1885
1886/**
1887 * Atomically reads a signed 64-bit value, ordered.
1888 *
1889 * @returns Current *pi64 value
1890 * @param pi64 Pointer to the 64-bit variable to read.
1891 * The memory pointed to must be writable.
1892 *
1893 * @remarks This may fault if the memory is read-only!
1894 * @remarks x86: Requires a Pentium or later.
1895 */
1896DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1897{
1898 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1899}
1900
1901
1902/**
1903 * Atomically reads a signed 64-bit value, unordered.
1904 *
1905 * @returns Current *pi64 value
1906 * @param pi64 Pointer to the 64-bit variable to read.
1907 * The memory pointed to must be writable.
1908 *
1909 * @remarks This will fault if the memory is read-only!
1910 * @remarks x86: Requires a Pentium or later.
1911 */
1912DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1913{
1914 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1915}
1916
1917
1918/**
1919 * Atomically reads a size_t value, ordered.
1920 *
1921 * @returns Current *pcb value
1922 * @param pcb Pointer to the size_t variable to read.
1923 */
1924DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1925{
1926#if ARCH_BITS == 64
1927 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1928#elif ARCH_BITS == 32
1929 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1930#elif ARCH_BITS == 16
1931 AssertCompileSize(size_t, 2);
1932 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1933#else
1934# error "Unsupported ARCH_BITS value"
1935#endif
1936}
1937
1938
1939/**
1940 * Atomically reads a size_t value, unordered.
1941 *
1942 * @returns Current *pcb value
1943 * @param pcb Pointer to the size_t variable to read.
1944 */
1945DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1946{
1947#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
1948 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1949#elif ARCH_BITS == 32
1950 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1951#elif ARCH_BITS == 16
1952 AssertCompileSize(size_t, 2);
1953 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1954#else
1955# error "Unsupported ARCH_BITS value"
1956#endif
1957}
1958
1959
1960/**
1961 * Atomically reads a pointer value, ordered.
1962 *
1963 * @returns Current *pv value
1964 * @param ppv Pointer to the pointer variable to read.
1965 *
1966 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1967 * requires less typing (no casts).
1968 */
1969DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1970{
1971#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
1972 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1973#elif ARCH_BITS == 64
1974 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1975#else
1976# error "ARCH_BITS is bogus"
1977#endif
1978}
1979
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * With GCC the result is first cast to the type of *ppv and then assigned to
 * a temporary of type @a Type, so the compiler checks the two against each
 * other.  Other compilers simply cast the result to @a Type.
 *
 * @returns Current *ppv value.
 * @param   ppv         Pointer to the pointer variable to read.
 * @param   Type        The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#endif
1999
2000
2001/**
2002 * Atomically reads a pointer value, unordered.
2003 *
2004 * @returns Current *pv value
2005 * @param ppv Pointer to the pointer variable to read.
2006 *
2007 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2008 * requires less typing (no casts).
2009 */
2010DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
2011{
2012#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2013 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
2014#elif ARCH_BITS == 64
2015 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
2016#else
2017# error "ARCH_BITS is bogus"
2018#endif
2019}
2020
2021
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * With GCC the result is first cast to the type of *ppv and then assigned to
 * a temporary of type @a Type, so the compiler checks the two against each
 * other.  Other compilers simply cast the result to @a Type.
 *
 * @returns Current *ppv value.
 * @param   ppv         Pointer to the pointer variable to read.
 * @param   Type        The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#endif
2041
2042
2043/**
2044 * Atomically reads a boolean value, ordered.
2045 *
2046 * @returns Current *pf value
2047 * @param pf Pointer to the boolean variable to read.
2048 */
2049DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2050{
2051 ASMMemoryFence();
2052 return *pf; /* byte reads are atomic on x86 */
2053}
2054
2055
2056/**
2057 * Atomically reads a boolean value, unordered.
2058 *
2059 * @returns Current *pf value
2060 * @param pf Pointer to the boolean variable to read.
2061 */
2062DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2063{
2064 return *pf; /* byte reads are atomic on x86 */
2065}
2066
2067
2068/**
2069 * Atomically read a typical IPRT handle value, ordered.
2070 *
2071 * @param ph Pointer to the handle variable to read.
2072 * @param phRes Where to store the result.
2073 *
2074 * @remarks This doesn't currently work for all handles (like RTFILE).
2075 */
2076#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2077# define ASMAtomicReadHandle(ph, phRes) \
2078 do { \
2079 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2080 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2081 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2082 } while (0)
2083#elif HC_ARCH_BITS == 64
2084# define ASMAtomicReadHandle(ph, phRes) \
2085 do { \
2086 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2087 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2088 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2089 } while (0)
2090#else
2091# error HC_ARCH_BITS
2092#endif
2093
2094
2095/**
2096 * Atomically read a typical IPRT handle value, unordered.
2097 *
2098 * @param ph Pointer to the handle variable to read.
2099 * @param phRes Where to store the result.
2100 *
2101 * @remarks This doesn't currently work for all handles (like RTFILE).
2102 */
2103#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2104# define ASMAtomicUoReadHandle(ph, phRes) \
2105 do { \
2106 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2107 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2108 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2109 } while (0)
2110#elif HC_ARCH_BITS == 64
2111# define ASMAtomicUoReadHandle(ph, phRes) \
2112 do { \
2113 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2114 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2115 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2116 } while (0)
2117#else
2118# error HC_ARCH_BITS
2119#endif
2120
2121
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * The read is dispatched on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered
 * read function.  Any other size triggers an assertion and leaves *puRes
 * untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast it so the argument matches the conversion specifier. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2139
2140
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * The read is dispatched on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered
 * read function.  Any other size triggers an assertion and leaves *puRes
 * untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* Name this macro in the message and pass the size as the type the specifier expects. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2158
2159
2160/**
2161 * Atomically writes an unsigned 8-bit value, ordered.
2162 *
2163 * @param pu8 Pointer to the 8-bit variable.
2164 * @param u8 The 8-bit value to assign to *pu8.
2165 */
2166DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2167{
2168 ASMAtomicXchgU8(pu8, u8);
2169}
2170
2171
2172/**
2173 * Atomically writes an unsigned 8-bit value, unordered.
2174 *
2175 * @param pu8 Pointer to the 8-bit variable.
2176 * @param u8 The 8-bit value to assign to *pu8.
2177 */
2178DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2179{
2180 *pu8 = u8; /* byte writes are atomic on x86 */
2181}
2182
2183
2184/**
2185 * Atomically writes a signed 8-bit value, ordered.
2186 *
2187 * @param pi8 Pointer to the 8-bit variable to read.
2188 * @param i8 The 8-bit value to assign to *pi8.
2189 */
2190DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2191{
2192 ASMAtomicXchgS8(pi8, i8);
2193}
2194
2195
2196/**
2197 * Atomically writes a signed 8-bit value, unordered.
2198 *
2199 * @param pi8 Pointer to the 8-bit variable to write.
2200 * @param i8 The 8-bit value to assign to *pi8.
2201 */
2202DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2203{
2204 *pi8 = i8; /* byte writes are atomic on x86 */
2205}
2206
2207
2208/**
2209 * Atomically writes an unsigned 16-bit value, ordered.
2210 *
2211 * @param pu16 Pointer to the 16-bit variable to write.
2212 * @param u16 The 16-bit value to assign to *pu16.
2213 */
2214DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2215{
2216 ASMAtomicXchgU16(pu16, u16);
2217}
2218
2219
2220/**
2221 * Atomically writes an unsigned 16-bit value, unordered.
2222 *
2223 * @param pu16 Pointer to the 16-bit variable to write.
2224 * @param u16 The 16-bit value to assign to *pu16.
2225 */
2226DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2227{
2228 Assert(!((uintptr_t)pu16 & 1));
2229 *pu16 = u16;
2230}
2231
2232
2233/**
2234 * Atomically writes a signed 16-bit value, ordered.
2235 *
2236 * @param pi16 Pointer to the 16-bit variable to write.
2237 * @param i16 The 16-bit value to assign to *pi16.
2238 */
2239DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2240{
2241 ASMAtomicXchgS16(pi16, i16);
2242}
2243
2244
2245/**
2246 * Atomically writes a signed 16-bit value, unordered.
2247 *
2248 * @param pi16 Pointer to the 16-bit variable to write.
2249 * @param i16 The 16-bit value to assign to *pi16.
2250 */
2251DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2252{
2253 Assert(!((uintptr_t)pi16 & 1));
2254 *pi16 = i16;
2255}
2256
2257
2258/**
2259 * Atomically writes an unsigned 32-bit value, ordered.
2260 *
2261 * @param pu32 Pointer to the 32-bit variable to write.
2262 * @param u32 The 32-bit value to assign to *pu32.
2263 */
2264DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2265{
2266 ASMAtomicXchgU32(pu32, u32);
2267}
2268
2269
2270/**
2271 * Atomically writes an unsigned 32-bit value, unordered.
2272 *
2273 * @param pu32 Pointer to the 32-bit variable to write.
2274 * @param u32 The 32-bit value to assign to *pu32.
2275 */
2276DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2277{
2278 Assert(!((uintptr_t)pu32 & 3));
2279#if ARCH_BITS >= 32
2280 *pu32 = u32;
2281#else
2282 ASMAtomicXchgU32(pu32, u32);
2283#endif
2284}
2285
2286
2287/**
2288 * Atomically writes a signed 32-bit value, ordered.
2289 *
2290 * @param pi32 Pointer to the 32-bit variable to write.
2291 * @param i32 The 32-bit value to assign to *pi32.
2292 */
2293DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2294{
2295 ASMAtomicXchgS32(pi32, i32);
2296}
2297
2298
2299/**
2300 * Atomically writes a signed 32-bit value, unordered.
2301 *
2302 * @param pi32 Pointer to the 32-bit variable to write.
2303 * @param i32 The 32-bit value to assign to *pi32.
2304 */
2305DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2306{
2307 Assert(!((uintptr_t)pi32 & 3));
2308#if ARCH_BITS >= 32
2309 *pi32 = i32;
2310#else
2311 ASMAtomicXchgS32(pi32, i32);
2312#endif
2313}
2314
2315
2316/**
2317 * Atomically writes an unsigned 64-bit value, ordered.
2318 *
2319 * @param pu64 Pointer to the 64-bit variable to write.
2320 * @param u64 The 64-bit value to assign to *pu64.
2321 */
2322DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2323{
2324 ASMAtomicXchgU64(pu64, u64);
2325}
2326
2327
2328/**
2329 * Atomically writes an unsigned 64-bit value, unordered.
2330 *
2331 * @param pu64 Pointer to the 64-bit variable to write.
2332 * @param u64 The 64-bit value to assign to *pu64.
2333 */
2334DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2335{
2336 Assert(!((uintptr_t)pu64 & 7));
2337#if ARCH_BITS == 64
2338 *pu64 = u64;
2339#else
2340 ASMAtomicXchgU64(pu64, u64);
2341#endif
2342}
2343
2344
2345/**
2346 * Atomically writes a signed 64-bit value, ordered.
2347 *
2348 * @param pi64 Pointer to the 64-bit variable to write.
2349 * @param i64 The 64-bit value to assign to *pi64.
2350 */
2351DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2352{
2353 ASMAtomicXchgS64(pi64, i64);
2354}
2355
2356
2357/**
2358 * Atomically writes a signed 64-bit value, unordered.
2359 *
2360 * @param pi64 Pointer to the 64-bit variable to write.
2361 * @param i64 The 64-bit value to assign to *pi64.
2362 */
2363DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2364{
2365 Assert(!((uintptr_t)pi64 & 7));
2366#if ARCH_BITS == 64
2367 *pi64 = i64;
2368#else
2369 ASMAtomicXchgS64(pi64, i64);
2370#endif
2371}
2372
2373
2374/**
2375 * Atomically writes a boolean value, unordered.
2376 *
2377 * @param pf Pointer to the boolean variable to write.
2378 * @param f The boolean value to assign to *pf.
2379 */
2380DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2381{
2382 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2383}
2384
2385
2386/**
2387 * Atomically writes a boolean value, unordered.
2388 *
2389 * @param pf Pointer to the boolean variable to write.
2390 * @param f The boolean value to assign to *pf.
2391 */
2392DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2393{
2394 *pf = f; /* byte writes are atomic on x86 */
2395}
2396
2397
2398/**
2399 * Atomically writes a pointer value, ordered.
2400 *
2401 * @param ppv Pointer to the pointer variable to write.
2402 * @param pv The pointer value to assign to *ppv.
2403 */
2404DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2405{
2406#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2407 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2408#elif ARCH_BITS == 64
2409 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2410#else
2411# error "ARCH_BITS is bogus"
2412#endif
2413}
2414
2415
/**
 * Atomically writes a pointer value, ordered.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The arguments are parenthesized wherever they are expanded so that
 *          expression arguments (e.g. @c ppBase + 1) are size-checked,
 *          alignment-checked and written as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2450
2451
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The argument is parenthesized wherever it is expanded so that
 *          expression arguments (e.g. @c ppBase + 1) are size-checked,
 *          alignment-checked and written as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2477
2478
/**
 * Atomically writes a pointer value, unordered.
 *
 * This is a statement macro; it does not yield a value.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The arguments are parenthesized wherever they are expanded so that
 *          expression arguments (e.g. @c ppBase + 1) are size-checked,
 *          alignment-checked and written as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2513
2514
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The argument is parenthesized wherever it is expanded so that
 *          expression arguments (e.g. @c ppBase + 1) are size-checked,
 *          alignment-checked and written as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2540
2541
2542/**
2543 * Atomically write a typical IPRT handle value, ordered.
2544 *
2545 * @param ph Pointer to the variable to update.
2546 * @param hNew The value to assign to *ph.
2547 *
2548 * @remarks This doesn't currently work for all handles (like RTFILE).
2549 */
2550#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2551# define ASMAtomicWriteHandle(ph, hNew) \
2552 do { \
2553 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2554 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2555 } while (0)
2556#elif HC_ARCH_BITS == 64
2557# define ASMAtomicWriteHandle(ph, hNew) \
2558 do { \
2559 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2560 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2561 } while (0)
2562#else
2563# error HC_ARCH_BITS
2564#endif
2565
2566
2567/**
2568 * Atomically write a typical IPRT handle value, unordered.
2569 *
2570 * @param ph Pointer to the variable to update.
2571 * @param hNew The value to assign to *ph.
2572 *
2573 * @remarks This doesn't currently work for all handles (like RTFILE).
2574 */
2575#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_DATA_IS_FAR)
2576# define ASMAtomicUoWriteHandle(ph, hNew) \
2577 do { \
2578 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2579 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2580 } while (0)
2581#elif HC_ARCH_BITS == 64
2582# define ASMAtomicUoWriteHandle(ph, hNew) \
2583 do { \
2584 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2585 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2586 } while (0)
2587#else
2588# error HC_ARCH_BITS
2589#endif
2590
2591
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * The write is dispatched on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered
 * write function.  Any other size triggers an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the conversion specifier. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2609
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * The write is dispatched on sizeof(*(pu)) to the 8, 16, 32 or 64-bit
 * unordered write function.  Any other size triggers an assertion and writes
 * nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro in the message and pass the size as the type the specifier expects. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2627
2628
2629
2630/**
2631 * Atomically exchanges and adds to a 16-bit value, ordered.
2632 *
2633 * @returns The old value.
2634 * @param pu16 Pointer to the value.
2635 * @param u16 Number to add.
2636 *
2637 * @remarks Currently not implemented, just to make 16-bit code happy.
2638 * @remarks x86: Requires a 486 or later.
2639 */
2640DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2641
2642
2643/**
2644 * Atomically exchanges and adds to a 32-bit value, ordered.
2645 *
2646 * @returns The old value.
2647 * @param pu32 Pointer to the value.
2648 * @param u32 Number to add.
2649 *
2650 * @remarks x86: Requires a 486 or later.
2651 */
2652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2653DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2654#else
2655DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2656{
2657# if RT_INLINE_ASM_USES_INTRIN
2658 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2659 return u32;
2660
2661# elif RT_INLINE_ASM_GNU_STYLE
2662 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2663 : "=r" (u32),
2664 "=m" (*pu32)
2665 : "0" (u32),
2666 "m" (*pu32)
2667 : "memory");
2668 return u32;
2669# else
2670 __asm
2671 {
2672 mov eax, [u32]
2673# ifdef RT_ARCH_AMD64
2674 mov rdx, [pu32]
2675 lock xadd [rdx], eax
2676# else
2677 mov edx, [pu32]
2678 lock xadd [edx], eax
2679# endif
2680 mov [u32], eax
2681 }
2682 return u32;
2683# endif
2684}
2685#endif
2686
2687
2688/**
2689 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2690 *
2691 * @returns The old value.
2692 * @param pi32 Pointer to the value.
2693 * @param i32 Number to add.
2694 *
2695 * @remarks x86: Requires a 486 or later.
2696 */
2697DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2698{
2699 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2700}
2701
2702
2703/**
2704 * Atomically exchanges and adds to a 64-bit value, ordered.
2705 *
2706 * @returns The old value.
2707 * @param pu64 Pointer to the value.
2708 * @param u64 Number to add.
2709 *
2710 * @remarks x86: Requires a Pentium or later.
2711 */
2712#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2713DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2714#else
2715DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2716{
2717# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2718 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2719 return u64;
2720
2721# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2722 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2723 : "=r" (u64),
2724 "=m" (*pu64)
2725 : "0" (u64),
2726 "m" (*pu64)
2727 : "memory");
2728 return u64;
2729# else
2730 uint64_t u64Old;
2731 for (;;)
2732 {
2733 uint64_t u64New;
2734 u64Old = ASMAtomicUoReadU64(pu64);
2735 u64New = u64Old + u64;
2736 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2737 break;
2738 ASMNopPause();
2739 }
2740 return u64Old;
2741# endif
2742}
2743#endif
2744
2745
2746/**
2747 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2748 *
2749 * @returns The old value.
2750 * @param pi64 Pointer to the value.
2751 * @param i64 Number to add.
2752 *
2753 * @remarks x86: Requires a Pentium or later.
2754 */
2755DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2756{
2757 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2758}
2759
2760
2761/**
2762 * Atomically exchanges and adds to a size_t value, ordered.
2763 *
2764 * @returns The old value.
2765 * @param pcb Pointer to the size_t value.
2766 * @param cb Number to add.
2767 */
2768DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2769{
2770#if ARCH_BITS == 64
2771 AssertCompileSize(size_t, 8);
2772 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2773#elif ARCH_BITS == 32
2774 AssertCompileSize(size_t, 4);
2775 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2776#elif ARCH_BITS == 16
2777 AssertCompileSize(size_t, 2);
2778 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2779#else
2780# error "Unsupported ARCH_BITS value"
2781#endif
2782}
2783
2784
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Only 32-bit and 64-bit variables are handled.  Any other size triggers an
 * assertion and leaves both *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the conversion specifier. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2801
2802
2803
2804/**
2805 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2806 *
2807 * @returns The old value.
2808 * @param pu16 Pointer to the value.
2809 * @param u16 Number to subtract.
2810 *
2811 * @remarks x86: Requires a 486 or later.
2812 */
2813DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2814{
2815 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2816}
2817
2818
2819/**
2820 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2821 *
2822 * @returns The old value.
2823 * @param pi16 Pointer to the value.
2824 * @param i16 Number to subtract.
2825 *
2826 * @remarks x86: Requires a 486 or later.
2827 */
2828DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2829{
2830 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2831}
2832
2833
2834/**
2835 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2836 *
2837 * @returns The old value.
2838 * @param pu32 Pointer to the value.
2839 * @param u32 Number to subtract.
2840 *
2841 * @remarks x86: Requires a 486 or later.
2842 */
2843DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2844{
2845 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2846}
2847
2848
2849/**
2850 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2851 *
2852 * @returns The old value.
2853 * @param pi32 Pointer to the value.
2854 * @param i32 Number to subtract.
2855 *
2856 * @remarks x86: Requires a 486 or later.
2857 */
2858DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2859{
2860 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2861}
2862
2863
2864/**
2865 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2866 *
2867 * @returns The old value.
2868 * @param pu64 Pointer to the value.
2869 * @param u64 Number to subtract.
2870 *
2871 * @remarks x86: Requires a Pentium or later.
2872 */
2873DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2874{
2875 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2876}
2877
2878
2879/**
2880 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2881 *
2882 * @returns The old value.
2883 * @param pi64 Pointer to the value.
2884 * @param i64 Number to subtract.
2885 *
2886 * @remarks x86: Requires a Pentium or later.
2887 */
2888DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2889{
2890 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2891}
2892
2893
2894/**
2895 * Atomically exchanges and subtracts to a size_t value, ordered.
2896 *
2897 * @returns The old value.
2898 * @param pcb Pointer to the size_t value.
2899 * @param cb Number to subtract.
2900 *
2901 * @remarks x86: Requires a 486 or later.
2902 */
2903DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2904{
2905#if ARCH_BITS == 64
2906 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2907#elif ARCH_BITS == 32
2908 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2909#elif ARCH_BITS == 16
2910 AssertCompileSize(size_t, 2);
2911 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2912#else
2913# error "Unsupported ARCH_BITS value"
2914#endif
2915}
2916
2917
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Only 32-bit and 64-bit variables are handled.  Any other size triggers an
 * assertion and leaves both *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the conversion specifier. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
        } \
    } while (0)
2936
2937
2938
2939/**
2940 * Atomically increment a 16-bit value, ordered.
2941 *
2942 * @returns The new value.
2943 * @param pu16 Pointer to the value to increment.
2944 * @remarks Not implemented. Just to make 16-bit code happy.
2945 *
2946 * @remarks x86: Requires a 486 or later.
2947 */
2948DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2949
2950
2951/**
2952 * Atomically increment a 32-bit value, ordered.
2953 *
2954 * @returns The new value.
2955 * @param pu32 Pointer to the value to increment.
2956 *
2957 * @remarks x86: Requires a 486 or later.
2958 */
2959#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2960DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2961#else
2962DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2963{
2964 uint32_t u32;
2965# if RT_INLINE_ASM_USES_INTRIN
2966 u32 = _InterlockedIncrement((long *)pu32);
2967 return u32;
2968
2969# elif RT_INLINE_ASM_GNU_STYLE
2970 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2971 : "=r" (u32),
2972 "=m" (*pu32)
2973 : "0" (1),
2974 "m" (*pu32)
2975 : "memory");
2976 return u32+1;
2977# else
2978 __asm
2979 {
2980 mov eax, 1
2981# ifdef RT_ARCH_AMD64
2982 mov rdx, [pu32]
2983 lock xadd [rdx], eax
2984# else
2985 mov edx, [pu32]
2986 lock xadd [edx], eax
2987# endif
2988 mov u32, eax
2989 }
2990 return u32+1;
2991# endif
2992}
2993#endif
2994
2995
2996/**
2997 * Atomically increment a signed 32-bit value, ordered.
2998 *
2999 * @returns The new value.
3000 * @param pi32 Pointer to the value to increment.
3001 *
3002 * @remarks x86: Requires a 486 or later.
3003 */
3004DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3005{
3006 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3007}
3008
3009
3010/**
3011 * Atomically increment a 64-bit value, ordered.
3012 *
3013 * @returns The new value.
3014 * @param pu64 Pointer to the value to increment.
3015 *
3016 * @remarks x86: Requires a Pentium or later.
3017 */
3018#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3019DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
3020#else
3021DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
3022{
3023# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3024 uint64_t u64;
3025 u64 = _InterlockedIncrement64((__int64 *)pu64);
3026 return u64;
3027
3028# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3029 uint64_t u64;
3030 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3031 : "=r" (u64),
3032 "=m" (*pu64)
3033 : "0" (1),
3034 "m" (*pu64)
3035 : "memory");
3036 return u64 + 1;
3037# else
3038 return ASMAtomicAddU64(pu64, 1) + 1;
3039# endif
3040}
3041#endif
3042
3043
3044/**
3045 * Atomically increment a signed 64-bit value, ordered.
3046 *
3047 * @returns The new value.
3048 * @param pi64 Pointer to the value to increment.
3049 *
3050 * @remarks x86: Requires a Pentium or later.
3051 */
3052DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3053{
3054 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3055}
3056
3057
3058/**
3059 * Atomically increment a size_t value, ordered.
3060 *
3061 * @returns The new value.
3062 * @param pcb Pointer to the value to increment.
3063 *
3064 * @remarks x86: Requires a 486 or later.
3065 */
3066DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
3067{
3068#if ARCH_BITS == 64
3069 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3070#elif ARCH_BITS == 32
3071 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3072#elif ARCH_BITS == 16
3073 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3074#else
3075# error "Unsupported ARCH_BITS value"
3076#endif
3077}
3078
3079
3080
3081/**
3082 * Atomically decrement an unsigned 32-bit value, ordered.
3083 *
3084 * @returns The new value.
3085 * @param pu16 Pointer to the value to decrement.
3086 * @remarks Not implemented. Just to make 16-bit code happy.
3087 *
3088 * @remarks x86: Requires a 486 or later.
3089 */
3090DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3091
3092
3093/**
3094 * Atomically decrement an unsigned 32-bit value, ordered.
3095 *
3096 * @returns The new value.
3097 * @param pu32 Pointer to the value to decrement.
3098 *
3099 * @remarks x86: Requires a 486 or later.
3100 */
3101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3102DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3103#else
3104DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3105{
3106 uint32_t u32;
3107# if RT_INLINE_ASM_USES_INTRIN
3108 u32 = _InterlockedDecrement((long *)pu32);
3109 return u32;
3110
3111# elif RT_INLINE_ASM_GNU_STYLE
3112 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3113 : "=r" (u32),
3114 "=m" (*pu32)
3115 : "0" (-1),
3116 "m" (*pu32)
3117 : "memory");
3118 return u32-1;
3119# else
3120 __asm
3121 {
3122 mov eax, -1
3123# ifdef RT_ARCH_AMD64
3124 mov rdx, [pu32]
3125 lock xadd [rdx], eax
3126# else
3127 mov edx, [pu32]
3128 lock xadd [edx], eax
3129# endif
3130 mov u32, eax
3131 }
3132 return u32-1;
3133# endif
3134}
3135#endif
3136
3137
3138/**
3139 * Atomically decrement a signed 32-bit value, ordered.
3140 *
3141 * @returns The new value.
3142 * @param pi32 Pointer to the value to decrement.
3143 *
3144 * @remarks x86: Requires a 486 or later.
3145 */
3146DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3147{
3148 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3149}
3150
3151
3152/**
3153 * Atomically decrement an unsigned 64-bit value, ordered.
3154 *
3155 * @returns The new value.
3156 * @param pu64 Pointer to the value to decrement.
3157 *
3158 * @remarks x86: Requires a Pentium or later.
3159 */
3160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3161DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3162#else
3163DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3164{
3165# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3166 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3167 return u64;
3168
3169# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3170 uint64_t u64;
3171 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3172 : "=r" (u64),
3173 "=m" (*pu64)
3174 : "0" (~(uint64_t)0),
3175 "m" (*pu64)
3176 : "memory");
3177 return u64-1;
3178# else
3179 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3180# endif
3181}
3182#endif
3183
3184
3185/**
3186 * Atomically decrement a signed 64-bit value, ordered.
3187 *
3188 * @returns The new value.
3189 * @param pi64 Pointer to the value to decrement.
3190 *
3191 * @remarks x86: Requires a Pentium or later.
3192 */
3193DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3194{
3195 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3196}
3197
3198
3199/**
3200 * Atomically decrement a size_t value, ordered.
3201 *
3202 * @returns The new value.
3203 * @param pcb Pointer to the value to decrement.
3204 *
3205 * @remarks x86: Requires a 486 or later.
3206 */
3207DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
3208{
3209#if ARCH_BITS == 64
3210 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3211#elif ARCH_BITS == 32
3212 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3213#elif ARCH_BITS == 16
3214 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3215#else
3216# error "Unsupported ARCH_BITS value"
3217#endif
3218}
3219
3220
3221/**
3222 * Atomically Or an unsigned 32-bit value, ordered.
3223 *
3224 * @param pu32 Pointer to the pointer variable to OR u32 with.
3225 * @param u32 The value to OR *pu32 with.
3226 *
3227 * @remarks x86: Requires a 386 or later.
3228 */
3229#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3230DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3231#else
3232DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3233{
3234# if RT_INLINE_ASM_USES_INTRIN
3235 _InterlockedOr((long volatile *)pu32, (long)u32);
3236
3237# elif RT_INLINE_ASM_GNU_STYLE
3238 __asm__ __volatile__("lock; orl %1, %0\n\t"
3239 : "=m" (*pu32)
3240 : "ir" (u32),
3241 "m" (*pu32));
3242# else
3243 __asm
3244 {
3245 mov eax, [u32]
3246# ifdef RT_ARCH_AMD64
3247 mov rdx, [pu32]
3248 lock or [rdx], eax
3249# else
3250 mov edx, [pu32]
3251 lock or [edx], eax
3252# endif
3253 }
3254# endif
3255}
3256#endif
3257
3258
3259/**
3260 * Atomically Or a signed 32-bit value, ordered.
3261 *
3262 * @param pi32 Pointer to the pointer variable to OR u32 with.
3263 * @param i32 The value to OR *pu32 with.
3264 *
3265 * @remarks x86: Requires a 386 or later.
3266 */
3267DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3268{
3269 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3270}
3271
3272
3273/**
3274 * Atomically Or an unsigned 64-bit value, ordered.
3275 *
3276 * @param pu64 Pointer to the pointer variable to OR u64 with.
3277 * @param u64 The value to OR *pu64 with.
3278 *
3279 * @remarks x86: Requires a Pentium or later.
3280 */
3281#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3282DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3283#else
3284DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3285{
3286# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3287 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3288
3289# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3290 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3291 : "=m" (*pu64)
3292 : "r" (u64),
3293 "m" (*pu64));
3294# else
3295 for (;;)
3296 {
3297 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3298 uint64_t u64New = u64Old | u64;
3299 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3300 break;
3301 ASMNopPause();
3302 }
3303# endif
3304}
3305#endif
3306
3307
3308/**
3309 * Atomically Or a signed 64-bit value, ordered.
3310 *
3311 * @param pi64 Pointer to the pointer variable to OR u64 with.
3312 * @param i64 The value to OR *pu64 with.
3313 *
3314 * @remarks x86: Requires a Pentium or later.
3315 */
3316DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3317{
3318 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3319}
3320
3321
3322/**
3323 * Atomically And an unsigned 32-bit value, ordered.
3324 *
3325 * @param pu32 Pointer to the pointer variable to AND u32 with.
3326 * @param u32 The value to AND *pu32 with.
3327 *
3328 * @remarks x86: Requires a 386 or later.
3329 */
3330#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3331DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3332#else
3333DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3334{
3335# if RT_INLINE_ASM_USES_INTRIN
3336 _InterlockedAnd((long volatile *)pu32, u32);
3337
3338# elif RT_INLINE_ASM_GNU_STYLE
3339 __asm__ __volatile__("lock; andl %1, %0\n\t"
3340 : "=m" (*pu32)
3341 : "ir" (u32),
3342 "m" (*pu32));
3343# else
3344 __asm
3345 {
3346 mov eax, [u32]
3347# ifdef RT_ARCH_AMD64
3348 mov rdx, [pu32]
3349 lock and [rdx], eax
3350# else
3351 mov edx, [pu32]
3352 lock and [edx], eax
3353# endif
3354 }
3355# endif
3356}
3357#endif
3358
3359
3360/**
3361 * Atomically And a signed 32-bit value, ordered.
3362 *
3363 * @param pi32 Pointer to the pointer variable to AND i32 with.
3364 * @param i32 The value to AND *pi32 with.
3365 *
3366 * @remarks x86: Requires a 386 or later.
3367 */
3368DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3369{
3370 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3371}
3372
3373
3374/**
3375 * Atomically And an unsigned 64-bit value, ordered.
3376 *
3377 * @param pu64 Pointer to the pointer variable to AND u64 with.
3378 * @param u64 The value to AND *pu64 with.
3379 *
3380 * @remarks x86: Requires a Pentium or later.
3381 */
3382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3383DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3384#else
3385DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3386{
3387# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3388 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3389
3390# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3391 __asm__ __volatile__("lock; andq %1, %0\n\t"
3392 : "=m" (*pu64)
3393 : "r" (u64),
3394 "m" (*pu64));
3395# else
3396 for (;;)
3397 {
3398 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3399 uint64_t u64New = u64Old & u64;
3400 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3401 break;
3402 ASMNopPause();
3403 }
3404# endif
3405}
3406#endif
3407
3408
3409/**
3410 * Atomically And a signed 64-bit value, ordered.
3411 *
3412 * @param pi64 Pointer to the pointer variable to AND i64 with.
3413 * @param i64 The value to AND *pi64 with.
3414 *
3415 * @remarks x86: Requires a Pentium or later.
3416 */
3417DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3418{
3419 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3420}
3421
3422
3423/**
3424 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3425 *
3426 * @param pu32 Pointer to the pointer variable to OR u32 with.
3427 * @param u32 The value to OR *pu32 with.
3428 *
3429 * @remarks x86: Requires a 386 or later.
3430 */
3431#if RT_INLINE_ASM_EXTERNAL
3432DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3433#else
3434DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3435{
3436# if RT_INLINE_ASM_GNU_STYLE
3437 __asm__ __volatile__("orl %1, %0\n\t"
3438 : "=m" (*pu32)
3439 : "ir" (u32),
3440 "m" (*pu32));
3441# else
3442 __asm
3443 {
3444 mov eax, [u32]
3445# ifdef RT_ARCH_AMD64
3446 mov rdx, [pu32]
3447 or [rdx], eax
3448# else
3449 mov edx, [pu32]
3450 or [edx], eax
3451# endif
3452 }
3453# endif
3454}
3455#endif
3456
3457
3458/**
3459 * Atomically OR a signed 32-bit value, unordered.
3460 *
3461 * @param pi32 Pointer to the pointer variable to OR u32 with.
3462 * @param i32 The value to OR *pu32 with.
3463 *
3464 * @remarks x86: Requires a 386 or later.
3465 */
3466DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3467{
3468 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3469}
3470
3471
3472/**
3473 * Atomically OR an unsigned 64-bit value, unordered.
3474 *
3475 * @param pu64 Pointer to the pointer variable to OR u64 with.
3476 * @param u64 The value to OR *pu64 with.
3477 *
3478 * @remarks x86: Requires a Pentium or later.
3479 */
3480#if RT_INLINE_ASM_EXTERNAL
3481DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3482#else
3483DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3484{
3485# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3486 __asm__ __volatile__("orq %1, %q0\n\t"
3487 : "=m" (*pu64)
3488 : "r" (u64),
3489 "m" (*pu64));
3490# else
3491 for (;;)
3492 {
3493 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3494 uint64_t u64New = u64Old | u64;
3495 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3496 break;
3497 ASMNopPause();
3498 }
3499# endif
3500}
3501#endif
3502
3503
3504/**
3505 * Atomically Or a signed 64-bit value, unordered.
3506 *
3507 * @param pi64 Pointer to the pointer variable to OR u64 with.
3508 * @param i64 The value to OR *pu64 with.
3509 *
3510 * @remarks x86: Requires a Pentium or later.
3511 */
3512DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3513{
3514 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3515}
3516
3517
3518/**
3519 * Atomically And an unsigned 32-bit value, unordered.
3520 *
3521 * @param pu32 Pointer to the pointer variable to AND u32 with.
3522 * @param u32 The value to AND *pu32 with.
3523 *
3524 * @remarks x86: Requires a 386 or later.
3525 */
3526#if RT_INLINE_ASM_EXTERNAL
3527DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3528#else
3529DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3530{
3531# if RT_INLINE_ASM_GNU_STYLE
3532 __asm__ __volatile__("andl %1, %0\n\t"
3533 : "=m" (*pu32)
3534 : "ir" (u32),
3535 "m" (*pu32));
3536# else
3537 __asm
3538 {
3539 mov eax, [u32]
3540# ifdef RT_ARCH_AMD64
3541 mov rdx, [pu32]
3542 and [rdx], eax
3543# else
3544 mov edx, [pu32]
3545 and [edx], eax
3546# endif
3547 }
3548# endif
3549}
3550#endif
3551
3552
3553/**
3554 * Atomically And a signed 32-bit value, unordered.
3555 *
3556 * @param pi32 Pointer to the pointer variable to AND i32 with.
3557 * @param i32 The value to AND *pi32 with.
3558 *
3559 * @remarks x86: Requires a 386 or later.
3560 */
3561DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3562{
3563 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3564}
3565
3566
3567/**
3568 * Atomically And an unsigned 64-bit value, unordered.
3569 *
3570 * @param pu64 Pointer to the pointer variable to AND u64 with.
3571 * @param u64 The value to AND *pu64 with.
3572 *
3573 * @remarks x86: Requires a Pentium or later.
3574 */
3575#if RT_INLINE_ASM_EXTERNAL
3576DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3577#else
3578DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3579{
3580# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3581 __asm__ __volatile__("andq %1, %0\n\t"
3582 : "=m" (*pu64)
3583 : "r" (u64),
3584 "m" (*pu64));
3585# else
3586 for (;;)
3587 {
3588 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3589 uint64_t u64New = u64Old & u64;
3590 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3591 break;
3592 ASMNopPause();
3593 }
3594# endif
3595}
3596#endif
3597
3598
3599/**
3600 * Atomically And a signed 64-bit value, unordered.
3601 *
3602 * @param pi64 Pointer to the pointer variable to AND i64 with.
3603 * @param i64 The value to AND *pi64 with.
3604 *
3605 * @remarks x86: Requires a Pentium or later.
3606 */
3607DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3608{
3609 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3610}
3611
3612
3613/**
3614 * Atomically increment an unsigned 32-bit value, unordered.
3615 *
3616 * @returns the new value.
3617 * @param pu32 Pointer to the variable to increment.
3618 *
3619 * @remarks x86: Requires a 486 or later.
3620 */
3621#if RT_INLINE_ASM_EXTERNAL
3622DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3623#else
3624DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3625{
3626 uint32_t u32;
3627# if RT_INLINE_ASM_GNU_STYLE
3628 __asm__ __volatile__("xaddl %0, %1\n\t"
3629 : "=r" (u32),
3630 "=m" (*pu32)
3631 : "0" (1),
3632 "m" (*pu32)
3633 : "memory");
3634 return u32 + 1;
3635# else
3636 __asm
3637 {
3638 mov eax, 1
3639# ifdef RT_ARCH_AMD64
3640 mov rdx, [pu32]
3641 xadd [rdx], eax
3642# else
3643 mov edx, [pu32]
3644 xadd [edx], eax
3645# endif
3646 mov u32, eax
3647 }
3648 return u32 + 1;
3649# endif
3650}
3651#endif
3652
3653
3654/**
3655 * Atomically decrement an unsigned 32-bit value, unordered.
3656 *
3657 * @returns the new value.
3658 * @param pu32 Pointer to the variable to decrement.
3659 *
3660 * @remarks x86: Requires a 486 or later.
3661 */
3662#if RT_INLINE_ASM_EXTERNAL
3663DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3664#else
3665DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3666{
3667 uint32_t u32;
3668# if RT_INLINE_ASM_GNU_STYLE
3669 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3670 : "=r" (u32),
3671 "=m" (*pu32)
3672 : "0" (-1),
3673 "m" (*pu32)
3674 : "memory");
3675 return u32 - 1;
3676# else
3677 __asm
3678 {
3679 mov eax, -1
3680# ifdef RT_ARCH_AMD64
3681 mov rdx, [pu32]
3682 xadd [rdx], eax
3683# else
3684 mov edx, [pu32]
3685 xadd [edx], eax
3686# endif
3687 mov u32, eax
3688 }
3689 return u32 - 1;
3690# endif
3691}
3692#endif
3693
3694
/** @def RT_ASM_PAGE_SIZE
 * The page size assumed by the page helpers in this header: 0x2000 when
 * RT_ARCH_SPARC64 is defined, 0x1000 otherwise.  Defined locally so that
 * iprt/param.h does not have to be dragged in.  If PAGE_SIZE is already
 * defined (and NT_INCLUDED is not), it is checked against this value.
 * @internal
 */
#ifdef RT_ARCH_SPARC64
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3714
3715/**
3716 * Zeros a 4K memory page.
3717 *
3718 * @param pv Pointer to the memory block. This must be page aligned.
3719 */
3720#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3721DECLASM(void) ASMMemZeroPage(volatile void *pv);
3722# else
3723DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3724{
3725# if RT_INLINE_ASM_USES_INTRIN
3726# ifdef RT_ARCH_AMD64
3727 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3728# else
3729 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3730# endif
3731
3732# elif RT_INLINE_ASM_GNU_STYLE
3733 RTCCUINTREG uDummy;
3734# ifdef RT_ARCH_AMD64
3735 __asm__ __volatile__("rep stosq"
3736 : "=D" (pv),
3737 "=c" (uDummy)
3738 : "0" (pv),
3739 "c" (RT_ASM_PAGE_SIZE >> 3),
3740 "a" (0)
3741 : "memory");
3742# else
3743 __asm__ __volatile__("rep stosl"
3744 : "=D" (pv),
3745 "=c" (uDummy)
3746 : "0" (pv),
3747 "c" (RT_ASM_PAGE_SIZE >> 2),
3748 "a" (0)
3749 : "memory");
3750# endif
3751# else
3752 __asm
3753 {
3754# ifdef RT_ARCH_AMD64
3755 xor rax, rax
3756 mov ecx, 0200h
3757 mov rdi, [pv]
3758 rep stosq
3759# else
3760 xor eax, eax
3761 mov ecx, 0400h
3762 mov edi, [pv]
3763 rep stosd
3764# endif
3765 }
3766# endif
3767}
3768# endif
3769
3770
3771/**
3772 * Zeros a memory block with a 32-bit aligned size.
3773 *
3774 * @param pv Pointer to the memory block.
3775 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3776 */
3777#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3778DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3779#else
3780DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3781{
3782# if RT_INLINE_ASM_USES_INTRIN
3783# ifdef RT_ARCH_AMD64
3784 if (!(cb & 7))
3785 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3786 else
3787# endif
3788 __stosd((unsigned long *)pv, 0, cb / 4);
3789
3790# elif RT_INLINE_ASM_GNU_STYLE
3791 __asm__ __volatile__("rep stosl"
3792 : "=D" (pv),
3793 "=c" (cb)
3794 : "0" (pv),
3795 "1" (cb >> 2),
3796 "a" (0)
3797 : "memory");
3798# else
3799 __asm
3800 {
3801 xor eax, eax
3802# ifdef RT_ARCH_AMD64
3803 mov rcx, [cb]
3804 shr rcx, 2
3805 mov rdi, [pv]
3806# else
3807 mov ecx, [cb]
3808 shr ecx, 2
3809 mov edi, [pv]
3810# endif
3811 rep stosd
3812 }
3813# endif
3814}
3815#endif
3816
3817
3818/**
3819 * Fills a memory block with a 32-bit aligned size.
3820 *
3821 * @param pv Pointer to the memory block.
3822 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3823 * @param u32 The value to fill with.
3824 */
3825#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3826DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3827#else
3828DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3829{
3830# if RT_INLINE_ASM_USES_INTRIN
3831# ifdef RT_ARCH_AMD64
3832 if (!(cb & 7))
3833 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3834 else
3835# endif
3836 __stosd((unsigned long *)pv, u32, cb / 4);
3837
3838# elif RT_INLINE_ASM_GNU_STYLE
3839 __asm__ __volatile__("rep stosl"
3840 : "=D" (pv),
3841 "=c" (cb)
3842 : "0" (pv),
3843 "1" (cb >> 2),
3844 "a" (u32)
3845 : "memory");
3846# else
3847 __asm
3848 {
3849# ifdef RT_ARCH_AMD64
3850 mov rcx, [cb]
3851 shr rcx, 2
3852 mov rdi, [pv]
3853# else
3854 mov ecx, [cb]
3855 shr ecx, 2
3856 mov edi, [pv]
3857# endif
3858 mov eax, [u32]
3859 rep stosd
3860 }
3861# endif
3862}
3863#endif
3864
3865
3866/**
3867 * Checks if a memory block is all zeros.
3868 *
3869 * @returns Pointer to the first non-zero byte.
3870 * @returns NULL if all zero.
3871 *
3872 * @param pv Pointer to the memory block.
3873 * @param cb Number of bytes in the block.
3874 *
3875 * @todo Fix name, it is a predicate function but it's not returning boolean!
3876 */
3877#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
3878DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3879#else
3880DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3881{
3882 uint8_t const *pb = (uint8_t const *)pv;
3883 for (; cb; cb--, pb++)
3884 if (RT_LIKELY(*pb == 0))
3885 { /* likely */ }
3886 else
3887 return (void *)pb;
3888 return NULL;
3889}
3890#endif
3891
3892
3893/**
3894 * Checks if a memory block is all zeros.
3895 *
3896 * @returns true if zero, false if not.
3897 *
3898 * @param pv Pointer to the memory block.
3899 * @param cb Number of bytes in the block.
3900 *
3901 * @sa ASMMemFirstNonZero
3902 */
3903DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3904{
3905 return ASMMemFirstNonZero(pv, cb) == NULL;
3906}
3907
3908
3909/**
3910 * Checks if a memory page is all zeros.
3911 *
3912 * @returns true / false.
3913 *
3914 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3915 * boundary
3916 */
3917DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3918{
3919# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3920 union { RTCCUINTREG r; bool f; } uAX;
3921 RTCCUINTREG xCX, xDI;
3922 Assert(!((uintptr_t)pvPage & 15));
3923 __asm__ __volatile__("repe; "
3924# ifdef RT_ARCH_AMD64
3925 "scasq\n\t"
3926# else
3927 "scasl\n\t"
3928# endif
3929 "setnc %%al\n\t"
3930 : "=&c" (xCX),
3931 "=&D" (xDI),
3932 "=&a" (uAX.r)
3933 : "mr" (pvPage),
3934# ifdef RT_ARCH_AMD64
3935 "0" (RT_ASM_PAGE_SIZE/8),
3936# else
3937 "0" (RT_ASM_PAGE_SIZE/4),
3938# endif
3939 "1" (pvPage),
3940 "2" (0));
3941 return uAX.f;
3942# else
3943 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3944 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3945 Assert(!((uintptr_t)pvPage & 15));
3946 for (;;)
3947 {
3948 if (puPtr[0]) return false;
3949 if (puPtr[4]) return false;
3950
3951 if (puPtr[2]) return false;
3952 if (puPtr[6]) return false;
3953
3954 if (puPtr[1]) return false;
3955 if (puPtr[5]) return false;
3956
3957 if (puPtr[3]) return false;
3958 if (puPtr[7]) return false;
3959
3960 if (!--cLeft)
3961 return true;
3962 puPtr += 8;
3963 }
3964# endif
3965}
3966
3967
3968/**
3969 * Checks if a memory block is filled with the specified byte, returning the
3970 * first mismatch.
3971 *
3972 * This is sort of an inverted memchr.
3973 *
3974 * @returns Pointer to the byte which doesn't equal u8.
3975 * @returns NULL if all equal to u8.
3976 *
3977 * @param pv Pointer to the memory block.
3978 * @param cb Number of bytes in the block.
3979 * @param u8 The value it's supposed to be filled with.
3980 *
3981 * @remarks No alignment requirements.
3982 */
3983#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3984 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
3985DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3986#else
3987DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3988{
3989 uint8_t const *pb = (uint8_t const *)pv;
3990 for (; cb; cb--, pb++)
3991 if (RT_LIKELY(*pb == u8))
3992 { /* likely */ }
3993 else
3994 return (void *)pb;
3995 return NULL;
3996}
3997#endif
3998
3999
4000/**
4001 * Checks if a memory block is filled with the specified byte.
4002 *
4003 * @returns true if all matching, false if not.
4004 *
4005 * @param pv Pointer to the memory block.
4006 * @param cb Number of bytes in the block.
4007 * @param u8 The value it's supposed to be filled with.
4008 *
4009 * @remarks No alignment requirements.
4010 */
4011DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
4012{
4013 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4014}
4015
4016
4017/**
4018 * Checks if a memory block is filled with the specified 32-bit value.
4019 *
4020 * This is a sort of inverted memchr.
4021 *
4022 * @returns Pointer to the first value which doesn't equal u32.
4023 * @returns NULL if all equal to u32.
4024 *
4025 * @param pv Pointer to the memory block.
4026 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4027 * @param u32 The value it's supposed to be filled with.
4028 */
4029DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
4030{
4031/** @todo rewrite this in inline assembly? */
4032 uint32_t const *pu32 = (uint32_t const *)pv;
4033 for (; cb; cb -= 4, pu32++)
4034 if (RT_LIKELY(*pu32 == u32))
4035 { /* likely */ }
4036 else
4037 return (uint32_t *)pu32;
4038 return NULL;
4039}
4040
4041
4042/**
4043 * Probes a byte pointer for read access.
4044 *
4045 * While the function will not fault if the byte is not read accessible,
4046 * the idea is to do this in a safe place like before acquiring locks
4047 * and such like.
4048 *
4049 * Also, this functions guarantees that an eager compiler is not going
4050 * to optimize the probing away.
4051 *
4052 * @param pvByte Pointer to the byte.
4053 */
4054#if RT_INLINE_ASM_EXTERNAL
4055DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4056#else
4057DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4058{
4059 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4060 uint8_t u8;
4061# if RT_INLINE_ASM_GNU_STYLE
4062 __asm__ __volatile__("movb (%1), %0\n\t"
4063 : "=r" (u8)
4064 : "r" (pvByte));
4065# else
4066 __asm
4067 {
4068# ifdef RT_ARCH_AMD64
4069 mov rax, [pvByte]
4070 mov al, [rax]
4071# else
4072 mov eax, [pvByte]
4073 mov al, [eax]
4074# endif
4075 mov [u8], al
4076 }
4077# endif
4078 return u8;
4079}
4080#endif
4081
4082/**
4083 * Probes a buffer for read access page by page.
4084 *
4085 * While the function will fault if the buffer is not fully read
4086 * accessible, the idea is to do this in a safe place like before
4087 * acquiring locks and such like.
4088 *
4089 * Also, this functions guarantees that an eager compiler is not going
4090 * to optimize the probing away.
4091 *
4092 * @param pvBuf Pointer to the buffer.
4093 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4094 */
4095DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4096{
4097 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4098 /* the first byte */
4099 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4100 ASMProbeReadByte(pu8);
4101
4102 /* the pages in between pages. */
4103 while (cbBuf > RT_ASM_PAGE_SIZE)
4104 {
4105 ASMProbeReadByte(pu8);
4106 cbBuf -= RT_ASM_PAGE_SIZE;
4107 pu8 += RT_ASM_PAGE_SIZE;
4108 }
4109
4110 /* the last byte */
4111 ASMProbeReadByte(pu8 + cbBuf - 1);
4112}
4113
4114
4115
4116/** @defgroup grp_inline_bits Bit Operations
4117 * @{
4118 */
4119
4120
4121/**
4122 * Sets a bit in a bitmap.
4123 *
4124 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4125 * @param iBit The bit to set.
4126 *
4127 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4128 * However, doing so will yield better performance as well as avoiding
4129 * traps accessing the last bits in the bitmap.
4130 */
4131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4132DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4133#else
4134DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4135{
4136# if RT_INLINE_ASM_USES_INTRIN
4137 _bittestandset((long *)pvBitmap, iBit);
4138
4139# elif RT_INLINE_ASM_GNU_STYLE
4140 __asm__ __volatile__("btsl %1, %0"
4141 : "=m" (*(volatile long *)pvBitmap)
4142 : "Ir" (iBit),
4143 "m" (*(volatile long *)pvBitmap)
4144 : "memory");
4145# else
4146 __asm
4147 {
4148# ifdef RT_ARCH_AMD64
4149 mov rax, [pvBitmap]
4150 mov edx, [iBit]
4151 bts [rax], edx
4152# else
4153 mov eax, [pvBitmap]
4154 mov edx, [iBit]
4155 bts [eax], edx
4156# endif
4157 }
4158# endif
4159}
4160#endif
4161
4162
4163/**
4164 * Atomically sets a bit in a bitmap, ordered.
4165 *
4166 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4167 * the memory access isn't atomic!
4168 * @param iBit The bit to set.
4169 *
4170 * @remarks x86: Requires a 386 or later.
4171 */
4172#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4173DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4174#else
4175DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4176{
4177 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4178# if RT_INLINE_ASM_USES_INTRIN
4179 _interlockedbittestandset((long *)pvBitmap, iBit);
4180# elif RT_INLINE_ASM_GNU_STYLE
4181 __asm__ __volatile__("lock; btsl %1, %0"
4182 : "=m" (*(volatile long *)pvBitmap)
4183 : "Ir" (iBit),
4184 "m" (*(volatile long *)pvBitmap)
4185 : "memory");
4186# else
4187 __asm
4188 {
4189# ifdef RT_ARCH_AMD64
4190 mov rax, [pvBitmap]
4191 mov edx, [iBit]
4192 lock bts [rax], edx
4193# else
4194 mov eax, [pvBitmap]
4195 mov edx, [iBit]
4196 lock bts [eax], edx
4197# endif
4198 }
4199# endif
4200}
4201#endif
4202
4203
4204/**
4205 * Clears a bit in a bitmap.
4206 *
4207 * @param pvBitmap Pointer to the bitmap.
4208 * @param iBit The bit to clear.
4209 *
4210 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4211 * However, doing so will yield better performance as well as avoiding
4212 * traps accessing the last bits in the bitmap.
4213 */
4214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4215DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4216#else
4217DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4218{
4219# if RT_INLINE_ASM_USES_INTRIN
4220 _bittestandreset((long *)pvBitmap, iBit);
4221
4222# elif RT_INLINE_ASM_GNU_STYLE
4223 __asm__ __volatile__("btrl %1, %0"
4224 : "=m" (*(volatile long *)pvBitmap)
4225 : "Ir" (iBit),
4226 "m" (*(volatile long *)pvBitmap)
4227 : "memory");
4228# else
4229 __asm
4230 {
4231# ifdef RT_ARCH_AMD64
4232 mov rax, [pvBitmap]
4233 mov edx, [iBit]
4234 btr [rax], edx
4235# else
4236 mov eax, [pvBitmap]
4237 mov edx, [iBit]
4238 btr [eax], edx
4239# endif
4240 }
4241# endif
4242}
4243#endif
4244
4245
4246/**
4247 * Atomically clears a bit in a bitmap, ordered.
4248 *
4249 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4250 * the memory access isn't atomic!
4251 * @param iBit The bit to toggle set.
4252 *
4253 * @remarks No memory barrier, take care on smp.
4254 * @remarks x86: Requires a 386 or later.
4255 */
4256#if RT_INLINE_ASM_EXTERNAL
4257DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4258#else
4259DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4260{
4261 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4262# if RT_INLINE_ASM_GNU_STYLE
4263 __asm__ __volatile__("lock; btrl %1, %0"
4264 : "=m" (*(volatile long *)pvBitmap)
4265 : "Ir" (iBit),
4266 "m" (*(volatile long *)pvBitmap)
4267 : "memory");
4268# else
4269 __asm
4270 {
4271# ifdef RT_ARCH_AMD64
4272 mov rax, [pvBitmap]
4273 mov edx, [iBit]
4274 lock btr [rax], edx
4275# else
4276 mov eax, [pvBitmap]
4277 mov edx, [iBit]
4278 lock btr [eax], edx
4279# endif
4280 }
4281# endif
4282}
4283#endif
4284
4285
4286/**
4287 * Toggles a bit in a bitmap.
4288 *
4289 * @param pvBitmap Pointer to the bitmap.
4290 * @param iBit The bit to toggle.
4291 *
4292 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4293 * However, doing so will yield better performance as well as avoiding
4294 * traps accessing the last bits in the bitmap.
4295 */
4296#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4297DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4298#else
4299DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4300{
4301# if RT_INLINE_ASM_USES_INTRIN
4302 _bittestandcomplement((long *)pvBitmap, iBit);
4303# elif RT_INLINE_ASM_GNU_STYLE
4304 __asm__ __volatile__("btcl %1, %0"
4305 : "=m" (*(volatile long *)pvBitmap)
4306 : "Ir" (iBit),
4307 "m" (*(volatile long *)pvBitmap)
4308 : "memory");
4309# else
4310 __asm
4311 {
4312# ifdef RT_ARCH_AMD64
4313 mov rax, [pvBitmap]
4314 mov edx, [iBit]
4315 btc [rax], edx
4316# else
4317 mov eax, [pvBitmap]
4318 mov edx, [iBit]
4319 btc [eax], edx
4320# endif
4321 }
4322# endif
4323}
4324#endif
4325
4326
4327/**
4328 * Atomically toggles a bit in a bitmap, ordered.
4329 *
4330 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4331 * the memory access isn't atomic!
4332 * @param iBit The bit to test and set.
4333 *
4334 * @remarks x86: Requires a 386 or later.
4335 */
4336#if RT_INLINE_ASM_EXTERNAL
4337DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4338#else
4339DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4340{
4341 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4342# if RT_INLINE_ASM_GNU_STYLE
4343 __asm__ __volatile__("lock; btcl %1, %0"
4344 : "=m" (*(volatile long *)pvBitmap)
4345 : "Ir" (iBit),
4346 "m" (*(volatile long *)pvBitmap)
4347 : "memory");
4348# else
4349 __asm
4350 {
4351# ifdef RT_ARCH_AMD64
4352 mov rax, [pvBitmap]
4353 mov edx, [iBit]
4354 lock btc [rax], edx
4355# else
4356 mov eax, [pvBitmap]
4357 mov edx, [iBit]
4358 lock btc [eax], edx
4359# endif
4360 }
4361# endif
4362}
4363#endif
4364
4365
4366/**
4367 * Tests and sets a bit in a bitmap.
4368 *
4369 * @returns true if the bit was set.
4370 * @returns false if the bit was clear.
4371 *
4372 * @param pvBitmap Pointer to the bitmap.
4373 * @param iBit The bit to test and set.
4374 *
4375 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4376 * However, doing so will yield better performance as well as avoiding
4377 * traps accessing the last bits in the bitmap.
4378 */
4379#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4380DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4381#else
4382DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4383{
4384 union { bool f; uint32_t u32; uint8_t u8; } rc;
4385# if RT_INLINE_ASM_USES_INTRIN
4386 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4387
4388# elif RT_INLINE_ASM_GNU_STYLE
4389 __asm__ __volatile__("btsl %2, %1\n\t"
4390 "setc %b0\n\t"
4391 "andl $1, %0\n\t"
4392 : "=q" (rc.u32),
4393 "=m" (*(volatile long *)pvBitmap)
4394 : "Ir" (iBit),
4395 "m" (*(volatile long *)pvBitmap)
4396 : "memory");
4397# else
4398 __asm
4399 {
4400 mov edx, [iBit]
4401# ifdef RT_ARCH_AMD64
4402 mov rax, [pvBitmap]
4403 bts [rax], edx
4404# else
4405 mov eax, [pvBitmap]
4406 bts [eax], edx
4407# endif
4408 setc al
4409 and eax, 1
4410 mov [rc.u32], eax
4411 }
4412# endif
4413 return rc.f;
4414}
4415#endif
4416
4417
4418/**
4419 * Atomically tests and sets a bit in a bitmap, ordered.
4420 *
4421 * @returns true if the bit was set.
4422 * @returns false if the bit was clear.
4423 *
4424 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4425 * the memory access isn't atomic!
4426 * @param iBit The bit to set.
4427 *
4428 * @remarks x86: Requires a 386 or later.
4429 */
4430#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4431DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4432#else
4433DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4434{
4435 union { bool f; uint32_t u32; uint8_t u8; } rc;
4436 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4437# if RT_INLINE_ASM_USES_INTRIN
4438 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4439# elif RT_INLINE_ASM_GNU_STYLE
4440 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4441 "setc %b0\n\t"
4442 "andl $1, %0\n\t"
4443 : "=q" (rc.u32),
4444 "=m" (*(volatile long *)pvBitmap)
4445 : "Ir" (iBit),
4446 "m" (*(volatile long *)pvBitmap)
4447 : "memory");
4448# else
4449 __asm
4450 {
4451 mov edx, [iBit]
4452# ifdef RT_ARCH_AMD64
4453 mov rax, [pvBitmap]
4454 lock bts [rax], edx
4455# else
4456 mov eax, [pvBitmap]
4457 lock bts [eax], edx
4458# endif
4459 setc al
4460 and eax, 1
4461 mov [rc.u32], eax
4462 }
4463# endif
4464 return rc.f;
4465}
4466#endif
4467
4468
4469/**
4470 * Tests and clears a bit in a bitmap.
4471 *
4472 * @returns true if the bit was set.
4473 * @returns false if the bit was clear.
4474 *
4475 * @param pvBitmap Pointer to the bitmap.
4476 * @param iBit The bit to test and clear.
4477 *
4478 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4479 * However, doing so will yield better performance as well as avoiding
4480 * traps accessing the last bits in the bitmap.
4481 */
4482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4483DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4484#else
4485DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4486{
4487 union { bool f; uint32_t u32; uint8_t u8; } rc;
4488# if RT_INLINE_ASM_USES_INTRIN
4489 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4490
4491# elif RT_INLINE_ASM_GNU_STYLE
4492 __asm__ __volatile__("btrl %2, %1\n\t"
4493 "setc %b0\n\t"
4494 "andl $1, %0\n\t"
4495 : "=q" (rc.u32),
4496 "=m" (*(volatile long *)pvBitmap)
4497 : "Ir" (iBit),
4498 "m" (*(volatile long *)pvBitmap)
4499 : "memory");
4500# else
4501 __asm
4502 {
4503 mov edx, [iBit]
4504# ifdef RT_ARCH_AMD64
4505 mov rax, [pvBitmap]
4506 btr [rax], edx
4507# else
4508 mov eax, [pvBitmap]
4509 btr [eax], edx
4510# endif
4511 setc al
4512 and eax, 1
4513 mov [rc.u32], eax
4514 }
4515# endif
4516 return rc.f;
4517}
4518#endif
4519
4520
4521/**
4522 * Atomically tests and clears a bit in a bitmap, ordered.
4523 *
4524 * @returns true if the bit was set.
4525 * @returns false if the bit was clear.
4526 *
4527 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4528 * the memory access isn't atomic!
4529 * @param iBit The bit to test and clear.
4530 *
4531 * @remarks No memory barrier, take care on smp.
4532 * @remarks x86: Requires a 386 or later.
4533 */
4534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4535DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4536#else
4537DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4538{
4539 union { bool f; uint32_t u32; uint8_t u8; } rc;
4540 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4541# if RT_INLINE_ASM_USES_INTRIN
4542 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4543
4544# elif RT_INLINE_ASM_GNU_STYLE
4545 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4546 "setc %b0\n\t"
4547 "andl $1, %0\n\t"
4548 : "=q" (rc.u32),
4549 "=m" (*(volatile long *)pvBitmap)
4550 : "Ir" (iBit),
4551 "m" (*(volatile long *)pvBitmap)
4552 : "memory");
4553# else
4554 __asm
4555 {
4556 mov edx, [iBit]
4557# ifdef RT_ARCH_AMD64
4558 mov rax, [pvBitmap]
4559 lock btr [rax], edx
4560# else
4561 mov eax, [pvBitmap]
4562 lock btr [eax], edx
4563# endif
4564 setc al
4565 and eax, 1
4566 mov [rc.u32], eax
4567 }
4568# endif
4569 return rc.f;
4570}
4571#endif
4572
4573
4574/**
4575 * Tests and toggles a bit in a bitmap.
4576 *
4577 * @returns true if the bit was set.
4578 * @returns false if the bit was clear.
4579 *
4580 * @param pvBitmap Pointer to the bitmap.
4581 * @param iBit The bit to test and toggle.
4582 *
4583 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4584 * However, doing so will yield better performance as well as avoiding
4585 * traps accessing the last bits in the bitmap.
4586 */
4587#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4588DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4589#else
4590DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4591{
4592 union { bool f; uint32_t u32; uint8_t u8; } rc;
4593# if RT_INLINE_ASM_USES_INTRIN
4594 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4595
4596# elif RT_INLINE_ASM_GNU_STYLE
4597 __asm__ __volatile__("btcl %2, %1\n\t"
4598 "setc %b0\n\t"
4599 "andl $1, %0\n\t"
4600 : "=q" (rc.u32),
4601 "=m" (*(volatile long *)pvBitmap)
4602 : "Ir" (iBit),
4603 "m" (*(volatile long *)pvBitmap)
4604 : "memory");
4605# else
4606 __asm
4607 {
4608 mov edx, [iBit]
4609# ifdef RT_ARCH_AMD64
4610 mov rax, [pvBitmap]
4611 btc [rax], edx
4612# else
4613 mov eax, [pvBitmap]
4614 btc [eax], edx
4615# endif
4616 setc al
4617 and eax, 1
4618 mov [rc.u32], eax
4619 }
4620# endif
4621 return rc.f;
4622}
4623#endif
4624
4625
4626/**
4627 * Atomically tests and toggles a bit in a bitmap, ordered.
4628 *
4629 * @returns true if the bit was set.
4630 * @returns false if the bit was clear.
4631 *
4632 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4633 * the memory access isn't atomic!
4634 * @param iBit The bit to test and toggle.
4635 *
4636 * @remarks x86: Requires a 386 or later.
4637 */
4638#if RT_INLINE_ASM_EXTERNAL
4639DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4640#else
4641DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4642{
4643 union { bool f; uint32_t u32; uint8_t u8; } rc;
4644 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4645# if RT_INLINE_ASM_GNU_STYLE
4646 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4647 "setc %b0\n\t"
4648 "andl $1, %0\n\t"
4649 : "=q" (rc.u32),
4650 "=m" (*(volatile long *)pvBitmap)
4651 : "Ir" (iBit),
4652 "m" (*(volatile long *)pvBitmap)
4653 : "memory");
4654# else
4655 __asm
4656 {
4657 mov edx, [iBit]
4658# ifdef RT_ARCH_AMD64
4659 mov rax, [pvBitmap]
4660 lock btc [rax], edx
4661# else
4662 mov eax, [pvBitmap]
4663 lock btc [eax], edx
4664# endif
4665 setc al
4666 and eax, 1
4667 mov [rc.u32], eax
4668 }
4669# endif
4670 return rc.f;
4671}
4672#endif
4673
4674
4675/**
4676 * Tests if a bit in a bitmap is set.
4677 *
4678 * @returns true if the bit is set.
4679 * @returns false if the bit is clear.
4680 *
4681 * @param pvBitmap Pointer to the bitmap.
4682 * @param iBit The bit to test.
4683 *
4684 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4685 * However, doing so will yield better performance as well as avoiding
4686 * traps accessing the last bits in the bitmap.
4687 */
4688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4689DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4690#else
4691DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4692{
4693 union { bool f; uint32_t u32; uint8_t u8; } rc;
4694# if RT_INLINE_ASM_USES_INTRIN
4695 rc.u32 = _bittest((long *)pvBitmap, iBit);
4696# elif RT_INLINE_ASM_GNU_STYLE
4697
4698 __asm__ __volatile__("btl %2, %1\n\t"
4699 "setc %b0\n\t"
4700 "andl $1, %0\n\t"
4701 : "=q" (rc.u32)
4702 : "m" (*(const volatile long *)pvBitmap),
4703 "Ir" (iBit)
4704 : "memory");
4705# else
4706 __asm
4707 {
4708 mov edx, [iBit]
4709# ifdef RT_ARCH_AMD64
4710 mov rax, [pvBitmap]
4711 bt [rax], edx
4712# else
4713 mov eax, [pvBitmap]
4714 bt [eax], edx
4715# endif
4716 setc al
4717 and eax, 1
4718 mov [rc.u32], eax
4719 }
4720# endif
4721 return rc.f;
4722}
4723#endif
4724
4725
4726/**
4727 * Clears a bit range within a bitmap.
4728 *
4729 * @param pvBitmap Pointer to the bitmap.
4730 * @param iBitStart The First bit to clear.
4731 * @param iBitEnd The first bit not to clear.
4732 */
4733DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4734{
4735 if (iBitStart < iBitEnd)
4736 {
4737 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4738 int32_t iStart = iBitStart & ~31;
4739 int32_t iEnd = iBitEnd & ~31;
4740 if (iStart == iEnd)
4741 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4742 else
4743 {
4744 /* bits in first dword. */
4745 if (iBitStart & 31)
4746 {
4747 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4748 pu32++;
4749 iBitStart = iStart + 32;
4750 }
4751
4752 /* whole dword. */
4753 if (iBitStart != iEnd)
4754 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4755
4756 /* bits in last dword. */
4757 if (iBitEnd & 31)
4758 {
4759 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4760 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4761 }
4762 }
4763 }
4764}
4765
4766
4767/**
4768 * Sets a bit range within a bitmap.
4769 *
4770 * @param pvBitmap Pointer to the bitmap.
4771 * @param iBitStart The First bit to set.
4772 * @param iBitEnd The first bit not to set.
4773 */
4774DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4775{
4776 if (iBitStart < iBitEnd)
4777 {
4778 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4779 int32_t iStart = iBitStart & ~31;
4780 int32_t iEnd = iBitEnd & ~31;
4781 if (iStart == iEnd)
4782 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4783 else
4784 {
4785 /* bits in first dword. */
4786 if (iBitStart & 31)
4787 {
4788 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4789 pu32++;
4790 iBitStart = iStart + 32;
4791 }
4792
4793 /* whole dword. */
4794 if (iBitStart != iEnd)
4795 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4796
4797 /* bits in last dword. */
4798 if (iBitEnd & 31)
4799 {
4800 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4801 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4802 }
4803 }
4804 }
4805}
4806
4807
4808/**
4809 * Finds the first clear bit in a bitmap.
4810 *
4811 * @returns Index of the first zero bit.
4812 * @returns -1 if no clear bit was found.
4813 * @param pvBitmap Pointer to the bitmap.
4814 * @param cBits The number of bits in the bitmap. Multiple of 32.
4815 */
4816#if RT_INLINE_ASM_EXTERNAL
4817DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4818#else
4819DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4820{
4821 if (cBits)
4822 {
4823 int32_t iBit;
4824# if RT_INLINE_ASM_GNU_STYLE
4825 RTCCUINTREG uEAX, uECX, uEDI;
4826 cBits = RT_ALIGN_32(cBits, 32);
4827 __asm__ __volatile__("repe; scasl\n\t"
4828 "je 1f\n\t"
4829# ifdef RT_ARCH_AMD64
4830 "lea -4(%%rdi), %%rdi\n\t"
4831 "xorl (%%rdi), %%eax\n\t"
4832 "subq %5, %%rdi\n\t"
4833# else
4834 "lea -4(%%edi), %%edi\n\t"
4835 "xorl (%%edi), %%eax\n\t"
4836 "subl %5, %%edi\n\t"
4837# endif
4838 "shll $3, %%edi\n\t"
4839 "bsfl %%eax, %%edx\n\t"
4840 "addl %%edi, %%edx\n\t"
4841 "1:\t\n"
4842 : "=d" (iBit),
4843 "=&c" (uECX),
4844 "=&D" (uEDI),
4845 "=&a" (uEAX)
4846 : "0" (0xffffffff),
4847 "mr" (pvBitmap),
4848 "1" (cBits >> 5),
4849 "2" (pvBitmap),
4850 "3" (0xffffffff));
4851# else
4852 cBits = RT_ALIGN_32(cBits, 32);
4853 __asm
4854 {
4855# ifdef RT_ARCH_AMD64
4856 mov rdi, [pvBitmap]
4857 mov rbx, rdi
4858# else
4859 mov edi, [pvBitmap]
4860 mov ebx, edi
4861# endif
4862 mov edx, 0ffffffffh
4863 mov eax, edx
4864 mov ecx, [cBits]
4865 shr ecx, 5
4866 repe scasd
4867 je done
4868
4869# ifdef RT_ARCH_AMD64
4870 lea rdi, [rdi - 4]
4871 xor eax, [rdi]
4872 sub rdi, rbx
4873# else
4874 lea edi, [edi - 4]
4875 xor eax, [edi]
4876 sub edi, ebx
4877# endif
4878 shl edi, 3
4879 bsf edx, eax
4880 add edx, edi
4881 done:
4882 mov [iBit], edx
4883 }
4884# endif
4885 return iBit;
4886 }
4887 return -1;
4888}
4889#endif
4890
4891
4892/**
4893 * Finds the next clear bit in a bitmap.
4894 *
4895 * @returns Index of the first zero bit.
4896 * @returns -1 if no clear bit was found.
4897 * @param pvBitmap Pointer to the bitmap.
4898 * @param cBits The number of bits in the bitmap. Multiple of 32.
4899 * @param iBitPrev The bit returned from the last search.
4900 * The search will start at iBitPrev + 1.
4901 */
4902#if RT_INLINE_ASM_EXTERNAL
4903DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4904#else
4905DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4906{
4907 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4908 int iBit = ++iBitPrev & 31;
4909 if (iBit)
4910 {
4911 /*
4912 * Inspect the 32-bit word containing the unaligned bit.
4913 */
4914 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4915
4916# if RT_INLINE_ASM_USES_INTRIN
4917 unsigned long ulBit = 0;
4918 if (_BitScanForward(&ulBit, u32))
4919 return ulBit + iBitPrev;
4920# else
4921# if RT_INLINE_ASM_GNU_STYLE
4922 __asm__ __volatile__("bsf %1, %0\n\t"
4923 "jnz 1f\n\t"
4924 "movl $-1, %0\n\t"
4925 "1:\n\t"
4926 : "=r" (iBit)
4927 : "r" (u32));
4928# else
4929 __asm
4930 {
4931 mov edx, [u32]
4932 bsf eax, edx
4933 jnz done
4934 mov eax, 0ffffffffh
4935 done:
4936 mov [iBit], eax
4937 }
4938# endif
4939 if (iBit >= 0)
4940 return iBit + iBitPrev;
4941# endif
4942
4943 /*
4944 * Skip ahead and see if there is anything left to search.
4945 */
4946 iBitPrev |= 31;
4947 iBitPrev++;
4948 if (cBits <= (uint32_t)iBitPrev)
4949 return -1;
4950 }
4951
4952 /*
4953 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4954 */
4955 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4956 if (iBit >= 0)
4957 iBit += iBitPrev;
4958 return iBit;
4959}
4960#endif
4961
4962
4963/**
4964 * Finds the first set bit in a bitmap.
4965 *
4966 * @returns Index of the first set bit.
4967 * @returns -1 if no clear bit was found.
4968 * @param pvBitmap Pointer to the bitmap.
4969 * @param cBits The number of bits in the bitmap. Multiple of 32.
4970 */
4971#if RT_INLINE_ASM_EXTERNAL
4972DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4973#else
4974DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4975{
4976 if (cBits)
4977 {
4978 int32_t iBit;
4979# if RT_INLINE_ASM_GNU_STYLE
4980 RTCCUINTREG uEAX, uECX, uEDI;
4981 cBits = RT_ALIGN_32(cBits, 32);
4982 __asm__ __volatile__("repe; scasl\n\t"
4983 "je 1f\n\t"
4984# ifdef RT_ARCH_AMD64
4985 "lea -4(%%rdi), %%rdi\n\t"
4986 "movl (%%rdi), %%eax\n\t"
4987 "subq %5, %%rdi\n\t"
4988# else
4989 "lea -4(%%edi), %%edi\n\t"
4990 "movl (%%edi), %%eax\n\t"
4991 "subl %5, %%edi\n\t"
4992# endif
4993 "shll $3, %%edi\n\t"
4994 "bsfl %%eax, %%edx\n\t"
4995 "addl %%edi, %%edx\n\t"
4996 "1:\t\n"
4997 : "=d" (iBit),
4998 "=&c" (uECX),
4999 "=&D" (uEDI),
5000 "=&a" (uEAX)
5001 : "0" (0xffffffff),
5002 "mr" (pvBitmap),
5003 "1" (cBits >> 5),
5004 "2" (pvBitmap),
5005 "3" (0));
5006# else
5007 cBits = RT_ALIGN_32(cBits, 32);
5008 __asm
5009 {
5010# ifdef RT_ARCH_AMD64
5011 mov rdi, [pvBitmap]
5012 mov rbx, rdi
5013# else
5014 mov edi, [pvBitmap]
5015 mov ebx, edi
5016# endif
5017 mov edx, 0ffffffffh
5018 xor eax, eax
5019 mov ecx, [cBits]
5020 shr ecx, 5
5021 repe scasd
5022 je done
5023# ifdef RT_ARCH_AMD64
5024 lea rdi, [rdi - 4]
5025 mov eax, [rdi]
5026 sub rdi, rbx
5027# else
5028 lea edi, [edi - 4]
5029 mov eax, [edi]
5030 sub edi, ebx
5031# endif
5032 shl edi, 3
5033 bsf edx, eax
5034 add edx, edi
5035 done:
5036 mov [iBit], edx
5037 }
5038# endif
5039 return iBit;
5040 }
5041 return -1;
5042}
5043#endif
5044
5045
5046/**
5047 * Finds the next set bit in a bitmap.
5048 *
5049 * @returns Index of the next set bit.
5050 * @returns -1 if no set bit was found.
5051 * @param pvBitmap Pointer to the bitmap.
5052 * @param cBits The number of bits in the bitmap. Multiple of 32.
5053 * @param iBitPrev The bit returned from the last search.
5054 * The search will start at iBitPrev + 1.
5055 */
5056#if RT_INLINE_ASM_EXTERNAL
5057DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5058#else
5059DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5060{
5061 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5062 int iBit = ++iBitPrev & 31;
5063 if (iBit)
5064 {
5065 /*
5066 * Inspect the 32-bit word containing the unaligned bit.
5067 */
5068 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5069
5070# if RT_INLINE_ASM_USES_INTRIN
5071 unsigned long ulBit = 0;
5072 if (_BitScanForward(&ulBit, u32))
5073 return ulBit + iBitPrev;
5074# else
5075# if RT_INLINE_ASM_GNU_STYLE
5076 __asm__ __volatile__("bsf %1, %0\n\t"
5077 "jnz 1f\n\t"
5078 "movl $-1, %0\n\t"
5079 "1:\n\t"
5080 : "=r" (iBit)
5081 : "r" (u32));
5082# else
5083 __asm
5084 {
5085 mov edx, [u32]
5086 bsf eax, edx
5087 jnz done
5088 mov eax, 0ffffffffh
5089 done:
5090 mov [iBit], eax
5091 }
5092# endif
5093 if (iBit >= 0)
5094 return iBit + iBitPrev;
5095# endif
5096
5097 /*
5098 * Skip ahead and see if there is anything left to search.
5099 */
5100 iBitPrev |= 31;
5101 iBitPrev++;
5102 if (cBits <= (uint32_t)iBitPrev)
5103 return -1;
5104 }
5105
5106 /*
5107 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5108 */
5109 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5110 if (iBit >= 0)
5111 iBit += iBitPrev;
5112 return iBit;
5113}
5114#endif
5115
5116
5117/**
5118 * Finds the first bit which is set in the given 32-bit integer.
5119 * Bits are numbered from 1 (least significant) to 32.
5120 *
5121 * @returns index [1..32] of the first set bit.
5122 * @returns 0 if all bits are cleared.
5123 * @param u32 Integer to search for set bits.
5124 * @remarks Similar to ffs() in BSD.
5125 */
5126#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5127DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5128#else
5129DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5130{
5131# if RT_INLINE_ASM_USES_INTRIN
5132 unsigned long iBit;
5133 if (_BitScanForward(&iBit, u32))
5134 iBit++;
5135 else
5136 iBit = 0;
5137# elif RT_INLINE_ASM_GNU_STYLE
5138 uint32_t iBit;
5139 __asm__ __volatile__("bsf %1, %0\n\t"
5140 "jnz 1f\n\t"
5141 "xorl %0, %0\n\t"
5142 "jmp 2f\n"
5143 "1:\n\t"
5144 "incl %0\n"
5145 "2:\n\t"
5146 : "=r" (iBit)
5147 : "rm" (u32));
5148# else
5149 uint32_t iBit;
5150 _asm
5151 {
5152 bsf eax, [u32]
5153 jnz found
5154 xor eax, eax
5155 jmp done
5156 found:
5157 inc eax
5158 done:
5159 mov [iBit], eax
5160 }
5161# endif
5162 return iBit;
5163}
5164#endif
5165
5166
5167/**
5168 * Finds the first bit which is set in the given 32-bit integer.
5169 * Bits are numbered from 1 (least significant) to 32.
5170 *
5171 * @returns index [1..32] of the first set bit.
5172 * @returns 0 if all bits are cleared.
5173 * @param i32 Integer to search for set bits.
5174 * @remark Similar to ffs() in BSD.
5175 */
5176DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5177{
5178 return ASMBitFirstSetU32((uint32_t)i32);
5179}
5180
5181
5182/**
5183 * Finds the first bit which is set in the given 64-bit integer.
5184 *
5185 * Bits are numbered from 1 (least significant) to 64.
5186 *
5187 * @returns index [1..64] of the first set bit.
5188 * @returns 0 if all bits are cleared.
5189 * @param u64 Integer to search for set bits.
5190 * @remarks Similar to ffs() in BSD.
5191 */
5192#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5193DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5194#else
5195DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5196{
5197# if RT_INLINE_ASM_USES_INTRIN
5198 unsigned long iBit;
5199# if ARCH_BITS == 64
5200 if (_BitScanForward64(&iBit, u64))
5201 iBit++;
5202 else
5203 iBit = 0;
5204# else
5205 if (_BitScanForward(&iBit, (uint32_t)u64))
5206 iBit++;
5207 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5208 iBit += 33;
5209 else
5210 iBit = 0;
5211# endif
5212# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5213 uint64_t iBit;
5214 __asm__ __volatile__("bsfq %1, %0\n\t"
5215 "jnz 1f\n\t"
5216 "xorl %k0, %k0\n\t"
5217 "jmp 2f\n"
5218 "1:\n\t"
5219 "incl %k0\n"
5220 "2:\n\t"
5221 : "=r" (iBit)
5222 : "rm" (u64));
5223# else
5224 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5225 if (!iBit)
5226 {
5227 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5228 if (iBit)
5229 iBit += 32;
5230 }
5231# endif
5232 return (unsigned)iBit;
5233}
5234#endif
5235
5236
5237/**
5238 * Finds the first bit which is set in the given 16-bit integer.
5239 *
5240 * Bits are numbered from 1 (least significant) to 16.
5241 *
5242 * @returns index [1..16] of the first set bit.
5243 * @returns 0 if all bits are cleared.
5244 * @param u16 Integer to search for set bits.
5245 * @remarks For 16-bit bs3kit code.
5246 */
5247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5248DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5249#else
5250DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5251{
5252 return ASMBitFirstSetU32((uint32_t)u16);
5253}
5254#endif
5255
5256
5257/**
5258 * Finds the last bit which is set in the given 32-bit integer.
5259 * Bits are numbered from 1 (least significant) to 32.
5260 *
5261 * @returns index [1..32] of the last set bit.
5262 * @returns 0 if all bits are cleared.
5263 * @param u32 Integer to search for set bits.
5264 * @remark Similar to fls() in BSD.
5265 */
5266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5267DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5268#else
5269DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5270{
5271# if RT_INLINE_ASM_USES_INTRIN
5272 unsigned long iBit;
5273 if (_BitScanReverse(&iBit, u32))
5274 iBit++;
5275 else
5276 iBit = 0;
5277# elif RT_INLINE_ASM_GNU_STYLE
5278 uint32_t iBit;
5279 __asm__ __volatile__("bsrl %1, %0\n\t"
5280 "jnz 1f\n\t"
5281 "xorl %0, %0\n\t"
5282 "jmp 2f\n"
5283 "1:\n\t"
5284 "incl %0\n"
5285 "2:\n\t"
5286 : "=r" (iBit)
5287 : "rm" (u32));
5288# else
5289 uint32_t iBit;
5290 _asm
5291 {
5292 bsr eax, [u32]
5293 jnz found
5294 xor eax, eax
5295 jmp done
5296 found:
5297 inc eax
5298 done:
5299 mov [iBit], eax
5300 }
5301# endif
5302 return iBit;
5303}
5304#endif
5305
5306
5307/**
5308 * Finds the last bit which is set in the given 32-bit integer.
5309 * Bits are numbered from 1 (least significant) to 32.
5310 *
5311 * @returns index [1..32] of the last set bit.
5312 * @returns 0 if all bits are cleared.
5313 * @param i32 Integer to search for set bits.
5314 * @remark Similar to fls() in BSD.
5315 */
5316DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5317{
5318 return ASMBitLastSetU32((uint32_t)i32);
5319}
5320
5321
5322/**
5323 * Finds the last bit which is set in the given 64-bit integer.
5324 *
5325 * Bits are numbered from 1 (least significant) to 64.
5326 *
5327 * @returns index [1..64] of the last set bit.
5328 * @returns 0 if all bits are cleared.
5329 * @param u64 Integer to search for set bits.
5330 * @remark Similar to fls() in BSD.
5331 */
5332#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5333DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5334#else
5335DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5336{
5337# if RT_INLINE_ASM_USES_INTRIN
5338 unsigned long iBit;
5339# if ARCH_BITS == 64
5340 if (_BitScanReverse64(&iBit, u64))
5341 iBit++;
5342 else
5343 iBit = 0;
5344# else
5345 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5346 iBit += 33;
5347 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5348 iBit++;
5349 else
5350 iBit = 0;
5351# endif
5352# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5353 uint64_t iBit;
5354 __asm__ __volatile__("bsrq %1, %0\n\t"
5355 "jnz 1f\n\t"
5356 "xorl %k0, %k0\n\t"
5357 "jmp 2f\n"
5358 "1:\n\t"
5359 "incl %k0\n"
5360 "2:\n\t"
5361 : "=r" (iBit)
5362 : "rm" (u64));
5363# else
5364 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5365 if (iBit)
5366 iBit += 32;
5367 else
5368 iBit = ASMBitLastSetU32((uint32_t)u64);
5369#endif
5370 return (unsigned)iBit;
5371}
5372#endif
5373
5374
5375/**
5376 * Finds the last bit which is set in the given 16-bit integer.
5377 *
5378 * Bits are numbered from 1 (least significant) to 16.
5379 *
5380 * @returns index [1..16] of the last set bit.
5381 * @returns 0 if all bits are cleared.
5382 * @param u16 Integer to search for set bits.
5383 * @remarks For 16-bit bs3kit code.
5384 */
5385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5386DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5387#else
5388DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5389{
5390 return ASMBitLastSetU32((uint32_t)u16);
5391}
5392#endif
5393
5394
5395/**
5396 * Reverse the byte order of the given 16-bit integer.
5397 *
5398 * @returns Revert
5399 * @param u16 16-bit integer value.
5400 */
5401#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5402DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5403#else
5404DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5405{
5406# if RT_INLINE_ASM_USES_INTRIN
5407 u16 = _byteswap_ushort(u16);
5408# elif RT_INLINE_ASM_GNU_STYLE
5409 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5410# else
5411 _asm
5412 {
5413 mov ax, [u16]
5414 ror ax, 8
5415 mov [u16], ax
5416 }
5417# endif
5418 return u16;
5419}
5420#endif
5421
5422
5423/**
5424 * Reverse the byte order of the given 32-bit integer.
5425 *
5426 * @returns Revert
5427 * @param u32 32-bit integer value.
5428 */
5429#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5430DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5431#else
5432DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5433{
5434# if RT_INLINE_ASM_USES_INTRIN
5435 u32 = _byteswap_ulong(u32);
5436# elif RT_INLINE_ASM_GNU_STYLE
5437 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5438# else
5439 _asm
5440 {
5441 mov eax, [u32]
5442 bswap eax
5443 mov [u32], eax
5444 }
5445# endif
5446 return u32;
5447}
5448#endif
5449
5450
5451/**
5452 * Reverse the byte order of the given 64-bit integer.
5453 *
5454 * @returns Revert
5455 * @param u64 64-bit integer value.
5456 */
5457DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5458{
5459#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5460 u64 = _byteswap_uint64(u64);
5461#else
5462 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5463 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5464#endif
5465 return u64;
5466}
5467
5468
5469/**
5470 * Rotate 32-bit unsigned value to the left by @a cShift.
5471 *
5472 * @returns Rotated value.
5473 * @param u32 The value to rotate.
5474 * @param cShift How many bits to rotate by.
5475 */
5476#ifdef __WATCOMC__
5477DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5478#else
5479DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5480{
5481# if RT_INLINE_ASM_USES_INTRIN
5482 return _rotl(u32, cShift);
5483# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5484 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5485 return u32;
5486# else
5487 cShift &= 31;
5488 return (u32 << cShift) | (u32 >> (32 - cShift));
5489# endif
5490}
5491#endif
5492
5493
5494/**
5495 * Rotate 32-bit unsigned value to the right by @a cShift.
5496 *
5497 * @returns Rotated value.
5498 * @param u32 The value to rotate.
5499 * @param cShift How many bits to rotate by.
5500 */
5501#ifdef __WATCOMC__
5502DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5503#else
5504DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5505{
5506# if RT_INLINE_ASM_USES_INTRIN
5507 return _rotr(u32, cShift);
5508# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5509 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5510 return u32;
5511# else
5512 cShift &= 31;
5513 return (u32 >> cShift) | (u32 << (32 - cShift));
5514# endif
5515}
5516#endif
5517
5518
5519/**
5520 * Rotate 64-bit unsigned value to the left by @a cShift.
5521 *
5522 * @returns Rotated value.
5523 * @param u64 The value to rotate.
5524 * @param cShift How many bits to rotate by.
5525 */
5526DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5527{
5528#if RT_INLINE_ASM_USES_INTRIN
5529 return _rotl64(u64, cShift);
5530#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5531 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5532 return u64;
5533#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5534 uint32_t uSpill;
5535 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5536 "jz 1f\n\t"
5537 "xchgl %%eax, %%edx\n\t"
5538 "1:\n\t"
5539 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5540 "jz 2f\n\t"
5541 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5542 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5543 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5544 "2:\n\t" /* } */
5545 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5546 : "0" (u64),
5547 "1" (cShift));
5548 return u64;
5549#else
5550 cShift &= 63;
5551 return (u64 << cShift) | (u64 >> (64 - cShift));
5552#endif
5553}
5554
5555
5556/**
5557 * Rotate 64-bit unsigned value to the right by @a cShift.
5558 *
5559 * @returns Rotated value.
5560 * @param u64 The value to rotate.
5561 * @param cShift How many bits to rotate by.
5562 */
5563DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5564{
5565#if RT_INLINE_ASM_USES_INTRIN
5566 return _rotr64(u64, cShift);
5567#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5568 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5569 return u64;
5570#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5571 uint32_t uSpill;
5572 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5573 "jz 1f\n\t"
5574 "xchgl %%eax, %%edx\n\t"
5575 "1:\n\t"
5576 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5577 "jz 2f\n\t"
5578 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5579 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5580 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5581 "2:\n\t" /* } */
5582 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5583 : "0" (u64),
5584 "1" (cShift));
5585 return u64;
5586#else
5587 cShift &= 63;
5588 return (u64 >> cShift) | (u64 << (64 - cShift));
5589#endif
5590}
5591
5592/** @} */
5593
5594
5595/** @} */
5596
5597#endif
5598
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette