VirtualBox

source: vbox/trunk/include/iprt/asm.h@29286

Last change on this file since 29286 was 29286, checked in by vboxsync, 15 years ago

iprt/asm.h: Create annoyance in DEBUG builds, but continue to play safe in release builds.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 113.7 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2010 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# ifdef RT_ARCH_AMD64
69# pragma intrinsic(__stosq)
70# pragma intrinsic(_byteswap_uint64)
71# pragma intrinsic(_InterlockedExchange64)
72# endif
73#endif
74
75
76/** @defgroup grp_rt_asm ASM - Assembly Routines
77 * @ingroup grp_rt
78 *
79 * @remarks The difference between ordered and unordered atomic operations is that
80 * the former will complete outstanding reads and writes before continuing,
81 * while the latter doesn't make any promises about the order. Ordered
82 * operations don't, it seems, make any 100% promise with respect to whether
83 * the operation will complete before any subsequent memory access.
84 * (Please correct if wrong.)
85 *
86 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
87 * are unordered (note the Uo).
88 *
89 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
90 * or even optimize assembler instructions away. For instance, in the following code
91 * the second rdmsr instruction is optimized away because gcc treats that instruction
92 * as deterministic:
93 *
94 * @code
95 * static inline uint64_t rdmsr_low(int idx)
96 * {
97 * uint32_t low;
98 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
99 * }
100 * ...
101 * uint32_t msr1 = rdmsr_low(1);
102 * foo(msr1);
103 * msr1 = rdmsr_low(1);
104 * bar(msr1);
105 * @endcode
106 *
107 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
108 * use the result of the first call as input parameter for bar() as well. For rdmsr this
109 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
110 * machine status information in general.
111 *
112 * @{
113 */
114
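/* Illustrative sketch added by the editor (not part of the original header):
 * contrasts the ordered and unordered read variants on a counter that some
 * other thread bumps with ASMAtomicIncU32. The function and variable names
 * are hypothetical.
 *
 * @code
 * static uint32_t volatile g_cItemsProduced = 0;
 *
 * // Producer context: ordered increment.
 * void producerAddItem(void)
 * {
 *     ASMAtomicIncU32(&g_cItemsProduced);
 * }
 *
 * // Statistics context: an approximate snapshot is good enough, so the
 * // cheaper unordered read (note the Uo) will do.
 * uint32_t statsPeekItemCount(void)
 * {
 *     return ASMAtomicUoReadU32(&g_cItemsProduced);
 * }
 *
 * // Consumer context: the ordered read is used when the value must be
 * // ordered against the reads and writes around it.
 * uint32_t consumerGetItemCount(void)
 * {
 *     return ASMAtomicReadU32(&g_cItemsProduced);
 * }
 * @endcode
 */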
115
116/** @def RT_INLINE_ASM_GCC_4_3_X_X86
117 * Used to work around some 4.3.x register allocation issues in this version of
118 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
119#ifdef __GNUC__
120# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
121#endif
122#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
123# define RT_INLINE_ASM_GCC_4_3_X_X86 0
124#endif
125
126/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
127 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
128 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
129 * (PIC mode, x86).
130 *
131 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
132 * when in PIC mode on x86.
133 */
134#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
135# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
136 ( (defined(PIC) || defined(__PIC__)) \
137 && defined(RT_ARCH_X86) \
138 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
139 || defined(RT_OS_DARWIN)) )
140#endif
141
142
143/** @def ASMReturnAddress
144 * Gets the return address of the current (or calling if you like) function or method.
145 */
146#ifdef _MSC_VER
147# ifdef __cplusplus
148extern "C"
149# endif
150void * _ReturnAddress(void);
151# pragma intrinsic(_ReturnAddress)
152# define ASMReturnAddress() _ReturnAddress()
153#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
154# define ASMReturnAddress() __builtin_return_address(0)
155#else
156# error "Unsupported compiler."
157#endif
158
159
160/**
161 * Compiler memory barrier.
162 *
163 * Ensure that the compiler does not use any cached (register/tmp stack) memory
164 * values or any outstanding writes when returning from this function.
165 *
166 * This function must be used if non-volatile data is modified by a
167 * device or the VMM. Typical cases are port access, MMIO access,
168 * trapping instructions, etc.
169 */
170#if RT_INLINE_ASM_GNU_STYLE
171# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
172#elif RT_INLINE_ASM_USES_INTRIN
173# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
174#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
175DECLINLINE(void) ASMCompilerBarrier(void)
176{
177 __asm
178 {
179 }
180}
181#endif
182
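/* Illustrative sketch added by the editor: polling a non-volatile flag that
 * is updated from another context (device, interrupt handler, VMM). The
 * compiler barrier forces the flag to be re-read on every iteration instead
 * of being cached in a register. Names are hypothetical; note that this only
 * constrains the compiler - cross-CPU ordering needs the fences further down.
 *
 * @code
 * static bool g_fDataReady = false; // written from another context
 *
 * void waitForData(void)
 * {
 *     while (!g_fDataReady)
 *         ASMCompilerBarrier();     // discard cached copies of g_fDataReady
 * }
 * @endcode
 */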
183
184
185/**
186 * Atomically Exchange an unsigned 8-bit value, ordered.
187 *
188 * @returns Current *pu8 value
189 * @param pu8 Pointer to the 8-bit variable to update.
190 * @param u8 The 8-bit value to assign to *pu8.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
194#else
195DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__("xchgb %0, %1\n\t"
199 : "=m" (*pu8),
200 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
201 : "1" (u8),
202 "m" (*pu8));
203# else
204 __asm
205 {
206# ifdef RT_ARCH_AMD64
207 mov rdx, [pu8]
208 mov al, [u8]
209 xchg [rdx], al
210 mov [u8], al
211# else
212 mov edx, [pu8]
213 mov al, [u8]
214 xchg [edx], al
215 mov [u8], al
216# endif
217 }
218# endif
219 return u8;
220}
221#endif
222
223
224/**
225 * Atomically Exchange a signed 8-bit value, ordered.
226 *
227 * @returns Current *pi8 value
228 * @param pi8 Pointer to the 8-bit variable to update.
229 * @param i8 The 8-bit value to assign to *pi8.
230 */
231DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
232{
233 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
234}
235
236
237/**
238 * Atomically Exchange a bool value, ordered.
239 *
240 * @returns Current *pf value
241 * @param pf Pointer to the boolean variable to update.
242 * @param f The boolean value to assign to *pf.
243 */
244DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
245{
246#ifdef _MSC_VER
247 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
248#else
249 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
250#endif
251}
252
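/* Illustrative sketch added by the editor: a run-once guard built on
 * ASMAtomicXchgBool. Only the caller that flips the flag from false to true
 * performs the initialization; doExpensiveInit is hypothetical.
 *
 * @code
 * static bool volatile g_fInitStarted = false;
 *
 * void initOnce(void)
 * {
 *     if (!ASMAtomicXchgBool(&g_fInitStarted, true))
 *         doExpensiveInit();  // previous value was false, so we got here first
 * }
 * @endcode
 */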
253
254/**
255 * Atomically Exchange an unsigned 16-bit value, ordered.
256 *
257 * @returns Current *pu16 value
258 * @param pu16 Pointer to the 16-bit variable to update.
259 * @param u16 The 16-bit value to assign to *pu16.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
263#else
264DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgw %0, %1\n\t"
268 : "=m" (*pu16),
269 "=r" (u16)
270 : "1" (u16),
271 "m" (*pu16));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu16]
277 mov ax, [u16]
278 xchg [rdx], ax
279 mov [u16], ax
280# else
281 mov edx, [pu16]
282 mov ax, [u16]
283 xchg [edx], ax
284 mov [u16], ax
285# endif
286 }
287# endif
288 return u16;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 16-bit value, ordered.
295 *
296 * @returns Current *pi16 value
297 * @param pi16 Pointer to the 16-bit variable to update.
298 * @param i16 The 16-bit value to assign to *pi16.
299 */
300DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
301{
302 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
303}
304
305
306/**
307 * Atomically Exchange an unsigned 32-bit value, ordered.
308 *
309 * @returns Current *pu32 value
310 * @param pu32 Pointer to the 32-bit variable to update.
311 * @param u32 The 32-bit value to assign to *pu32.
312 */
313#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
314DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
315#else
316DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
317{
318# if RT_INLINE_ASM_GNU_STYLE
319 __asm__ __volatile__("xchgl %0, %1\n\t"
320 : "=m" (*pu32),
321 "=r" (u32)
322 : "1" (u32),
323 "m" (*pu32));
324
325# elif RT_INLINE_ASM_USES_INTRIN
326 u32 = _InterlockedExchange((long *)pu32, u32);
327
328# else
329 __asm
330 {
331# ifdef RT_ARCH_AMD64
332 mov rdx, [pu32]
333 mov eax, u32
334 xchg [rdx], eax
335 mov [u32], eax
336# else
337 mov edx, [pu32]
338 mov eax, u32
339 xchg [edx], eax
340 mov [u32], eax
341# endif
342 }
343# endif
344 return u32;
345}
346#endif
347
348
349/**
350 * Atomically Exchange a signed 32-bit value, ordered.
351 *
352 * @returns Current *pi32 value
353 * @param pi32 Pointer to the 32-bit variable to update.
354 * @param i32 The 32-bit value to assign to *pi32.
355 */
356DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
357{
358 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
359}
360
361
362/**
363 * Atomically Exchange an unsigned 64-bit value, ordered.
364 *
365 * @returns Current *pu64 value
366 * @param pu64 Pointer to the 64-bit variable to update.
367 * @param u64 The 64-bit value to assign to *pu64.
368 */
369#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
370 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
371DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
372#else
373DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
374{
375# if defined(RT_ARCH_AMD64)
376# if RT_INLINE_ASM_USES_INTRIN
377 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
378
379# elif RT_INLINE_ASM_GNU_STYLE
380 __asm__ __volatile__("xchgq %0, %1\n\t"
381 : "=m" (*pu64),
382 "=r" (u64)
383 : "1" (u64),
384 "m" (*pu64));
385# else
386 __asm
387 {
388 mov rdx, [pu64]
389 mov rax, [u64]
390 xchg [rdx], rax
391 mov [u64], rax
392 }
393# endif
394# else /* !RT_ARCH_AMD64 */
395# if RT_INLINE_ASM_GNU_STYLE
396# if defined(PIC) || defined(__PIC__)
397 uint32_t u32EBX = (uint32_t)u64;
398 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
399 "xchgl %%ebx, %3\n\t"
400 "1:\n\t"
401 "lock; cmpxchg8b (%5)\n\t"
402 "jnz 1b\n\t"
403 "movl %3, %%ebx\n\t"
404 /*"xchgl %%esi, %5\n\t"*/
405 : "=A" (u64),
406 "=m" (*pu64)
407 : "0" (*pu64),
408 "m" ( u32EBX ),
409 "c" ( (uint32_t)(u64 >> 32) ),
410 "S" (pu64));
411# else /* !PIC */
412 __asm__ __volatile__("1:\n\t"
413 "lock; cmpxchg8b %1\n\t"
414 "jnz 1b\n\t"
415 : "=A" (u64),
416 "=m" (*pu64)
417 : "0" (*pu64),
418 "b" ( (uint32_t)u64 ),
419 "c" ( (uint32_t)(u64 >> 32) ));
420# endif
421# else
422 __asm
423 {
424 mov ebx, dword ptr [u64]
425 mov ecx, dword ptr [u64 + 4]
426 mov edi, pu64
427 mov eax, dword ptr [edi]
428 mov edx, dword ptr [edi + 4]
429 retry:
430 lock cmpxchg8b [edi]
431 jnz retry
432 mov dword ptr [u64], eax
433 mov dword ptr [u64 + 4], edx
434 }
435# endif
436# endif /* !RT_ARCH_AMD64 */
437 return u64;
438}
439#endif
440
441
442/**
443 * Atomically Exchange a signed 64-bit value, ordered.
444 *
445 * @returns Current *pi64 value
446 * @param pi64 Pointer to the 64-bit variable to update.
447 * @param i64 The 64-bit value to assign to *pi64.
448 */
449DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
450{
451 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
452}
453
454
455/**
456 * Atomically Exchange a pointer value, ordered.
457 *
458 * @returns Current *ppv value
459 * @param ppv Pointer to the pointer variable to update.
460 * @param pv The pointer value to assign to *ppv.
461 */
462DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
463{
464#if ARCH_BITS == 32
465 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
466#elif ARCH_BITS == 64
467 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
468#else
469# error "ARCH_BITS is bogus"
470#endif
471}
472
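/* Illustrative sketch added by the editor: replacing a shared configuration
 * pointer; ASMAtomicXchgPtr hands back the old pointer so no separate read is
 * needed. MYCONFIG and myConfigRelease are hypothetical, and the sketch
 * assumes readers hold their own reference to the object they are using.
 *
 * @code
 * static MYCONFIG * volatile g_pCfg = NULL;
 *
 * void configInstall(MYCONFIG *pNewCfg)
 * {
 *     MYCONFIG *pOldCfg = (MYCONFIG *)ASMAtomicXchgPtr((void * volatile *)&g_pCfg, pNewCfg);
 *     if (pOldCfg)
 *         myConfigRelease(pOldCfg);
 * }
 * @endcode
 */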
473
474/**
475 * Atomically Exchange a raw-mode context pointer value, ordered.
476 *
477 * @returns Current *ppvRC value
478 * @param ppvRC Pointer to the pointer variable to update.
479 * @param pvRC The pointer value to assign to *ppvRC.
480 */
481DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
482{
483 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
484}
485
486
487/**
488 * Atomically Exchange a ring-0 pointer value, ordered.
489 *
490 * @returns Current *ppvR0 value
491 * @param ppvR0 Pointer to the pointer variable to update.
492 * @param pvR0 The pointer value to assign to *ppvR0.
493 */
494DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
495{
496#if R0_ARCH_BITS == 32
497 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
498#elif R0_ARCH_BITS == 64
499 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
500#else
501# error "R0_ARCH_BITS is bogus"
502#endif
503}
504
505
506/**
507 * Atomically Exchange a ring-3 pointer value, ordered.
508 *
509 * @returns Current *ppvR3 value
510 * @param ppvR3 Pointer to the pointer variable to update.
511 * @param pvR3 The pointer value to assign to *ppvR3.
512 */
513DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
514{
515#if R3_ARCH_BITS == 32
516 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
517#elif R3_ARCH_BITS == 64
518 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
519#else
520# error "R3_ARCH_BITS is bogus"
521#endif
522}
523
524
525/** @def ASMAtomicXchgHandle
526 * Atomically Exchange a typical IPRT handle value, ordered.
527 *
528 * @param ph Pointer to the value to update.
529 * @param hNew The new value to assign to *ph.
530 * @param phRes Where to store the current *ph value.
531 *
532 * @remarks This doesn't currently work for all handles (like RTFILE).
533 */
534#if HC_ARCH_BITS == 32
535# define ASMAtomicXchgHandle(ph, hNew, phRes) \
536 do { \
537 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
538 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
539 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
540 } while (0)
541#elif HC_ARCH_BITS == 64
542# define ASMAtomicXchgHandle(ph, hNew, phRes) \
543 do { \
544 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
545 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
546 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
547 } while (0)
548#else
549# error HC_ARCH_BITS
550#endif
551
552
553/**
554 * Atomically Exchange a value whose size might differ
555 * between platforms or compilers, ordered.
556 *
557 * @param pu Pointer to the variable to update.
558 * @param uNew The value to assign to *pu.
559 * @todo This is busted as it's missing the result argument.
560 */
561#define ASMAtomicXchgSize(pu, uNew) \
562 do { \
563 switch (sizeof(*(pu))) { \
564 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
565 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
566 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
567 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
568 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
569 } \
570 } while (0)
571
572/**
573 * Atomically Exchange a value whose size might differ
574 * between platforms or compilers, ordered.
575 *
576 * @param pu Pointer to the variable to update.
577 * @param uNew The value to assign to *pu.
578 * @param puRes Where to store the current *pu value.
579 */
580#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
581 do { \
582 switch (sizeof(*(pu))) { \
583 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
584 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
585 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
586 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
587 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
588 } \
589 } while (0)
590
591
592
593/**
594 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
595 *
596 * @returns true if xchg was done.
597 * @returns false if xchg wasn't done.
598 *
599 * @param pu8 Pointer to the value to update.
600 * @param u8New The new value to assign to *pu8.
601 * @param u8Old The old value to compare *pu8 with.
602 */
603#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
604DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
605#else
606DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
607{
608 uint8_t u8Ret;
609 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
610 "setz %1\n\t"
611 : "=m" (*pu8),
612 "=qm" (u8Ret),
613 "=a" (u8Old)
614 : "q" (u8New),
615 "2" (u8Old),
616 "m" (*pu8));
617 return (bool)u8Ret;
618}
619#endif
620
621
622/**
623 * Atomically Compare and Exchange a signed 8-bit value, ordered.
624 *
625 * @returns true if xchg was done.
626 * @returns false if xchg wasn't done.
627 *
628 * @param pi8 Pointer to the value to update.
629 * @param i8New The new value to assign to *pi8.
630 * @param i8Old The old value to compare *pi8 with.
631 */
632DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
633{
634 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
635}
636
637
638/**
639 * Atomically Compare and Exchange a bool value, ordered.
640 *
641 * @returns true if xchg was done.
642 * @returns false if xchg wasn't done.
643 *
644 * @param pf Pointer to the value to update.
645 * @param fNew The new value to assign to *pf.
646 * @param fOld The old value to compare *pf with.
647 */
648DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
649{
650 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
651}
652
653
654/**
655 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
656 *
657 * @returns true if xchg was done.
658 * @returns false if xchg wasn't done.
659 *
660 * @param pu32 Pointer to the value to update.
661 * @param u32New The new value to assign to *pu32.
662 * @param u32Old The old value to compare *pu32 with.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
666#else
667DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
668{
669# if RT_INLINE_ASM_GNU_STYLE
670 uint8_t u8Ret;
671 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
672 "setz %1\n\t"
673 : "=m" (*pu32),
674 "=qm" (u8Ret),
675 "=a" (u32Old)
676 : "r" (u32New),
677 "2" (u32Old),
678 "m" (*pu32));
679 return (bool)u8Ret;
680
681# elif RT_INLINE_ASM_USES_INTRIN
682 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
683
684# else
685 uint32_t u32Ret;
686 __asm
687 {
688# ifdef RT_ARCH_AMD64
689 mov rdx, [pu32]
690# else
691 mov edx, [pu32]
692# endif
693 mov eax, [u32Old]
694 mov ecx, [u32New]
695# ifdef RT_ARCH_AMD64
696 lock cmpxchg [rdx], ecx
697# else
698 lock cmpxchg [edx], ecx
699# endif
700 setz al
701 movzx eax, al
702 mov [u32Ret], eax
703 }
704 return !!u32Ret;
705# endif
706}
707#endif
708
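/* Illustrative sketch added by the editor: the usual compare-and-swap retry
 * loop around ASMAtomicCmpXchgU32 - here "increment the counter only while it
 * is still non-zero", i.e. retain a reference only while the object is alive.
 * The function name is hypothetical.
 *
 * @code
 * bool tryRetain(uint32_t volatile *pcRefs)
 * {
 *     for (;;)
 *     {
 *         uint32_t cOld = ASMAtomicReadU32(pcRefs);
 *         if (cOld == 0)
 *             return false;                        // already dead, don't resurrect
 *         if (ASMAtomicCmpXchgU32(pcRefs, cOld + 1, cOld))
 *             return true;                         // we won the race
 *         // lost the race - reload and retry
 *     }
 * }
 * @endcode
 */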
709
710/**
711 * Atomically Compare and Exchange a signed 32-bit value, ordered.
712 *
713 * @returns true if xchg was done.
714 * @returns false if xchg wasn't done.
715 *
716 * @param pi32 Pointer to the value to update.
717 * @param i32New The new value to assign to *pi32.
718 * @param i32Old The old value to compare *pi32 with.
719 */
720DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
721{
722 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
723}
724
725
726/**
727 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
728 *
729 * @returns true if xchg was done.
730 * @returns false if xchg wasn't done.
731 *
732 * @param pu64 Pointer to the 64-bit variable to update.
733 * @param u64New The 64-bit value to assign to *pu64.
734 * @param u64Old The value to compare with.
735 */
736#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
737 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
738DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
739#else
740DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
741{
742# if RT_INLINE_ASM_USES_INTRIN
743 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
744
745# elif defined(RT_ARCH_AMD64)
746# if RT_INLINE_ASM_GNU_STYLE
747 uint8_t u8Ret;
748 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
749 "setz %1\n\t"
750 : "=m" (*pu64),
751 "=qm" (u8Ret),
752 "=a" (u64Old)
753 : "r" (u64New),
754 "2" (u64Old),
755 "m" (*pu64));
756 return (bool)u8Ret;
757# else
758 bool fRet;
759 __asm
760 {
761 mov rdx, [pu64]
762 mov rax, [u64Old]
763 mov rcx, [u64New]
764 lock cmpxchg [rdx], rcx
765 setz al
766 mov [fRet], al
767 }
768 return fRet;
769# endif
770# else /* !RT_ARCH_AMD64 */
771 uint32_t u32Ret;
772# if RT_INLINE_ASM_GNU_STYLE
773# if defined(PIC) || defined(__PIC__)
774 uint32_t u32EBX = (uint32_t)u64New;
775 uint32_t u32Spill;
776 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
777 "lock; cmpxchg8b (%6)\n\t"
778 "setz %%al\n\t"
779 "movl %4, %%ebx\n\t"
780 "movzbl %%al, %%eax\n\t"
781 : "=a" (u32Ret),
782 "=d" (u32Spill),
783# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
784 "+m" (*pu64)
785# else
786 "=m" (*pu64)
787# endif
788 : "A" (u64Old),
789 "m" ( u32EBX ),
790 "c" ( (uint32_t)(u64New >> 32) ),
791 "S" (pu64));
792# else /* !PIC */
793 uint32_t u32Spill;
794 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
795 "setz %%al\n\t"
796 "movzbl %%al, %%eax\n\t"
797 : "=a" (u32Ret),
798 "=d" (u32Spill),
799 "+m" (*pu64)
800 : "A" (u64Old),
801 "b" ( (uint32_t)u64New ),
802 "c" ( (uint32_t)(u64New >> 32) ));
803# endif
804 return (bool)u32Ret;
805# else
806 __asm
807 {
808 mov ebx, dword ptr [u64New]
809 mov ecx, dword ptr [u64New + 4]
810 mov edi, [pu64]
811 mov eax, dword ptr [u64Old]
812 mov edx, dword ptr [u64Old + 4]
813 lock cmpxchg8b [edi]
814 setz al
815 movzx eax, al
816 mov dword ptr [u32Ret], eax
817 }
818 return !!u32Ret;
819# endif
820# endif /* !RT_ARCH_AMD64 */
821}
822#endif
823
824
825/**
826 * Atomically Compare and exchange a signed 64-bit value, ordered.
827 *
828 * @returns true if xchg was done.
829 * @returns false if xchg wasn't done.
830 *
831 * @param pi64 Pointer to the 64-bit variable to update.
832 * @param i64 The 64-bit value to assign to *pi64.
833 * @param i64Old The value to compare with.
834 */
835DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
836{
837 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
838}
839
840
841/**
842 * Atomically Compare and Exchange a pointer value, ordered.
843 *
844 * @returns true if xchg was done.
845 * @returns false if xchg wasn't done.
846 *
847 * @param ppv Pointer to the value to update.
848 * @param pvNew The new value to assign to *ppv.
849 * @param pvOld The old value to compare *ppv with.
850 */
851DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
852{
853#if ARCH_BITS == 32
854 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
855#elif ARCH_BITS == 64
856 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
857#else
858# error "ARCH_BITS is bogus"
859#endif
860}
861
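/* Illustrative sketch added by the editor: lock-free LIFO push using
 * ASMAtomicCmpXchgPtr in a retry loop. MYNODE is hypothetical. A matching
 * lock-free pop needs additional care (the ABA problem) and is not shown.
 *
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 * static MYNODE * volatile g_pHead = NULL;
 *
 * void stackPush(MYNODE *pNode)
 * {
 *     for (;;)
 *     {
 *         MYNODE *pHead = (MYNODE *)ASMAtomicReadPtr((void * volatile *)&g_pHead);
 *         pNode->pNext = pHead;
 *         if (ASMAtomicCmpXchgPtr((void * volatile *)&g_pHead, pNode, pHead))
 *             break;
 *     }
 * }
 * @endcode
 */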
862
863/** @def ASMAtomicCmpXchgHandle
864 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
865 *
866 * @param ph Pointer to the value to update.
867 * @param hNew The new value to assign to *ph.
868 * @param hOld The old value to compare *ph with.
869 * @param fRc Where to store the result.
870 *
871 * @remarks This doesn't currently work for all handles (like RTFILE).
872 */
873#if HC_ARCH_BITS == 32
874# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
875 do { \
876 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
877 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
878 } while (0)
879#elif HC_ARCH_BITS == 64
880# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
881 do { \
882 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
883 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
884 } while (0)
885#else
886# error HC_ARCH_BITS
887#endif
888
889
890/** @def ASMAtomicCmpXchgSize
891 * Atomically Compare and Exchange a value whose size might differ
892 * between platforms or compilers, ordered.
893 *
894 * @param pu Pointer to the value to update.
895 * @param uNew The new value to assign to *pu.
896 * @param uOld The old value to compare *pu with.
897 * @param fRc Where to store the result.
898 */
899#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
900 do { \
901 switch (sizeof(*(pu))) { \
902 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
903 break; \
904 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
905 break; \
906 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
907 (fRc) = false; \
908 break; \
909 } \
910 } while (0)
911
912
913/**
914 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
915 * passes back old value, ordered.
916 *
917 * @returns true if xchg was done.
918 * @returns false if xchg wasn't done.
919 *
920 * @param pu32 Pointer to the value to update.
921 * @param u32New The new value to assign to *pu32.
922 * @param u32Old The old value to compare *pu32 with.
923 * @param pu32Old Pointer to store the old value at.
924 */
925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
926DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
927#else
928DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
929{
930# if RT_INLINE_ASM_GNU_STYLE
931 uint8_t u8Ret;
932 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
933 "setz %1\n\t"
934 : "=m" (*pu32),
935 "=qm" (u8Ret),
936 "=a" (*pu32Old)
937 : "r" (u32New),
938 "a" (u32Old),
939 "m" (*pu32));
940 return (bool)u8Ret;
941
942# elif RT_INLINE_ASM_USES_INTRIN
943 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
944
945# else
946 uint32_t u32Ret;
947 __asm
948 {
949# ifdef RT_ARCH_AMD64
950 mov rdx, [pu32]
951# else
952 mov edx, [pu32]
953# endif
954 mov eax, [u32Old]
955 mov ecx, [u32New]
956# ifdef RT_ARCH_AMD64
957 lock cmpxchg [rdx], ecx
958 mov rdx, [pu32Old]
959 mov [rdx], eax
960# else
961 lock cmpxchg [edx], ecx
962 mov edx, [pu32Old]
963 mov [edx], eax
964# endif
965 setz al
966 movzx eax, al
967 mov [u32Ret], eax
968 }
969 return !!u32Ret;
970# endif
971}
972#endif
973
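/* Illustrative sketch added by the editor: the Ex variant hands back the
 * value that was actually found, so a retry loop only needs one explicit read
 * up front. Here a fetch-or (set bits, return the previous value) is built
 * from it; the function name is hypothetical.
 *
 * @code
 * uint32_t fetchOrU32(uint32_t volatile *pu32, uint32_t fBits)
 * {
 *     uint32_t uOld = ASMAtomicReadU32(pu32);
 *     while (!ASMAtomicCmpXchgExU32(pu32, uOld | fBits, uOld, &uOld))
 *     {
 *         // uOld now holds the value that beat us; just retry with it.
 *     }
 *     return uOld;
 * }
 * @endcode
 */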
974
975/**
976 * Atomically Compare and Exchange a signed 32-bit value, additionally
977 * passes back old value, ordered.
978 *
979 * @returns true if xchg was done.
980 * @returns false if xchg wasn't done.
981 *
982 * @param pi32 Pointer to the value to update.
983 * @param i32New The new value to assign to *pi32.
984 * @param i32Old The old value to compare *pi32 with.
985 * @param pi32Old Pointer to store the old value at.
986 */
987DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
988{
989 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
990}
991
992
993/**
994 * Atomically Compare and exchange an unsigned 64-bit value, additionally
995 * passing back old value, ordered.
996 *
997 * @returns true if xchg was done.
998 * @returns false if xchg wasn't done.
999 *
1000 * @param pu64 Pointer to the 64-bit variable to update.
1001 * @param u64New The 64-bit value to assign to *pu64.
1002 * @param u64Old The value to compare with.
1003 * @param pu64Old Pointer to store the old value at.
1004 */
1005#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1006 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1007DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1008#else
1009DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1010{
1011# if RT_INLINE_ASM_USES_INTRIN
1012 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1013
1014# elif defined(RT_ARCH_AMD64)
1015# if RT_INLINE_ASM_GNU_STYLE
1016 uint8_t u8Ret;
1017 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1018 "setz %1\n\t"
1019 : "=m" (*pu64),
1020 "=qm" (u8Ret),
1021 "=a" (*pu64Old)
1022 : "r" (u64New),
1023 "a" (u64Old),
1024 "m" (*pu64));
1025 return (bool)u8Ret;
1026# else
1027 bool fRet;
1028 __asm
1029 {
1030 mov rdx, [pu64]
1031 mov rax, [u64Old]
1032 mov rcx, [u64New]
1033 lock cmpxchg [rdx], rcx
1034 mov rdx, [pu64Old]
1035 mov [rdx], rax
1036 setz al
1037 mov [fRet], al
1038 }
1039 return fRet;
1040# endif
1041# else /* !RT_ARCH_AMD64 */
1042# if RT_INLINE_ASM_GNU_STYLE
1043 uint64_t u64Ret;
1044# if defined(PIC) || defined(__PIC__)
1045 /* NB: this code uses a memory clobber description, because the clean
1046 * solution with an output value for *pu64 makes gcc run out of registers.
1047 * This will cause suboptimal code, and anyone with a better solution is
1048 * welcome to improve this. */
1049 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1050 "lock; cmpxchg8b %3\n\t"
1051 "xchgl %%ebx, %1\n\t"
1052 : "=A" (u64Ret)
1053 : "DS" ((uint32_t)u64New),
1054 "c" ((uint32_t)(u64New >> 32)),
1055 "m" (*pu64),
1056 "0" (u64Old)
1057 : "memory" );
1058# else /* !PIC */
1059 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1060 : "=A" (u64Ret),
1061 "=m" (*pu64)
1062 : "b" ((uint32_t)u64New),
1063 "c" ((uint32_t)(u64New >> 32)),
1064 "m" (*pu64),
1065 "0" (u64Old));
1066# endif
1067 *pu64Old = u64Ret;
1068 return u64Ret == u64Old;
1069# else
1070 uint32_t u32Ret;
1071 __asm
1072 {
1073 mov ebx, dword ptr [u64New]
1074 mov ecx, dword ptr [u64New + 4]
1075 mov edi, [pu64]
1076 mov eax, dword ptr [u64Old]
1077 mov edx, dword ptr [u64Old + 4]
1078 lock cmpxchg8b [edi]
1079 mov ebx, [pu64Old]
1080 mov [ebx], eax
1081 setz al
1082 movzx eax, al
1083 add ebx, 4
1084 mov [ebx], edx
1085 mov dword ptr [u32Ret], eax
1086 }
1087 return !!u32Ret;
1088# endif
1089# endif /* !RT_ARCH_AMD64 */
1090}
1091#endif
1092
1093
1094/**
1095 * Atomically Compare and exchange a signed 64-bit value, additionally
1096 * passing back old value, ordered.
1097 *
1098 * @returns true if xchg was done.
1099 * @returns false if xchg wasn't done.
1100 *
1101 * @param pi64 Pointer to the 64-bit variable to update.
1102 * @param i64 The 64-bit value to assign to *pi64.
1103 * @param i64Old The value to compare with.
1104 * @param pi64Old Pointer to store the old value at.
1105 */
1106DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1107{
1108 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1109}
1110
1111/** @def ASMAtomicCmpXchgExHandle
1112 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1113 *
1114 * @param ph Pointer to the value to update.
1115 * @param hNew The new value to assign to *ph.
1116 * @param hOld The old value to compare *ph with.
1117 * @param fRc Where to store the result.
1118 * @param phOldVal Pointer to where to store the old value.
1119 *
1120 * @remarks This doesn't currently work for all handles (like RTFILE).
1121 */
1122#if HC_ARCH_BITS == 32
1123# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1124 do { \
1125 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1126 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1127 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1128 } while (0)
1129#elif HC_ARCH_BITS == 64
1130# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1131 do { \
1132 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1133 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1134 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1135 } while (0)
1136#else
1137# error HC_ARCH_BITS
1138#endif
1139
1140
1141/** @def ASMAtomicCmpXchgExSize
1142 * Atomically Compare and Exchange a value whose size might differ
1143 * between platforms or compilers. Additionally passes back old value.
1144 *
1145 * @param pu Pointer to the value to update.
1146 * @param uNew The new value to assign to *pu.
1147 * @param uOld The old value to compare *pu with.
1148 * @param fRc Where to store the result.
1149 * @param puOldVal Pointer to where to store the old value.
1150 */
1151#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1152 do { \
1153 switch (sizeof(*(pu))) { \
1154 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1155 break; \
1156 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1157 break; \
1158 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1159 (fRc) = false; \
1160 *(puOldVal) = 0; \
1161 break; \
1162 } \
1163 } while (0)
1164
1165
1166/**
1167 * Atomically Compare and Exchange a pointer value, additionally
1168 * passing back old value, ordered.
1169 *
1170 * @returns true if xchg was done.
1171 * @returns false if xchg wasn't done.
1172 *
1173 * @param ppv Pointer to the value to update.
1174 * @param pvNew The new value to assign to *ppv.
1175 * @param pvOld The old value to compare *ppv with.
1176 * @param ppvOld Pointer to store the old value at.
1177 */
1178DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1179{
1180#if ARCH_BITS == 32
1181 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1182#elif ARCH_BITS == 64
1183 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1184#else
1185# error "ARCH_BITS is bogus"
1186#endif
1187}
1188
1189
1190/**
1191 * Atomically exchanges and adds to a 32-bit value, ordered.
1192 *
1193 * @returns The old value.
1194 * @param pu32 Pointer to the value.
1195 * @param u32 Number to add.
1196 */
1197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1198DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
1199#else
1200DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
1201{
1202# if RT_INLINE_ASM_USES_INTRIN
1203 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
1204 return u32;
1205
1206# elif RT_INLINE_ASM_GNU_STYLE
1207 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1208 : "=r" (u32),
1209 "=m" (*pu32)
1210 : "0" (u32),
1211 "m" (*pu32)
1212 : "memory");
1213 return u32;
1214# else
1215 __asm
1216 {
1217 mov eax, [u32]
1218# ifdef RT_ARCH_AMD64
1219 mov rdx, [pu32]
1220 lock xadd [rdx], eax
1221# else
1222 mov edx, [pu32]
1223 lock xadd [edx], eax
1224# endif
1225 mov [u32], eax
1226 }
1227 return u32;
1228# endif
1229}
1230#endif
1231
1232
1233/**
1234 * Atomically exchanges and adds to a signed 32-bit value, ordered.
1235 *
1236 * @returns The old value.
1237 * @param pi32 Pointer to the value.
1238 * @param i32 Number to add.
1239 */
1240DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
1241{
1242 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
1243}
1244
1245
1246/**
1247 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
1248 *
1249 * @returns The old value.
1250 * @param pu32 Pointer to the value.
1251 * @param u32 Number to subtract.
1252 */
1253DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
1254{
1255 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
1256}
1257
1258
1259/**
1260 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
1261 *
1262 * @returns The old value.
1263 * @param pi32 Pointer to the value.
1264 * @param i32 Number to subtract.
1265 */
1266DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
1267{
1268 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
1269}
1270
1271
1272/**
1273 * Atomically increment a 32-bit value, ordered.
1274 *
1275 * @returns The new value.
1276 * @param pu32 Pointer to the value to increment.
1277 */
1278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1279DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
1280#else
1281DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
1282{
1283 uint32_t u32;
1284# if RT_INLINE_ASM_USES_INTRIN
1285 u32 = _InterlockedIncrement((long *)pu32);
1286 return u32;
1287
1288# elif RT_INLINE_ASM_GNU_STYLE
1289 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1290 : "=r" (u32),
1291 "=m" (*pu32)
1292 : "0" (1),
1293 "m" (*pu32)
1294 : "memory");
1295 return u32+1;
1296# else
1297 __asm
1298 {
1299 mov eax, 1
1300# ifdef RT_ARCH_AMD64
1301 mov rdx, [pu32]
1302 lock xadd [rdx], eax
1303# else
1304 mov edx, [pu32]
1305 lock xadd [edx], eax
1306# endif
1307 mov u32, eax
1308 }
1309 return u32+1;
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Atomically increment a signed 32-bit value, ordered.
1317 *
1318 * @returns The new value.
1319 * @param pi32 Pointer to the value to increment.
1320 */
1321DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
1322{
1323 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
1324}
1325
1326
1327/**
1328 * Atomically decrement an unsigned 32-bit value, ordered.
1329 *
1330 * @returns The new value.
1331 * @param pu32 Pointer to the value to decrement.
1332 */
1333#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1334DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
1335#else
1336DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
1337{
1338 uint32_t u32;
1339# if RT_INLINE_ASM_USES_INTRIN
1340 u32 = _InterlockedDecrement((long *)pu32);
1341 return u32;
1342
1343# elif RT_INLINE_ASM_GNU_STYLE
1344 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1345 : "=r" (u32),
1346 "=m" (*pu32)
1347 : "0" (-1),
1348 "m" (*pu32)
1349 : "memory");
1350 return u32-1;
1351# else
1352 __asm
1353 {
1354 mov eax, -1
1355# ifdef RT_ARCH_AMD64
1356 mov rdx, [pu32]
1357 lock xadd [rdx], eax
1358# else
1359 mov edx, [pu32]
1360 lock xadd [edx], eax
1361# endif
1362 mov u32, eax
1363 }
1364 return u32-1;
1365# endif
1366}
1367#endif
1368
1369
1370/**
1371 * Atomically decrement a signed 32-bit value, ordered.
1372 *
1373 * @returns The new value.
1374 * @param pi32 Pointer to the value to decrement.
1375 */
1376DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
1377{
1378 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
1379}
1380
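/* Illustrative sketch added by the editor: reference counting with
 * ASMAtomicIncU32 / ASMAtomicDecU32. Both return the new value, so the caller
 * that takes the count to zero is the one that destroys the object. MYOBJ and
 * myObjDestroy are hypothetical.
 *
 * @code
 * typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 * void myObjRetain(MYOBJ *pObj)
 * {
 *     ASMAtomicIncU32(&pObj->cRefs);
 * }
 *
 * void myObjRelease(MYOBJ *pObj)
 * {
 *     if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *         myObjDestroy(pObj);   // last reference gone
 * }
 * @endcode
 */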
1381
1382/**
1383 * Atomically Or an unsigned 32-bit value, ordered.
1384 *
1385 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
1386 * @param u32 The value to OR *pu32 with.
1387 */
1388#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1389DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
1390#else
1391DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
1392{
1393# if RT_INLINE_ASM_USES_INTRIN
1394 _InterlockedOr((long volatile *)pu32, (long)u32);
1395
1396# elif RT_INLINE_ASM_GNU_STYLE
1397 __asm__ __volatile__("lock; orl %1, %0\n\t"
1398 : "=m" (*pu32)
1399 : "ir" (u32),
1400 "m" (*pu32));
1401# else
1402 __asm
1403 {
1404 mov eax, [u32]
1405# ifdef RT_ARCH_AMD64
1406 mov rdx, [pu32]
1407 lock or [rdx], eax
1408# else
1409 mov edx, [pu32]
1410 lock or [edx], eax
1411# endif
1412 }
1413# endif
1414}
1415#endif
1416
1417
1418/**
1419 * Atomically Or a signed 32-bit value, ordered.
1420 *
1421 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
1422 * @param i32 The value to OR *pi32 with.
1423 */
1424DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
1425{
1426 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
1427}
1428
1429
1430/**
1431 * Atomically And an unsigned 32-bit value, ordered.
1432 *
1433 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
1434 * @param u32 The value to AND *pu32 with.
1435 */
1436#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1437DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
1438#else
1439DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
1440{
1441# if RT_INLINE_ASM_USES_INTRIN
1442 _InterlockedAnd((long volatile *)pu32, u32);
1443
1444# elif RT_INLINE_ASM_GNU_STYLE
1445 __asm__ __volatile__("lock; andl %1, %0\n\t"
1446 : "=m" (*pu32)
1447 : "ir" (u32),
1448 "m" (*pu32));
1449# else
1450 __asm
1451 {
1452 mov eax, [u32]
1453# ifdef RT_ARCH_AMD64
1454 mov rdx, [pu32]
1455 lock and [rdx], eax
1456# else
1457 mov edx, [pu32]
1458 lock and [edx], eax
1459# endif
1460 }
1461# endif
1462}
1463#endif
1464
1465
1466/**
1467 * Atomically And a signed 32-bit value, ordered.
1468 *
1469 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
1470 * @param i32 The value to AND *pi32 with.
1471 */
1472DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
1473{
1474 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
1475}
1476
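/* Illustrative sketch added by the editor: maintaining a shared flag word
 * with ASMAtomicOrU32 / ASMAtomicAndU32; setting and clearing individual bits
 * needs no compare-and-swap loop. The flag names and values are hypothetical.
 *
 * @code
 * #define MYFLAG_SHUTDOWN   UINT32_C(0x00000001)
 * #define MYFLAG_SUSPENDED  UINT32_C(0x00000002)
 * static uint32_t volatile g_fFlags = 0;
 *
 * void requestShutdown(void)     { ASMAtomicOrU32(&g_fFlags, MYFLAG_SHUTDOWN); }
 * void clearSuspended(void)      { ASMAtomicAndU32(&g_fFlags, ~MYFLAG_SUSPENDED); }
 * bool isShutdownRequested(void) { return !!(ASMAtomicReadU32(&g_fFlags) & MYFLAG_SHUTDOWN); }
 * @endcode
 */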
1477
1478/**
1479 * Serialize Instruction.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(void) ASMSerializeInstruction(void);
1483#else
1484DECLINLINE(void) ASMSerializeInstruction(void)
1485{
1486# if RT_INLINE_ASM_GNU_STYLE
1487 RTCCUINTREG xAX = 0;
1488# ifdef RT_ARCH_AMD64
1489 __asm__ ("cpuid"
1490 : "=a" (xAX)
1491 : "0" (xAX)
1492 : "rbx", "rcx", "rdx");
1493# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1494 __asm__ ("push %%ebx\n\t"
1495 "cpuid\n\t"
1496 "pop %%ebx\n\t"
1497 : "=a" (xAX)
1498 : "0" (xAX)
1499 : "ecx", "edx");
1500# else
1501 __asm__ ("cpuid"
1502 : "=a" (xAX)
1503 : "0" (xAX)
1504 : "ebx", "ecx", "edx");
1505# endif
1506
1507# elif RT_INLINE_ASM_USES_INTRIN
1508 int aInfo[4];
1509 __cpuid(aInfo, 0);
1510
1511# else
1512 __asm
1513 {
1514 push ebx
1515 xor eax, eax
1516 cpuid
1517 pop ebx
1518 }
1519# endif
1520}
1521#endif
1522
1523
1524/**
1525 * Memory fence, waits for any pending writes and reads to complete.
1526 */
1527DECLINLINE(void) ASMMemoryFence(void)
1528{
1529 /** @todo use mfence? check if all cpus we care for support it. */
1530 uint32_t volatile u32;
1531 ASMAtomicXchgU32(&u32, 0);
1532}
1533
1534
1535/**
1536 * Write fence, waits for any pending writes to complete.
1537 */
1538DECLINLINE(void) ASMWriteFence(void)
1539{
1540 /** @todo use sfence? check if all cpus we care for support it. */
1541 ASMMemoryFence();
1542}
1543
1544
1545/**
1546 * Read fence, waits for any pending reads to complete.
1547 */
1548DECLINLINE(void) ASMReadFence(void)
1549{
1550 /** @todo use lfence? check if all cpus we care for support it. */
1551 ASMMemoryFence();
1552}
1553
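/* Illustrative sketch added by the editor: a simple publish/consume pattern
 * using the write and read fences. The producer stores the payload before
 * raising the ready flag; the consumer checks the flag before touching the
 * payload. Names are hypothetical and the flag is single-shot for brevity.
 *
 * @code
 * static uint32_t      g_uPayload;
 * static bool volatile g_fPayloadReady = false;
 *
 * void producerPublish(uint32_t uValue)
 * {
 *     g_uPayload = uValue;
 *     ASMWriteFence();                        // payload store completes first
 *     ASMAtomicXchgBool(&g_fPayloadReady, true);
 * }
 *
 * bool consumerTryFetch(uint32_t *puDst)
 * {
 *     if (!ASMAtomicUoReadBool(&g_fPayloadReady))
 *         return false;
 *     ASMReadFence();                         // don't read the payload early
 *     *puDst = g_uPayload;
 *     return true;
 * }
 * @endcode
 */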
1554
1555/**
1556 * Atomically reads an unsigned 8-bit value, ordered.
1557 *
1558 * @returns Current *pu8 value
1559 * @param pu8 Pointer to the 8-bit variable to read.
1560 */
1561DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1562{
1563 ASMMemoryFence();
1564 return *pu8; /* byte reads are atomic on x86 */
1565}
1566
1567
1568/**
1569 * Atomically reads an unsigned 8-bit value, unordered.
1570 *
1571 * @returns Current *pu8 value
1572 * @param pu8 Pointer to the 8-bit variable to read.
1573 */
1574DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1575{
1576 return *pu8; /* byte reads are atomic on x86 */
1577}
1578
1579
1580/**
1581 * Atomically reads a signed 8-bit value, ordered.
1582 *
1583 * @returns Current *pi8 value
1584 * @param pi8 Pointer to the 8-bit variable to read.
1585 */
1586DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1587{
1588 ASMMemoryFence();
1589 return *pi8; /* byte reads are atomic on x86 */
1590}
1591
1592
1593/**
1594 * Atomically reads a signed 8-bit value, unordered.
1595 *
1596 * @returns Current *pi8 value
1597 * @param pi8 Pointer to the 8-bit variable to read.
1598 */
1599DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1600{
1601 return *pi8; /* byte reads are atomic on x86 */
1602}
1603
1604
1605/**
1606 * Atomically reads an unsigned 16-bit value, ordered.
1607 *
1608 * @returns Current *pu16 value
1609 * @param pu16 Pointer to the 16-bit variable to read.
1610 */
1611DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1612{
1613 ASMMemoryFence();
1614 Assert(!((uintptr_t)pu16 & 1));
1615 return *pu16;
1616}
1617
1618
1619/**
1620 * Atomically reads an unsigned 16-bit value, unordered.
1621 *
1622 * @returns Current *pu16 value
1623 * @param pu16 Pointer to the 16-bit variable to read.
1624 */
1625DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1626{
1627 Assert(!((uintptr_t)pu16 & 1));
1628 return *pu16;
1629}
1630
1631
1632/**
1633 * Atomically reads a signed 16-bit value, ordered.
1634 *
1635 * @returns Current *pi16 value
1636 * @param pi16 Pointer to the 16-bit variable to read.
1637 */
1638DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1639{
1640 ASMMemoryFence();
1641 Assert(!((uintptr_t)pi16 & 1));
1642 return *pi16;
1643}
1644
1645
1646/**
1647 * Atomically reads a signed 16-bit value, unordered.
1648 *
1649 * @returns Current *pi16 value
1650 * @param pi16 Pointer to the 16-bit variable to read.
1651 */
1652DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1653{
1654 Assert(!((uintptr_t)pi16 & 1));
1655 return *pi16;
1656}
1657
1658
1659/**
1660 * Atomically reads an unsigned 32-bit value, ordered.
1661 *
1662 * @returns Current *pu32 value
1663 * @param pu32 Pointer to the 32-bit variable to read.
1664 */
1665DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1666{
1667 ASMMemoryFence();
1668 Assert(!((uintptr_t)pu32 & 3));
1669 return *pu32;
1670}
1671
1672
1673/**
1674 * Atomically reads an unsigned 32-bit value, unordered.
1675 *
1676 * @returns Current *pu32 value
1677 * @param pu32 Pointer to the 32-bit variable to read.
1678 */
1679DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1680{
1681 Assert(!((uintptr_t)pu32 & 3));
1682 return *pu32;
1683}
1684
1685
1686/**
1687 * Atomically reads a signed 32-bit value, ordered.
1688 *
1689 * @returns Current *pi32 value
1690 * @param pi32 Pointer to the 32-bit variable to read.
1691 */
1692DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1693{
1694 ASMMemoryFence();
1695 Assert(!((uintptr_t)pi32 & 3));
1696 return *pi32;
1697}
1698
1699
1700/**
1701 * Atomically reads a signed 32-bit value, unordered.
1702 *
1703 * @returns Current *pi32 value
1704 * @param pi32 Pointer to the 32-bit variable to read.
1705 */
1706DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1707{
1708 Assert(!((uintptr_t)pi32 & 3));
1709 return *pi32;
1710}
1711
1712
1713/**
1714 * Atomically reads an unsigned 64-bit value, ordered.
1715 *
1716 * @returns Current *pu64 value
1717 * @param pu64 Pointer to the 64-bit variable to read.
1718 * The memory pointed to must be writable.
1719 * @remark This will fault if the memory is read-only!
1720 */
1721#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1722 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1723DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1724#else
1725DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1726{
1727 uint64_t u64;
1728# ifdef RT_ARCH_AMD64
1729 Assert(!((uintptr_t)pu64 & 7));
1730/*# if RT_INLINE_ASM_GNU_STYLE
1731 __asm__ __volatile__( "mfence\n\t"
1732 "movq %1, %0\n\t"
1733 : "=r" (u64)
1734 : "m" (*pu64));
1735# else
1736 __asm
1737 {
1738 mfence
1739 mov rdx, [pu64]
1740 mov rax, [rdx]
1741 mov [u64], rax
1742 }
1743# endif*/
1744 ASMMemoryFence();
1745 u64 = *pu64;
1746# else /* !RT_ARCH_AMD64 */
1747# if RT_INLINE_ASM_GNU_STYLE
1748# if defined(PIC) || defined(__PIC__)
1749 uint32_t u32EBX = 0;
1750 Assert(!((uintptr_t)pu64 & 7));
1751 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1752 "lock; cmpxchg8b (%5)\n\t"
1753 "movl %3, %%ebx\n\t"
1754 : "=A" (u64),
1755# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1756 "+m" (*pu64)
1757# else
1758 "=m" (*pu64)
1759# endif
1760 : "0" (0),
1761 "m" (u32EBX),
1762 "c" (0),
1763 "S" (pu64));
1764# else /* !PIC */
1765 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1766 : "=A" (u64),
1767 "+m" (*pu64)
1768 : "0" (0),
1769 "b" (0),
1770 "c" (0));
1771# endif
1772# else
1773 Assert(!((uintptr_t)pu64 & 7));
1774 __asm
1775 {
1776 xor eax, eax
1777 xor edx, edx
1778 mov edi, pu64
1779 xor ecx, ecx
1780 xor ebx, ebx
1781 lock cmpxchg8b [edi]
1782 mov dword ptr [u64], eax
1783 mov dword ptr [u64 + 4], edx
1784 }
1785# endif
1786# endif /* !RT_ARCH_AMD64 */
1787 return u64;
1788}
1789#endif
1790
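/* Illustrative sketch added by the editor: on 32-bit hosts a plain 64-bit
 * load may tear into two 32-bit reads, so a 64-bit counter shared between
 * contexts should be read with ASMAtomicReadU64. The variable name is
 * hypothetical; per the remark above, the memory must be writable because the
 * 32-bit implementation is based on cmpxchg8b.
 *
 * @code
 * static uint64_t volatile g_cbTotal;
 *
 * uint64_t statsGetTotalBytes(void)
 * {
 *     return ASMAtomicReadU64(&g_cbTotal);    // never returns a torn value
 * }
 * @endcode
 */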
1791
1792/**
1793 * Atomically reads an unsigned 64-bit value, unordered.
1794 *
1795 * @returns Current *pu64 value
1796 * @param pu64 Pointer to the 64-bit variable to read.
1797 * The memory pointed to must be writable.
1798 * @remark This will fault if the memory is read-only!
1799 */
1800#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1801 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1802DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1803#else
1804DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1805{
1806 uint64_t u64;
1807# ifdef RT_ARCH_AMD64
1808 Assert(!((uintptr_t)pu64 & 7));
1809/*# if RT_INLINE_ASM_GNU_STYLE
1810 Assert(!((uintptr_t)pu64 & 7));
1811 __asm__ __volatile__("movq %1, %0\n\t"
1812 : "=r" (u64)
1813 : "m" (*pu64));
1814# else
1815 __asm
1816 {
1817 mov rdx, [pu64]
1818 mov rax, [rdx]
1819 mov [u64], rax
1820 }
1821# endif */
1822 u64 = *pu64;
1823# else /* !RT_ARCH_AMD64 */
1824# if RT_INLINE_ASM_GNU_STYLE
1825# if defined(PIC) || defined(__PIC__)
1826 uint32_t u32EBX = 0;
1827 uint32_t u32Spill;
1828 Assert(!((uintptr_t)pu64 & 7));
1829 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1830 "xor %%ecx,%%ecx\n\t"
1831 "xor %%edx,%%edx\n\t"
1832 "xchgl %%ebx, %3\n\t"
1833 "lock; cmpxchg8b (%4)\n\t"
1834 "movl %3, %%ebx\n\t"
1835 : "=A" (u64),
1836# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1837 "+m" (*pu64),
1838# else
1839 "=m" (*pu64),
1840# endif
1841 "=c" (u32Spill)
1842 : "m" (u32EBX),
1843 "S" (pu64));
1844# else /* !PIC */
1845 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1846 : "=A" (u64),
1847 "+m" (*pu64)
1848 : "0" (0),
1849 "b" (0),
1850 "c" (0));
1851# endif
1852# else
1853 Assert(!((uintptr_t)pu64 & 7));
1854 __asm
1855 {
1856 xor eax, eax
1857 xor edx, edx
1858 mov edi, pu64
1859 xor ecx, ecx
1860 xor ebx, ebx
1861 lock cmpxchg8b [edi]
1862 mov dword ptr [u64], eax
1863 mov dword ptr [u64 + 4], edx
1864 }
1865# endif
1866# endif /* !RT_ARCH_AMD64 */
1867 return u64;
1868}
1869#endif
1870
1871
1872/**
1873 * Atomically reads a signed 64-bit value, ordered.
1874 *
1875 * @returns Current *pi64 value
1876 * @param pi64 Pointer to the 64-bit variable to read.
1877 * The memory pointed to must be writable.
1878 * @remark This will fault if the memory is read-only!
1879 */
1880DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1881{
1882 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1883}
1884
1885
1886/**
1887 * Atomically reads a signed 64-bit value, unordered.
1888 *
1889 * @returns Current *pi64 value
1890 * @param pi64 Pointer to the 64-bit variable to read.
1891 * The memory pointed to must be writable.
1892 * @remark This will fault if the memory is read-only!
1893 */
1894DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1895{
1896 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1897}
1898
1899
1900/**
1901 * Atomically reads a pointer value, ordered.
1902 *
1903 * @returns Current *pv value
1904 * @param ppv Pointer to the pointer variable to read.
1905 */
1906DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1907{
1908#if ARCH_BITS == 32
1909 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1910#elif ARCH_BITS == 64
1911 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1912#else
1913# error "ARCH_BITS is bogus"
1914#endif
1915}
1916
1917
1918/**
1919 * Atomically reads a pointer value, unordered.
1920 *
1921 * @returns Current *pv value
1922 * @param ppv Pointer to the pointer variable to read.
1923 */
1924DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1925{
1926#if ARCH_BITS == 32
1927 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1928#elif ARCH_BITS == 64
1929 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1930#else
1931# error "ARCH_BITS is bogus"
1932#endif
1933}
1934
1935
1936/**
1937 * Atomically reads a boolean value, ordered.
1938 *
1939 * @returns Current *pf value
1940 * @param pf Pointer to the boolean variable to read.
1941 */
1942DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1943{
1944 ASMMemoryFence();
1945 return *pf; /* byte reads are atomic on x86 */
1946}
1947
1948
1949/**
1950 * Atomically reads a boolean value, unordered.
1951 *
1952 * @returns Current *pf value
1953 * @param pf Pointer to the boolean variable to read.
1954 */
1955DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1956{
1957 return *pf; /* byte reads are atomic on x86 */
1958}
1959
1960
1961/**
1962 * Atomically read a typical IPRT handle value, ordered.
1963 *
1964 * @param ph Pointer to the handle variable to read.
1965 * @param phRes Where to store the result.
1966 *
1967 * @remarks This doesn't currently work for all handles (like RTFILE).
1968 */
1969#if HC_ARCH_BITS == 32
1970# define ASMAtomicReadHandle(ph, phRes) \
1971 do { \
1972 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1973 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1974 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1975 } while (0)
1976#elif HC_ARCH_BITS == 64
1977# define ASMAtomicReadHandle(ph, phRes) \
1978 do { \
1979 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1980 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1981 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1982 } while (0)
1983#else
1984# error HC_ARCH_BITS
1985#endif
1986
1987
1988/**
1989 * Atomically read a typical IPRT handle value, unordered.
1990 *
1991 * @param ph Pointer to the handle variable to read.
1992 * @param phRes Where to store the result.
1993 *
1994 * @remarks This doesn't currently work for all handles (like RTFILE).
1995 */
1996#if HC_ARCH_BITS == 32
1997# define ASMAtomicUoReadHandle(ph, phRes) \
1998 do { \
1999 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2000 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2001 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2002 } while (0)
2003#elif HC_ARCH_BITS == 64
2004# define ASMAtomicUoReadHandle(ph, phRes) \
2005 do { \
2006 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2007 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2008 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2009 } while (0)
2010#else
2011# error HC_ARCH_BITS
2012#endif
2013
2014
2015/**
2016 * Atomically read a value whose size might differ
2017 * between platforms or compilers, ordered.
2018 *
2019 * @param pu Pointer to the variable to read.
2020 * @param puRes Where to store the result.
2021 */
2022#define ASMAtomicReadSize(pu, puRes) \
2023 do { \
2024 switch (sizeof(*(pu))) { \
2025 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2026 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2027 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2028 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2029 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2030 } \
2031 } while (0)
2032
2033
2034/**
2035 * Atomically read a value whose size might differ
2036 * between platforms or compilers, unordered.
2037 *
2038 * @param pu Pointer to the variable to read.
2039 * @param puRes Where to store the result.
2040 */
2041#define ASMAtomicUoReadSize(pu, puRes) \
2042 do { \
2043 switch (sizeof(*(pu))) { \
2044 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2045 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2046 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2047 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2048 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2049 } \
2050 } while (0)
2051
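/*
 * A minimal usage sketch (not from the VirtualBox sources): ASMAtomicReadSize
 * dispatches on sizeof(*pu), which is convenient for types whose width
 * differs between 32-bit and 64-bit builds, such as size_t.  The variable and
 * function names are made up for illustration.
 */
static volatile size_t g_cbExampleInFlight;

static size_t ExampleGetBytesInFlight(void)
{
    size_t cb;
    ASMAtomicReadSize(&g_cbExampleInFlight, &cb); /* expands to the U32 or U64 read */
    return cb;
}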
2052
2053/**
2054 * Atomically writes an unsigned 8-bit value, ordered.
2055 *
2056 * @param pu8 Pointer to the 8-bit variable.
2057 * @param u8 The 8-bit value to assign to *pu8.
2058 */
2059DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2060{
2061 ASMAtomicXchgU8(pu8, u8);
2062}
2063
2064
2065/**
2066 * Atomically writes an unsigned 8-bit value, unordered.
2067 *
2068 * @param pu8 Pointer to the 8-bit variable.
2069 * @param u8 The 8-bit value to assign to *pu8.
2070 */
2071DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2072{
2073 *pu8 = u8; /* byte writes are atomic on x86 */
2074}
2075
2076
2077/**
2078 * Atomically writes a signed 8-bit value, ordered.
2079 *
2080 * @param pi8 Pointer to the 8-bit variable.
2081 * @param i8 The 8-bit value to assign to *pi8.
2082 */
2083DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2084{
2085 ASMAtomicXchgS8(pi8, i8);
2086}
2087
2088
2089/**
2090 * Atomically writes a signed 8-bit value, unordered.
2091 *
2092 * @param pi8 Pointer to the 8-bit variable.
2093 * @param i8 The 8-bit value to assign to *pi8.
2094 */
2095DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2096{
2097 *pi8 = i8; /* byte writes are atomic on x86 */
2098}
2099
2100
2101/**
2102 * Atomically writes an unsigned 16-bit value, ordered.
2103 *
2104 * @param pu16 Pointer to the 16-bit variable.
2105 * @param u16 The 16-bit value to assign to *pu16.
2106 */
2107DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2108{
2109 ASMAtomicXchgU16(pu16, u16);
2110}
2111
2112
2113/**
2114 * Atomically writes an unsigned 16-bit value, unordered.
2115 *
2116 * @param pu16 Pointer to the 16-bit variable.
2117 * @param u16 The 16-bit value to assign to *pu16.
2118 */
2119DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2120{
2121 Assert(!((uintptr_t)pu16 & 1));
2122 *pu16 = u16;
2123}
2124
2125
2126/**
2127 * Atomically writes a signed 16-bit value, ordered.
2128 *
2129 * @param pi16 Pointer to the 16-bit variable.
2130 * @param i16 The 16-bit value to assign to *pi16.
2131 */
2132DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2133{
2134 ASMAtomicXchgS16(pi16, i16);
2135}
2136
2137
2138/**
2139 * Atomically writes a signed 16-bit value, unordered.
2140 *
2141 * @param pi16 Pointer to the 16-bit variable.
2142 * @param i16 The 16-bit value to assign to *pi16.
2143 */
2144DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2145{
2146 Assert(!((uintptr_t)pi16 & 1));
2147 *pi16 = i16;
2148}
2149
2150
2151/**
2152 * Atomically writes an unsigned 32-bit value, ordered.
2153 *
2154 * @param pu32 Pointer to the 32-bit variable.
2155 * @param u32 The 32-bit value to assign to *pu32.
2156 */
2157DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2158{
2159 ASMAtomicXchgU32(pu32, u32);
2160}
2161
2162
2163/**
2164 * Atomically writes an unsigned 32-bit value, unordered.
2165 *
2166 * @param pu32 Pointer to the 32-bit variable.
2167 * @param u32 The 32-bit value to assign to *pu32.
2168 */
2169DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2170{
2171 Assert(!((uintptr_t)pu32 & 3));
2172 *pu32 = u32;
2173}
2174
2175
2176/**
2177 * Atomically writes a signed 32-bit value, ordered.
2178 *
2179 * @param pi32 Pointer to the 32-bit variable.
2180 * @param i32 The 32-bit value to assign to *pi32.
2181 */
2182DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2183{
2184 ASMAtomicXchgS32(pi32, i32);
2185}
2186
2187
2188/**
2189 * Atomically writes a signed 32-bit value, unordered.
2190 *
2191 * @param pi32 Pointer to the 32-bit variable.
2192 * @param i32 The 32-bit value to assign to *pi32.
2193 */
2194DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2195{
2196 Assert(!((uintptr_t)pi32 & 3));
2197 *pi32 = i32;
2198}
2199
2200
2201/**
2202 * Atomically writes an unsigned 64-bit value, ordered.
2203 *
2204 * @param pu64 Pointer to the 64-bit variable.
2205 * @param u64 The 64-bit value to assign to *pu64.
2206 */
2207DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2208{
2209 ASMAtomicXchgU64(pu64, u64);
2210}
2211
2212
2213/**
2214 * Atomically writes an unsigned 64-bit value, unordered.
2215 *
2216 * @param pu64 Pointer to the 64-bit variable.
2217 * @param u64 The 64-bit value to assign to *pu64.
2218 */
2219DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2220{
2221 Assert(!((uintptr_t)pu64 & 7));
2222#if ARCH_BITS == 64
2223 *pu64 = u64;
2224#else
2225 ASMAtomicXchgU64(pu64, u64);
2226#endif
2227}
2228
2229
2230/**
2231 * Atomically writes a signed 64-bit value, ordered.
2232 *
2233 * @param pi64 Pointer to the 64-bit variable.
2234 * @param i64 The 64-bit value to assign to *pi64.
2235 */
2236DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2237{
2238 ASMAtomicXchgS64(pi64, i64);
2239}
2240
2241
2242/**
2243 * Atomically writes a signed 64-bit value, unordered.
2244 *
2245 * @param pi64 Pointer to the 64-bit variable.
2246 * @param i64 The 64-bit value to assign to *pi64.
2247 */
2248DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2249{
2250 Assert(!((uintptr_t)pi64 & 7));
2251#if ARCH_BITS == 64
2252 *pi64 = i64;
2253#else
2254 ASMAtomicXchgS64(pi64, i64);
2255#endif
2256}
2257
2258
2259/**
2260 * Atomically writes a boolean value, ordered.
2261 *
2262 * @param pf Pointer to the boolean variable.
2263 * @param f The boolean value to assign to *pf.
2264 */
2265DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2266{
2267 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2268}
2269
2270
2271/**
2272 * Atomically writes a boolean value, unordered.
2273 *
2274 * @param pf Pointer to the boolean variable.
2275 * @param f The boolean value to assign to *pf.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2278{
2279 *pf = f; /* byte writes are atomic on x86 */
2280}
2281
2282
2283/**
2284 * Atomically writes a pointer value, ordered.
2285 *
2287 * @param ppv Pointer to the pointer variable.
2288 * @param pv The pointer value to assign to *ppv.
2289 */
2290DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
2291{
2292#if ARCH_BITS == 32
2293 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2294#elif ARCH_BITS == 64
2295 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2296#else
2297# error "ARCH_BITS is bogus"
2298#endif
2299}
2300
2301
2302/**
2303 * Atomically writes a pointer value, unordered.
2304 *
2306 * @param ppv Pointer to the pointer variable.
2307 * @param pv The pointer value to assign to *ppv.
2308 */
2309DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
2310{
2311#if ARCH_BITS == 32
2312 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2313#elif ARCH_BITS == 64
2314 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2315#else
2316# error "ARCH_BITS is bogus"
2317#endif
2318}
2319
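/*
 * A minimal usage sketch (not from the VirtualBox sources): pairing
 * ASMAtomicWritePtr with ASMAtomicReadPtr to publish a fully initialized
 * structure to other threads.  EXAMPLEDATA and g_pExampleShared are made-up
 * names for illustration.
 */
typedef struct EXAMPLEDATA { uint32_t u32Value; } EXAMPLEDATA;
static void * volatile g_pExampleShared;

static void ExamplePublish(EXAMPLEDATA *pData)
{
    pData->u32Value = 42;                        /* initialize before publishing */
    ASMAtomicWritePtr(&g_pExampleShared, pData); /* ordered write: the init is visible first */
}

static EXAMPLEDATA *ExampleRetrieve(void)
{
    return (EXAMPLEDATA *)ASMAtomicReadPtr(&g_pExampleShared);
}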
2320
2321/**
2322 * Atomically write a typical IPRT handle value, ordered.
2323 *
2324 * @param ph Pointer to the variable to update.
2325 * @param hNew The value to assign to *ph.
2326 *
2327 * @remarks This doesn't currently work for all handles (like RTFILE).
2328 */
2329#if HC_ARCH_BITS == 32
2330# define ASMAtomicWriteHandle(ph, hNew) \
2331 do { \
2332 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2333 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2334 } while (0)
2335#elif HC_ARCH_BITS == 64
2336# define ASMAtomicWriteHandle(ph, hNew) \
2337 do { \
2338 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2339 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2340 } while (0)
2341#else
2342# error HC_ARCH_BITS
2343#endif
2344
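/*
 * A minimal usage sketch (not from the VirtualBox sources): the handle macros
 * only require the handle to have the host word size, so a pointer-style
 * handle works on both 32-bit and 64-bit hosts.  EXAMPLEHANDLE and g_hExample
 * are made-up names for illustration.
 */
typedef struct EXAMPLEHANDLEINT *EXAMPLEHANDLE;
static EXAMPLEHANDLE volatile g_hExample;

static void ExampleSetHandle(EXAMPLEHANDLE hNew)
{
    ASMAtomicWriteHandle(&g_hExample, hNew);    /* the AssertCompile checks the size */
}

static EXAMPLEHANDLE ExampleGetHandle(void)
{
    EXAMPLEHANDLE hRet;
    ASMAtomicReadHandle(&g_hExample, &hRet);
    return hRet;
}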
2345
2346/**
2347 * Atomically write a typical IPRT handle value, unordered.
2348 *
2349 * @param ph Pointer to the variable to update.
2350 * @param hNew The value to assign to *ph.
2351 *
2352 * @remarks This doesn't currently work for all handles (like RTFILE).
2353 */
2354#if HC_ARCH_BITS == 32
2355# define ASMAtomicUoWriteHandle(ph, hNew) \
2356 do { \
2357 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2358 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2359 } while (0)
2360#elif HC_ARCH_BITS == 64
2361# define ASMAtomicUoWriteHandle(ph, hNew) \
2362 do { \
2363 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2364 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2365 } while (0)
2366#else
2367# error HC_ARCH_BITS
2368#endif
2369
2370
2371/**
2372 * Atomically write a value whose size might differ
2373 * between platforms or compilers, ordered.
2374 *
2375 * @param pu Pointer to the variable to update.
2376 * @param uNew The value to assign to *pu.
2377 */
2378#define ASMAtomicWriteSize(pu, uNew) \
2379 do { \
2380 switch (sizeof(*(pu))) { \
2381 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2382 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2383 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2384 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2385 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2386 } \
2387 } while (0)
2388
2389/**
2390 * Atomically write a value whose size might differ
2391 * between platforms or compilers, unordered.
2392 *
2393 * @param pu Pointer to the variable to update.
2394 * @param uNew The value to assign to *pu.
2395 */
2396#define ASMAtomicUoWriteSize(pu, uNew) \
2397 do { \
2398 switch (sizeof(*(pu))) { \
2399 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2400 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2401 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2402 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2403 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2404 } \
2405 } while (0)
2406
2407
2408
2409
2410/** @def RT_ASM_PAGE_SIZE
2411 * We try to avoid dragging in iprt/param.h here.
2412 * @internal
2413 */
2414#if defined(RT_ARCH_SPARC64)
2415# define RT_ASM_PAGE_SIZE 0x2000
2416# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2417# if PAGE_SIZE != 0x2000
2418# error "PAGE_SIZE is not 0x2000!"
2419# endif
2420# endif
2421#else
2422# define RT_ASM_PAGE_SIZE 0x1000
2423# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2424# if PAGE_SIZE != 0x1000
2425# error "PAGE_SIZE is not 0x1000!"
2426# endif
2427# endif
2428#endif
2429
2430/**
2431 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
2432 *
2433 * @param pv Pointer to the memory block. This must be page aligned.
2434 */
2435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2436DECLASM(void) ASMMemZeroPage(volatile void *pv);
2437# else
2438DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2439{
2440# if RT_INLINE_ASM_USES_INTRIN
2441# ifdef RT_ARCH_AMD64
2442 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
2443# else
2444 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
2445# endif
2446
2447# elif RT_INLINE_ASM_GNU_STYLE
2448 RTCCUINTREG uDummy;
2449# ifdef RT_ARCH_AMD64
2450 __asm__ __volatile__("rep stosq"
2451 : "=D" (pv),
2452 "=c" (uDummy)
2453 : "0" (pv),
2454 "c" (RT_ASM_PAGE_SIZE >> 3),
2455 "a" (0)
2456 : "memory");
2457# else
2458 __asm__ __volatile__("rep stosl"
2459 : "=D" (pv),
2460 "=c" (uDummy)
2461 : "0" (pv),
2462 "c" (RT_ASM_PAGE_SIZE >> 2),
2463 "a" (0)
2464 : "memory");
2465# endif
2466# else
2467 __asm
2468 {
2469# ifdef RT_ARCH_AMD64
2470 xor rax, rax
2471 mov ecx, 0200h
2472 mov rdi, [pv]
2473 rep stosq
2474# else
2475 xor eax, eax
2476 mov ecx, 0400h
2477 mov edi, [pv]
2478 rep stosd
2479# endif
2480 }
2481# endif
2482}
2483# endif
2484
2485
2486/**
2487 * Zeros a memory block with a 32-bit aligned size.
2488 *
2489 * @param pv Pointer to the memory block.
2490 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2491 */
2492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2493DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2494#else
2495DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2496{
2497# if RT_INLINE_ASM_USES_INTRIN
2498# ifdef RT_ARCH_AMD64
2499 if (!(cb & 7))
2500 __stosq((unsigned __int64 *)pv, 0, cb / 8);
2501 else
2502# endif
2503 __stosd((unsigned long *)pv, 0, cb / 4);
2504
2505# elif RT_INLINE_ASM_GNU_STYLE
2506 __asm__ __volatile__("rep stosl"
2507 : "=D" (pv),
2508 "=c" (cb)
2509 : "0" (pv),
2510 "1" (cb >> 2),
2511 "a" (0)
2512 : "memory");
2513# else
2514 __asm
2515 {
2516 xor eax, eax
2517# ifdef RT_ARCH_AMD64
2518 mov rcx, [cb]
2519 shr rcx, 2
2520 mov rdi, [pv]
2521# else
2522 mov ecx, [cb]
2523 shr ecx, 2
2524 mov edi, [pv]
2525# endif
2526 rep stosd
2527 }
2528# endif
2529}
2530#endif
2531
2532
2533/**
2534 * Fills a memory block with a 32-bit aligned size.
2535 *
2536 * @param pv Pointer to the memory block.
2537 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2538 * @param u32 The value to fill with.
2539 */
2540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2541DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2542#else
2543DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2544{
2545# if RT_INLINE_ASM_USES_INTRIN
2546# ifdef RT_ARCH_AMD64
2547 if (!(cb & 7))
2548 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
2549 else
2550# endif
2551 __stosd((unsigned long *)pv, u32, cb / 4);
2552
2553# elif RT_INLINE_ASM_GNU_STYLE
2554 __asm__ __volatile__("rep stosl"
2555 : "=D" (pv),
2556 "=c" (cb)
2557 : "0" (pv),
2558 "1" (cb >> 2),
2559 "a" (u32)
2560 : "memory");
2561# else
2562 __asm
2563 {
2564# ifdef RT_ARCH_AMD64
2565 mov rcx, [cb]
2566 shr rcx, 2
2567 mov rdi, [pv]
2568# else
2569 mov ecx, [cb]
2570 shr ecx, 2
2571 mov edi, [pv]
2572# endif
2573 mov eax, [u32]
2574 rep stosd
2575 }
2576# endif
2577}
2578#endif
2579
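/*
 * A minimal usage sketch (not from the VirtualBox sources): ASMMemZero32 and
 * ASMMemFill32 require the byte count to be a multiple of four, which a
 * structure of uint32_t fields satisfies by construction.  The EXAMPLESTATS
 * type and the functions are made up for illustration.
 */
typedef struct EXAMPLESTATS { uint32_t acHits[16]; uint32_t acMisses[16]; } EXAMPLESTATS;

static void ExampleResetStats(EXAMPLESTATS *pStats)
{
    ASMMemZero32(pStats, sizeof(*pStats));                       /* size is 32-bit aligned */
}

static void ExamplePoisonStats(EXAMPLESTATS *pStats)
{
    ASMMemFill32(pStats, sizeof(*pStats), UINT32_C(0xdeadbeef)); /* recognizable fill pattern */
}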
2580
2581/**
2582 * Checks if a memory page is all zeros.
2583 *
2584 * @returns true / false.
2585 *
2586 * @param pvPage Pointer to the page. Must be aligned on 16 byte
2587 * boundary.
2588 */
2589DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
2590{
2591# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
2592 union { RTCCUINTREG r; bool f; } uAX;
2593 RTCCUINTREG xCX, xDI;
2594 Assert(!((uintptr_t)pvPage & 15));
2595 __asm__ __volatile__("repe; "
2596# ifdef RT_ARCH_AMD64
2597 "scasq\n\t"
2598# else
2599 "scasl\n\t"
2600# endif
2601 "setnc %%al\n\t"
2602 : "=&c" (xCX),
2603 "=&D" (xDI),
2604 "=&a" (uAX.r)
2605 : "mr" (pvPage),
2606# ifdef RT_ARCH_AMD64
2607 "0" (RT_ASM_PAGE_SIZE/8),
2608# else
2609 "0" (RT_ASM_PAGE_SIZE/4),
2610# endif
2611 "1" (pvPage),
2612 "2" (0));
2613 return uAX.f;
2614# else
2615 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
2616 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
2617 Assert(!((uintptr_t)pvPage & 15));
2618 for (;;)
2619 {
2620 if (puPtr[0]) return false;
2621 if (puPtr[4]) return false;
2622
2623 if (puPtr[2]) return false;
2624 if (puPtr[6]) return false;
2625
2626 if (puPtr[1]) return false;
2627 if (puPtr[5]) return false;
2628
2629 if (puPtr[3]) return false;
2630 if (puPtr[7]) return false;
2631
2632 if (!--cLeft)
2633 return true;
2634 puPtr += 8;
2635 }
2636 return true;
2637# endif
2638}
2639
2640
2641/**
2642 * Checks if a memory block is filled with the specified byte.
2643 *
2644 * This is a sort of inverted memchr.
2645 *
2646 * @returns Pointer to the byte which doesn't equal u8.
2647 * @returns NULL if all equal to u8.
2648 *
2649 * @param pv Pointer to the memory block.
2650 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2651 * @param u8 The value it's supposed to be filled with.
2652 *
2653 * @todo Fix name, it is a predicate function but it's not returning boolean!
2654 */
2655DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
2656{
2657/** @todo rewrite this in inline assembly? */
2658 uint8_t const *pb = (uint8_t const *)pv;
2659 for (; cb; cb--, pb++)
2660 if (RT_UNLIKELY(*pb != u8))
2661 return (void *)pb;
2662 return NULL;
2663}
2664
2665
2666/**
2667 * Checks if a memory block is filled with the specified 32-bit value.
2668 *
2669 * This is a sort of inverted memchr.
2670 *
2671 * @returns Pointer to the first value which doesn't equal u32.
2672 * @returns NULL if all equal to u32.
2673 *
2674 * @param pv Pointer to the memory block.
2675 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2676 * @param u32 The value it's supposed to be filled with.
2677 *
2678 * @todo Fix name, it is a predicate function but it's not returning boolean!
2679 */
2680DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
2681{
2682/** @todo rewrite this in inline assembly? */
2683 uint32_t const *pu32 = (uint32_t const *)pv;
2684 for (; cb; cb -= 4, pu32++)
2685 if (RT_UNLIKELY(*pu32 != u32))
2686 return (uint32_t *)pu32;
2687 return NULL;
2688}
2689
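/*
 * A minimal usage sketch (not from the VirtualBox sources): despite the
 * predicate-style names, ASMMemIsAll8 and ASMMemIsAllU32 return NULL when the
 * whole block matches and a pointer to the first mismatch otherwise, so the
 * result is compared against NULL.  The wrapper below is made up for
 * illustration; cb follows the 32-bit alignment requirement documented above.
 */
static bool ExampleIsZeroFilled(const void *pv, size_t cb)
{
    return ASMMemIsAll8(pv, cb, 0) == NULL;     /* NULL means every byte is zero */
}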
2690
2691/**
2692 * Probes a byte pointer for read access.
2693 *
2694 * While the function will fault if the byte is not read accessible,
2695 * the idea is to do this in a safe place like before acquiring locks
2696 * and such like.
2697 *
2698 * Also, this function guarantees that an eager compiler is not going
2699 * to optimize the probing away.
2700 *
2701 * @param pvByte Pointer to the byte.
2702 */
2703#if RT_INLINE_ASM_EXTERNAL
2704DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2705#else
2706DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2707{
2708 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2709 uint8_t u8;
2710# if RT_INLINE_ASM_GNU_STYLE
2711 __asm__ __volatile__("movb (%1), %0\n\t"
2712 : "=r" (u8)
2713 : "r" (pvByte));
2714# else
2715 __asm
2716 {
2717# ifdef RT_ARCH_AMD64
2718 mov rax, [pvByte]
2719 mov al, [rax]
2720# else
2721 mov eax, [pvByte]
2722 mov al, [eax]
2723# endif
2724 mov [u8], al
2725 }
2726# endif
2727 return u8;
2728}
2729#endif
2730
2731/**
2732 * Probes a buffer for read access page by page.
2733 *
2734 * While the function will fault if the buffer is not fully read
2735 * accessible, the idea is to do this in a safe place like before
2736 * acquiring locks and such like.
2737 *
2738 * Also, this function guarantees that an eager compiler is not going
2739 * to optimize the probing away.
2740 *
2741 * @param pvBuf Pointer to the buffer.
2742 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2743 */
2744DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2745{
2746 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2747 /* the first byte */
2748 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2749 ASMProbeReadByte(pu8);
2750
2751 /* the pages in between. */
2752 while (cbBuf > RT_ASM_PAGE_SIZE)
2753 {
2754 ASMProbeReadByte(pu8);
2755 cbBuf -= RT_ASM_PAGE_SIZE;
2756 pu8 += RT_ASM_PAGE_SIZE;
2757 }
2758
2759 /* the last byte */
2760 ASMProbeReadByte(pu8 + cbBuf - 1);
2761}
2762
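/*
 * A minimal usage sketch (not from the VirtualBox sources): probing a caller
 * supplied buffer up front so that any page fault is taken here, in a safe
 * context, rather than later while a spinlock is held.  The function name is
 * made up for illustration.
 */
static void ExampleValidateRequest(const void *pvReq, size_t cbReq)
{
    if (cbReq)                              /* ASMProbeReadBuffer requires cbBuf >= 1 */
        ASMProbeReadBuffer(pvReq, cbReq);   /* touches one byte of every page */
    /* ... it is now safe to read pvReq in a context that must not fault ... */
}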
2763
2764/** @def ASMBreakpoint
2765 * Debugger Breakpoint.
2766 * @remark In the gnu world we add a nop instruction after the int3 to
2767 * force gdb to remain at the int3 source line.
2768 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
2769 * @internal
2770 */
2771#if RT_INLINE_ASM_GNU_STYLE
2772# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
2773# ifndef __L4ENV__
2774# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
2775# else
2776# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
2777# endif
2778# elif defined(RT_ARCH_SPARC64)
2779 # define ASMBreakpoint() do { __asm__ __volatile__("illtrap 0\n\t"); } while (0) /** @todo Sparc64: this is just a wild guess. */
2780# elif defined(RT_ARCH_SPARC)
2781# define ASMBreakpoint() do { __asm__ __volatile__("unimp 0\n\t"); } while (0) /** @todo Sparc: this is just a wild guess (same as Sparc64, just different name). */
2782# else
2783# error "PORTME"
2784# endif
2785#else
2786# define ASMBreakpoint() __debugbreak()
2787#endif
2788
2789
2790/**
2791 * Spinloop hint for platforms that have these, empty function on the other
2792 * platforms.
2793 *
2794 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
2795 * spin locks.
2796 */
2797#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
2798DECLASM(void) ASMNopPause(void);
2799#else
2800DECLINLINE(void) ASMNopPause(void)
2801{
2802# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
2803# if RT_INLINE_ASM_GNU_STYLE
2804 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
2805# else
2806 __asm {
2807 _emit 0f3h
2808 _emit 090h
2809 }
2810# endif
2811# else
2812 /* dummy */
2813# endif
2814}
2815#endif
2816
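/*
 * A minimal usage sketch (not from the VirtualBox sources): the typical home
 * for ASMNopPause is the body of a spin-wait loop, here polling a flag that
 * another thread sets with ASMAtomicWriteU32.  g_fExampleReady is a made-up
 * variable name.
 */
static volatile uint32_t g_fExampleReady;

static void ExampleSpinUntilReady(void)
{
    while (!ASMAtomicReadU32(&g_fExampleReady))
        ASMNopPause();                      /* PAUSE hint; a plain no-op on other platforms */
}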
2817
2818
2819/** @defgroup grp_inline_bits Bit Operations
2820 * @{
2821 */
2822
2823
2824/**
2825 * Sets a bit in a bitmap.
2826 *
2827 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
2828 * @param iBit The bit to set.
2829 *
2830 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2831 * However, doing so will yield better performance as well as avoiding
2832 * traps accessing the last bits in the bitmap.
2833 */
2834#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2835DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
2836#else
2837DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
2838{
2839# if RT_INLINE_ASM_USES_INTRIN
2840 _bittestandset((long *)pvBitmap, iBit);
2841
2842# elif RT_INLINE_ASM_GNU_STYLE
2843 __asm__ __volatile__("btsl %1, %0"
2844 : "=m" (*(volatile long *)pvBitmap)
2845 : "Ir" (iBit),
2846 "m" (*(volatile long *)pvBitmap)
2847 : "memory");
2848# else
2849 __asm
2850 {
2851# ifdef RT_ARCH_AMD64
2852 mov rax, [pvBitmap]
2853 mov edx, [iBit]
2854 bts [rax], edx
2855# else
2856 mov eax, [pvBitmap]
2857 mov edx, [iBit]
2858 bts [eax], edx
2859# endif
2860 }
2861# endif
2862}
2863#endif
2864
2865
2866/**
2867 * Atomically sets a bit in a bitmap, ordered.
2868 *
2869 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
2870 * the memory access isn't atomic!
2871 * @param iBit The bit to set.
2872 */
2873#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2874DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
2875#else
2876DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
2877{
2878 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
2879# if RT_INLINE_ASM_USES_INTRIN
2880 _interlockedbittestandset((long *)pvBitmap, iBit);
2881# elif RT_INLINE_ASM_GNU_STYLE
2882 __asm__ __volatile__("lock; btsl %1, %0"
2883 : "=m" (*(volatile long *)pvBitmap)
2884 : "Ir" (iBit),
2885 "m" (*(volatile long *)pvBitmap)
2886 : "memory");
2887# else
2888 __asm
2889 {
2890# ifdef RT_ARCH_AMD64
2891 mov rax, [pvBitmap]
2892 mov edx, [iBit]
2893 lock bts [rax], edx
2894# else
2895 mov eax, [pvBitmap]
2896 mov edx, [iBit]
2897 lock bts [eax], edx
2898# endif
2899 }
2900# endif
2901}
2902#endif
2903
2904
2905/**
2906 * Clears a bit in a bitmap.
2907 *
2908 * @param pvBitmap Pointer to the bitmap.
2909 * @param iBit The bit to clear.
2910 *
2911 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2912 * However, doing so will yield better performance as well as avoiding
2913 * traps accessing the last bits in the bitmap.
2914 */
2915#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2916DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
2917#else
2918DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
2919{
2920# if RT_INLINE_ASM_USES_INTRIN
2921 _bittestandreset((long *)pvBitmap, iBit);
2922
2923# elif RT_INLINE_ASM_GNU_STYLE
2924 __asm__ __volatile__("btrl %1, %0"
2925 : "=m" (*(volatile long *)pvBitmap)
2926 : "Ir" (iBit),
2927 "m" (*(volatile long *)pvBitmap)
2928 : "memory");
2929# else
2930 __asm
2931 {
2932# ifdef RT_ARCH_AMD64
2933 mov rax, [pvBitmap]
2934 mov edx, [iBit]
2935 btr [rax], edx
2936# else
2937 mov eax, [pvBitmap]
2938 mov edx, [iBit]
2939 btr [eax], edx
2940# endif
2941 }
2942# endif
2943}
2944#endif
2945
2946
2947/**
2948 * Atomically clears a bit in a bitmap, ordered.
2949 *
2950 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
2951 * the memory access isn't atomic!
2952 * @param iBit The bit to clear.
2953 * @remarks No memory barrier, take care on smp.
2954 */
2955#if RT_INLINE_ASM_EXTERNAL
2956DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
2957#else
2958DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
2959{
2960 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
2961# if RT_INLINE_ASM_GNU_STYLE
2962 __asm__ __volatile__("lock; btrl %1, %0"
2963 : "=m" (*(volatile long *)pvBitmap)
2964 : "Ir" (iBit),
2965 "m" (*(volatile long *)pvBitmap)
2966 : "memory");
2967# else
2968 __asm
2969 {
2970# ifdef RT_ARCH_AMD64
2971 mov rax, [pvBitmap]
2972 mov edx, [iBit]
2973 lock btr [rax], edx
2974# else
2975 mov eax, [pvBitmap]
2976 mov edx, [iBit]
2977 lock btr [eax], edx
2978# endif
2979 }
2980# endif
2981}
2982#endif
2983
2984
2985/**
2986 * Toggles a bit in a bitmap.
2987 *
2988 * @param pvBitmap Pointer to the bitmap.
2989 * @param iBit The bit to toggle.
2990 *
2991 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2992 * However, doing so will yield better performance as well as avoiding
2993 * traps accessing the last bits in the bitmap.
2994 */
2995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2996DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
2997#else
2998DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
2999{
3000# if RT_INLINE_ASM_USES_INTRIN
3001 _bittestandcomplement((long *)pvBitmap, iBit);
3002# elif RT_INLINE_ASM_GNU_STYLE
3003 __asm__ __volatile__("btcl %1, %0"
3004 : "=m" (*(volatile long *)pvBitmap)
3005 : "Ir" (iBit),
3006 "m" (*(volatile long *)pvBitmap)
3007 : "memory");
3008# else
3009 __asm
3010 {
3011# ifdef RT_ARCH_AMD64
3012 mov rax, [pvBitmap]
3013 mov edx, [iBit]
3014 btc [rax], edx
3015# else
3016 mov eax, [pvBitmap]
3017 mov edx, [iBit]
3018 btc [eax], edx
3019# endif
3020 }
3021# endif
3022}
3023#endif
3024
3025
3026/**
3027 * Atomically toggles a bit in a bitmap, ordered.
3028 *
3029 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3030 * the memory access isn't atomic!
3031 * @param iBit The bit to toggle.
3032 */
3033#if RT_INLINE_ASM_EXTERNAL
3034DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3035#else
3036DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3037{
3038 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3039# if RT_INLINE_ASM_GNU_STYLE
3040 __asm__ __volatile__("lock; btcl %1, %0"
3041 : "=m" (*(volatile long *)pvBitmap)
3042 : "Ir" (iBit),
3043 "m" (*(volatile long *)pvBitmap)
3044 : "memory");
3045# else
3046 __asm
3047 {
3048# ifdef RT_ARCH_AMD64
3049 mov rax, [pvBitmap]
3050 mov edx, [iBit]
3051 lock btc [rax], edx
3052# else
3053 mov eax, [pvBitmap]
3054 mov edx, [iBit]
3055 lock btc [eax], edx
3056# endif
3057 }
3058# endif
3059}
3060#endif
3061
3062
3063/**
3064 * Tests and sets a bit in a bitmap.
3065 *
3066 * @returns true if the bit was set.
3067 * @returns false if the bit was clear.
3068 *
3069 * @param pvBitmap Pointer to the bitmap.
3070 * @param iBit The bit to test and set.
3071 *
3072 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3073 * However, doing so will yield better performance as well as avoiding
3074 * traps accessing the last bits in the bitmap.
3075 */
3076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3077DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3078#else
3079DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3080{
3081 union { bool f; uint32_t u32; uint8_t u8; } rc;
3082# if RT_INLINE_ASM_USES_INTRIN
3083 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3084
3085# elif RT_INLINE_ASM_GNU_STYLE
3086 __asm__ __volatile__("btsl %2, %1\n\t"
3087 "setc %b0\n\t"
3088 "andl $1, %0\n\t"
3089 : "=q" (rc.u32),
3090 "=m" (*(volatile long *)pvBitmap)
3091 : "Ir" (iBit),
3092 "m" (*(volatile long *)pvBitmap)
3093 : "memory");
3094# else
3095 __asm
3096 {
3097 mov edx, [iBit]
3098# ifdef RT_ARCH_AMD64
3099 mov rax, [pvBitmap]
3100 bts [rax], edx
3101# else
3102 mov eax, [pvBitmap]
3103 bts [eax], edx
3104# endif
3105 setc al
3106 and eax, 1
3107 mov [rc.u32], eax
3108 }
3109# endif
3110 return rc.f;
3111}
3112#endif
3113
3114
3115/**
3116 * Atomically tests and sets a bit in a bitmap, ordered.
3117 *
3118 * @returns true if the bit was set.
3119 * @returns false if the bit was clear.
3120 *
3121 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3122 * the memory access isn't atomic!
3123 * @param iBit The bit to set.
3124 */
3125#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3126DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3127#else
3128DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3129{
3130 union { bool f; uint32_t u32; uint8_t u8; } rc;
3131 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3132# if RT_INLINE_ASM_USES_INTRIN
3133 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3134# elif RT_INLINE_ASM_GNU_STYLE
3135 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3136 "setc %b0\n\t"
3137 "andl $1, %0\n\t"
3138 : "=q" (rc.u32),
3139 "=m" (*(volatile long *)pvBitmap)
3140 : "Ir" (iBit),
3141 "m" (*(volatile long *)pvBitmap)
3142 : "memory");
3143# else
3144 __asm
3145 {
3146 mov edx, [iBit]
3147# ifdef RT_ARCH_AMD64
3148 mov rax, [pvBitmap]
3149 lock bts [rax], edx
3150# else
3151 mov eax, [pvBitmap]
3152 lock bts [eax], edx
3153# endif
3154 setc al
3155 and eax, 1
3156 mov [rc.u32], eax
3157 }
3158# endif
3159 return rc.f;
3160}
3161#endif
3162
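/*
 * A minimal usage sketch (not from the VirtualBox sources): claiming a slot
 * in a shared bitmap with ASMAtomicBitTestAndSet.  A false return means the
 * bit was clear before the call, i.e. the caller now owns the slot; the slot
 * is released again with ASMAtomicBitClear.  The 64-slot bitmap is made up.
 */
static volatile uint32_t g_bmExampleSlots[64 / 32];

static bool ExampleClaimSlot(int32_t iSlot)
{
    return !ASMAtomicBitTestAndSet(&g_bmExampleSlots[0], iSlot); /* true = slot is now ours */
}

static void ExampleReleaseSlot(int32_t iSlot)
{
    ASMAtomicBitClear(&g_bmExampleSlots[0], iSlot);
}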
3163
3164/**
3165 * Tests and clears a bit in a bitmap.
3166 *
3167 * @returns true if the bit was set.
3168 * @returns false if the bit was clear.
3169 *
3170 * @param pvBitmap Pointer to the bitmap.
3171 * @param iBit The bit to test and clear.
3172 *
3173 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3174 * However, doing so will yield better performance as well as avoiding
3175 * traps accessing the last bits in the bitmap.
3176 */
3177#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3178DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3179#else
3180DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3181{
3182 union { bool f; uint32_t u32; uint8_t u8; } rc;
3183# if RT_INLINE_ASM_USES_INTRIN
3184 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3185
3186# elif RT_INLINE_ASM_GNU_STYLE
3187 __asm__ __volatile__("btrl %2, %1\n\t"
3188 "setc %b0\n\t"
3189 "andl $1, %0\n\t"
3190 : "=q" (rc.u32),
3191 "=m" (*(volatile long *)pvBitmap)
3192 : "Ir" (iBit),
3193 "m" (*(volatile long *)pvBitmap)
3194 : "memory");
3195# else
3196 __asm
3197 {
3198 mov edx, [iBit]
3199# ifdef RT_ARCH_AMD64
3200 mov rax, [pvBitmap]
3201 btr [rax], edx
3202# else
3203 mov eax, [pvBitmap]
3204 btr [eax], edx
3205# endif
3206 setc al
3207 and eax, 1
3208 mov [rc.u32], eax
3209 }
3210# endif
3211 return rc.f;
3212}
3213#endif
3214
3215
3216/**
3217 * Atomically tests and clears a bit in a bitmap, ordered.
3218 *
3219 * @returns true if the bit was set.
3220 * @returns false if the bit was clear.
3221 *
3222 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3223 * the memory access isn't atomic!
3224 * @param iBit The bit to test and clear.
3225 *
3226 * @remarks No memory barrier, take care on smp.
3227 */
3228#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3229DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3230#else
3231DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3232{
3233 union { bool f; uint32_t u32; uint8_t u8; } rc;
3234 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3235# if RT_INLINE_ASM_USES_INTRIN
3236 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3237
3238# elif RT_INLINE_ASM_GNU_STYLE
3239 __asm__ __volatile__("lock; btrl %2, %1\n\t"
3240 "setc %b0\n\t"
3241 "andl $1, %0\n\t"
3242 : "=q" (rc.u32),
3243 "=m" (*(volatile long *)pvBitmap)
3244 : "Ir" (iBit),
3245 "m" (*(volatile long *)pvBitmap)
3246 : "memory");
3247# else
3248 __asm
3249 {
3250 mov edx, [iBit]
3251# ifdef RT_ARCH_AMD64
3252 mov rax, [pvBitmap]
3253 lock btr [rax], edx
3254# else
3255 mov eax, [pvBitmap]
3256 lock btr [eax], edx
3257# endif
3258 setc al
3259 and eax, 1
3260 mov [rc.u32], eax
3261 }
3262# endif
3263 return rc.f;
3264}
3265#endif
3266
3267
3268/**
3269 * Tests and toggles a bit in a bitmap.
3270 *
3271 * @returns true if the bit was set.
3272 * @returns false if the bit was clear.
3273 *
3274 * @param pvBitmap Pointer to the bitmap.
3275 * @param iBit The bit to test and toggle.
3276 *
3277 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3278 * However, doing so will yield better performance as well as avoiding
3279 * traps accessing the last bits in the bitmap.
3280 */
3281#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3282DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3283#else
3284DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3285{
3286 union { bool f; uint32_t u32; uint8_t u8; } rc;
3287# if RT_INLINE_ASM_USES_INTRIN
3288 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3289
3290# elif RT_INLINE_ASM_GNU_STYLE
3291 __asm__ __volatile__("btcl %2, %1\n\t"
3292 "setc %b0\n\t"
3293 "andl $1, %0\n\t"
3294 : "=q" (rc.u32),
3295 "=m" (*(volatile long *)pvBitmap)
3296 : "Ir" (iBit),
3297 "m" (*(volatile long *)pvBitmap)
3298 : "memory");
3299# else
3300 __asm
3301 {
3302 mov edx, [iBit]
3303# ifdef RT_ARCH_AMD64
3304 mov rax, [pvBitmap]
3305 btc [rax], edx
3306# else
3307 mov eax, [pvBitmap]
3308 btc [eax], edx
3309# endif
3310 setc al
3311 and eax, 1
3312 mov [rc.u32], eax
3313 }
3314# endif
3315 return rc.f;
3316}
3317#endif
3318
3319
3320/**
3321 * Atomically tests and toggles a bit in a bitmap, ordered.
3322 *
3323 * @returns true if the bit was set.
3324 * @returns false if the bit was clear.
3325 *
3326 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3327 * the memory access isn't atomic!
3328 * @param iBit The bit to test and toggle.
3329 */
3330#if RT_INLINE_ASM_EXTERNAL
3331DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3332#else
3333DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3334{
3335 union { bool f; uint32_t u32; uint8_t u8; } rc;
3336 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3337# if RT_INLINE_ASM_GNU_STYLE
3338 __asm__ __volatile__("lock; btcl %2, %1\n\t"
3339 "setc %b0\n\t"
3340 "andl $1, %0\n\t"
3341 : "=q" (rc.u32),
3342 "=m" (*(volatile long *)pvBitmap)
3343 : "Ir" (iBit),
3344 "m" (*(volatile long *)pvBitmap)
3345 : "memory");
3346# else
3347 __asm
3348 {
3349 mov edx, [iBit]
3350# ifdef RT_ARCH_AMD64
3351 mov rax, [pvBitmap]
3352 lock btc [rax], edx
3353# else
3354 mov eax, [pvBitmap]
3355 lock btc [eax], edx
3356# endif
3357 setc al
3358 and eax, 1
3359 mov [rc.u32], eax
3360 }
3361# endif
3362 return rc.f;
3363}
3364#endif
3365
3366
3367/**
3368 * Tests if a bit in a bitmap is set.
3369 *
3370 * @returns true if the bit is set.
3371 * @returns false if the bit is clear.
3372 *
3373 * @param pvBitmap Pointer to the bitmap.
3374 * @param iBit The bit to test.
3375 *
3376 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3377 * However, doing so will yield better performance as well as avoiding
3378 * traps accessing the last bits in the bitmap.
3379 */
3380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3381DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
3382#else
3383DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
3384{
3385 union { bool f; uint32_t u32; uint8_t u8; } rc;
3386# if RT_INLINE_ASM_USES_INTRIN
3387 rc.u32 = _bittest((long *)pvBitmap, iBit);
3388# elif RT_INLINE_ASM_GNU_STYLE
3389
3390 __asm__ __volatile__("btl %2, %1\n\t"
3391 "setc %b0\n\t"
3392 "andl $1, %0\n\t"
3393 : "=q" (rc.u32)
3394 : "m" (*(const volatile long *)pvBitmap),
3395 "Ir" (iBit)
3396 : "memory");
3397# else
3398 __asm
3399 {
3400 mov edx, [iBit]
3401# ifdef RT_ARCH_AMD64
3402 mov rax, [pvBitmap]
3403 bt [rax], edx
3404# else
3405 mov eax, [pvBitmap]
3406 bt [eax], edx
3407# endif
3408 setc al
3409 and eax, 1
3410 mov [rc.u32], eax
3411 }
3412# endif
3413 return rc.f;
3414}
3415#endif
3416
3417
3418/**
3419 * Clears a bit range within a bitmap.
3420 *
3421 * @param pvBitmap Pointer to the bitmap.
3422 * @param iBitStart The first bit to clear.
3423 * @param iBitEnd The first bit not to clear.
3424 */
3425DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3426{
3427 if (iBitStart < iBitEnd)
3428 {
3429 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3430 int iStart = iBitStart & ~31;
3431 int iEnd = iBitEnd & ~31;
3432 if (iStart == iEnd)
3433 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3434 else
3435 {
3436 /* bits in first dword. */
3437 if (iBitStart & 31)
3438 {
3439 *pu32 &= (1 << (iBitStart & 31)) - 1;
3440 pu32++;
3441 iBitStart = iStart + 32;
3442 }
3443
3444 /* whole dword. */
3445 if (iBitStart != iEnd)
3446 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3447
3448 /* bits in last dword. */
3449 if (iBitEnd & 31)
3450 {
3451 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3452 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3453 }
3454 }
3455 }
3456}
3457
3458
3459/**
3460 * Sets a bit range within a bitmap.
3461 *
3462 * @param pvBitmap Pointer to the bitmap.
3463 * @param iBitStart The first bit to set.
3464 * @param iBitEnd The first bit not to set.
3465 */
3466DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3467{
3468 if (iBitStart < iBitEnd)
3469 {
3470 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3471 int iStart = iBitStart & ~31;
3472 int iEnd = iBitEnd & ~31;
3473 if (iStart == iEnd)
3474 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
3475 else
3476 {
3477 /* bits in first dword. */
3478 if (iBitStart & 31)
3479 {
3480 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
3481 pu32++;
3482 iBitStart = iStart + 32;
3483 }
3484
3485 /* whole dword. */
3486 if (iBitStart != iEnd)
3487 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
3488
3489 /* bits in last dword. */
3490 if (iBitEnd & 31)
3491 {
3492 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3493 *pu32 |= (1 << (iBitEnd & 31)) - 1;
3494 }
3495 }
3496 }
3497}
3498
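/*
 * A minimal usage sketch (not from the VirtualBox sources): iBitEnd is
 * exclusive, so covering bits 8 through 15 inclusive means passing 8 and 16.
 * The bitmap is made up for illustration.
 */
static uint32_t g_bmExampleRange[256 / 32];

static void ExampleToggleSecondByte(void)
{
    ASMBitSetRange(&g_bmExampleRange[0], 8, 16);    /* sets bits 8..15 */
    ASMBitClearRange(&g_bmExampleRange[0], 8, 16);  /* clears them again */
}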
3499
3500/**
3501 * Finds the first clear bit in a bitmap.
3502 *
3503 * @returns Index of the first zero bit.
3504 * @returns -1 if no clear bit was found.
3505 * @param pvBitmap Pointer to the bitmap.
3506 * @param cBits The number of bits in the bitmap. Multiple of 32.
3507 */
3508#if RT_INLINE_ASM_EXTERNAL
3509DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
3510#else
3511DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
3512{
3513 if (cBits)
3514 {
3515 int32_t iBit;
3516# if RT_INLINE_ASM_GNU_STYLE
3517 RTCCUINTREG uEAX, uECX, uEDI;
3518 cBits = RT_ALIGN_32(cBits, 32);
3519 __asm__ __volatile__("repe; scasl\n\t"
3520 "je 1f\n\t"
3521# ifdef RT_ARCH_AMD64
3522 "lea -4(%%rdi), %%rdi\n\t"
3523 "xorl (%%rdi), %%eax\n\t"
3524 "subq %5, %%rdi\n\t"
3525# else
3526 "lea -4(%%edi), %%edi\n\t"
3527 "xorl (%%edi), %%eax\n\t"
3528 "subl %5, %%edi\n\t"
3529# endif
3530 "shll $3, %%edi\n\t"
3531 "bsfl %%eax, %%edx\n\t"
3532 "addl %%edi, %%edx\n\t"
3533 "1:\t\n"
3534 : "=d" (iBit),
3535 "=&c" (uECX),
3536 "=&D" (uEDI),
3537 "=&a" (uEAX)
3538 : "0" (0xffffffff),
3539 "mr" (pvBitmap),
3540 "1" (cBits >> 5),
3541 "2" (pvBitmap),
3542 "3" (0xffffffff));
3543# else
3544 cBits = RT_ALIGN_32(cBits, 32);
3545 __asm
3546 {
3547# ifdef RT_ARCH_AMD64
3548 mov rdi, [pvBitmap]
3549 mov rbx, rdi
3550# else
3551 mov edi, [pvBitmap]
3552 mov ebx, edi
3553# endif
3554 mov edx, 0ffffffffh
3555 mov eax, edx
3556 mov ecx, [cBits]
3557 shr ecx, 5
3558 repe scasd
3559 je done
3560
3561# ifdef RT_ARCH_AMD64
3562 lea rdi, [rdi - 4]
3563 xor eax, [rdi]
3564 sub rdi, rbx
3565# else
3566 lea edi, [edi - 4]
3567 xor eax, [edi]
3568 sub edi, ebx
3569# endif
3570 shl edi, 3
3571 bsf edx, eax
3572 add edx, edi
3573 done:
3574 mov [iBit], edx
3575 }
3576# endif
3577 return iBit;
3578 }
3579 return -1;
3580}
3581#endif
3582
3583
3584/**
3585 * Finds the next clear bit in a bitmap.
3586 *
3587 * @returns Index of the first zero bit.
3588 * @returns -1 if no clear bit was found.
3589 * @param pvBitmap Pointer to the bitmap.
3590 * @param cBits The number of bits in the bitmap. Multiple of 32.
3591 * @param iBitPrev The bit returned from the last search.
3592 * The search will start at iBitPrev + 1.
3593 */
3594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3595DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3596#else
3597DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3598{
3599 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
3600 int iBit = ++iBitPrev & 31;
3601 if (iBit)
3602 {
3603 /*
3604 * Inspect the 32-bit word containing the unaligned bit.
3605 */
3606 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
3607
3608# if RT_INLINE_ASM_USES_INTRIN
3609 unsigned long ulBit = 0;
3610 if (_BitScanForward(&ulBit, u32))
3611 return ulBit + iBitPrev;
3612# else
3613# if RT_INLINE_ASM_GNU_STYLE
3614 __asm__ __volatile__("bsf %1, %0\n\t"
3615 "jnz 1f\n\t"
3616 "movl $-1, %0\n\t"
3617 "1:\n\t"
3618 : "=r" (iBit)
3619 : "r" (u32));
3620# else
3621 __asm
3622 {
3623 mov edx, [u32]
3624 bsf eax, edx
3625 jnz done
3626 mov eax, 0ffffffffh
3627 done:
3628 mov [iBit], eax
3629 }
3630# endif
3631 if (iBit >= 0)
3632 return iBit + iBitPrev;
3633# endif
3634
3635 /*
3636 * Skip ahead and see if there is anything left to search.
3637 */
3638 iBitPrev |= 31;
3639 iBitPrev++;
3640 if (cBits <= (uint32_t)iBitPrev)
3641 return -1;
3642 }
3643
3644 /*
3645 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
3646 */
3647 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
3648 if (iBit >= 0)
3649 iBit += iBitPrev;
3650 return iBit;
3651}
3652#endif
3653
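/*
 * A minimal usage sketch (not from the VirtualBox sources): a simple
 * allocation scan that looks for a clear bit and then tries to claim it
 * atomically, rescanning if another thread won the race.  The bitmap size
 * and the names are made up for illustration.
 */
#define EXAMPLE_ALLOC_BITS 1024             /* multiple of 32, as required */
static volatile uint32_t g_bmExampleAlloc[EXAMPLE_ALLOC_BITS / 32];

static int32_t ExampleAllocBit(void)
{
    for (;;)
    {
        int32_t iBit = ASMBitFirstClear(&g_bmExampleAlloc[0], EXAMPLE_ALLOC_BITS);
        if (iBit < 0)
            return -1;                      /* the bitmap is full */
        if (!ASMAtomicBitTestAndSet(&g_bmExampleAlloc[0], iBit))
            return iBit;                    /* claimed it */
        /* somebody else grabbed that bit; scan again */
    }
}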
3654
3655/**
3656 * Finds the first set bit in a bitmap.
3657 *
3658 * @returns Index of the first set bit.
3659 * @returns -1 if no set bit was found.
3660 * @param pvBitmap Pointer to the bitmap.
3661 * @param cBits The number of bits in the bitmap. Multiple of 32.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL
3664DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
3665#else
3666DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
3667{
3668 if (cBits)
3669 {
3670 int32_t iBit;
3671# if RT_INLINE_ASM_GNU_STYLE
3672 RTCCUINTREG uEAX, uECX, uEDI;
3673 cBits = RT_ALIGN_32(cBits, 32);
3674 __asm__ __volatile__("repe; scasl\n\t"
3675 "je 1f\n\t"
3676# ifdef RT_ARCH_AMD64
3677 "lea -4(%%rdi), %%rdi\n\t"
3678 "movl (%%rdi), %%eax\n\t"
3679 "subq %5, %%rdi\n\t"
3680# else
3681 "lea -4(%%edi), %%edi\n\t"
3682 "movl (%%edi), %%eax\n\t"
3683 "subl %5, %%edi\n\t"
3684# endif
3685 "shll $3, %%edi\n\t"
3686 "bsfl %%eax, %%edx\n\t"
3687 "addl %%edi, %%edx\n\t"
3688 "1:\t\n"
3689 : "=d" (iBit),
3690 "=&c" (uECX),
3691 "=&D" (uEDI),
3692 "=&a" (uEAX)
3693 : "0" (0xffffffff),
3694 "mr" (pvBitmap),
3695 "1" (cBits >> 5),
3696 "2" (pvBitmap),
3697 "3" (0));
3698# else
3699 cBits = RT_ALIGN_32(cBits, 32);
3700 __asm
3701 {
3702# ifdef RT_ARCH_AMD64
3703 mov rdi, [pvBitmap]
3704 mov rbx, rdi
3705# else
3706 mov edi, [pvBitmap]
3707 mov ebx, edi
3708# endif
3709 mov edx, 0ffffffffh
3710 xor eax, eax
3711 mov ecx, [cBits]
3712 shr ecx, 5
3713 repe scasd
3714 je done
3715# ifdef RT_ARCH_AMD64
3716 lea rdi, [rdi - 4]
3717 mov eax, [rdi]
3718 sub rdi, rbx
3719# else
3720 lea edi, [edi - 4]
3721 mov eax, [edi]
3722 sub edi, ebx
3723# endif
3724 shl edi, 3
3725 bsf edx, eax
3726 add edx, edi
3727 done:
3728 mov [iBit], edx
3729 }
3730# endif
3731 return iBit;
3732 }
3733 return -1;
3734}
3735#endif
3736
3737
3738/**
3739 * Finds the next set bit in a bitmap.
3740 *
3741 * @returns Index of the next set bit.
3742 * @returns -1 if no set bit was found.
3743 * @param pvBitmap Pointer to the bitmap.
3744 * @param cBits The number of bits in the bitmap. Multiple of 32.
3745 * @param iBitPrev The bit returned from the last search.
3746 * The search will start at iBitPrev + 1.
3747 */
3748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3749DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3750#else
3751DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3752{
3753 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
3754 int iBit = ++iBitPrev & 31;
3755 if (iBit)
3756 {
3757 /*
3758 * Inspect the 32-bit word containing the unaligned bit.
3759 */
3760 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
3761
3762# if RT_INLINE_ASM_USES_INTRIN
3763 unsigned long ulBit = 0;
3764 if (_BitScanForward(&ulBit, u32))
3765 return ulBit + iBitPrev;
3766# else
3767# if RT_INLINE_ASM_GNU_STYLE
3768 __asm__ __volatile__("bsf %1, %0\n\t"
3769 "jnz 1f\n\t"
3770 "movl $-1, %0\n\t"
3771 "1:\n\t"
3772 : "=r" (iBit)
3773 : "r" (u32));
3774# else
3775 __asm
3776 {
3777 mov edx, [u32]
3778 bsf eax, edx
3779 jnz done
3780 mov eax, 0ffffffffh
3781 done:
3782 mov [iBit], eax
3783 }
3784# endif
3785 if (iBit >= 0)
3786 return iBit + iBitPrev;
3787# endif
3788
3789 /*
3790 * Skip ahead and see if there is anything left to search.
3791 */
3792 iBitPrev |= 31;
3793 iBitPrev++;
3794 if (cBits <= (uint32_t)iBitPrev)
3795 return -1;
3796 }
3797
3798 /*
3799 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
3800 */
3801 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
3802 if (iBit >= 0)
3803 iBit += iBitPrev;
3804 return iBit;
3805}
3806#endif
3807
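/*
 * A minimal usage sketch (not from the VirtualBox sources): walking every set
 * bit with ASMBitFirstSet and ASMBitNextSet.  Because iBitPrev is the bit
 * returned by the previous call, the loop below visits each set bit exactly
 * once.  The callback signature is made up for illustration.
 */
static void ExampleForEachSetBit(const volatile void *pvBitmap, uint32_t cBits,
                                 void (*pfnWorker)(int32_t iBit))
{
    int32_t iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        pfnWorker(iBit);
        iBit = ASMBitNextSet(pvBitmap, cBits, (uint32_t)iBit);
    }
}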
3808
3809/**
3810 * Finds the first bit which is set in the given 32-bit integer.
3811 * Bits are numbered from 1 (least significant) to 32.
3812 *
3813 * @returns index [1..32] of the first set bit.
3814 * @returns 0 if all bits are cleared.
3815 * @param u32 Integer to search for set bits.
3816 * @remark Similar to ffs() in BSD.
3817 */
3818#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3819DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
3820#else
3821DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3822{
3823# if RT_INLINE_ASM_USES_INTRIN
3824 unsigned long iBit;
3825 if (_BitScanForward(&iBit, u32))
3826 iBit++;
3827 else
3828 iBit = 0;
3829# elif RT_INLINE_ASM_GNU_STYLE
3830 uint32_t iBit;
3831 __asm__ __volatile__("bsf %1, %0\n\t"
3832 "jnz 1f\n\t"
3833 "xorl %0, %0\n\t"
3834 "jmp 2f\n"
3835 "1:\n\t"
3836 "incl %0\n"
3837 "2:\n\t"
3838 : "=r" (iBit)
3839 : "rm" (u32));
3840# else
3841 uint32_t iBit;
3842 _asm
3843 {
3844 bsf eax, [u32]
3845 jnz found
3846 xor eax, eax
3847 jmp done
3848 found:
3849 inc eax
3850 done:
3851 mov [iBit], eax
3852 }
3853# endif
3854 return iBit;
3855}
3856#endif
3857
3858
3859/**
3860 * Finds the first bit which is set in the given 32-bit integer.
3861 * Bits are numbered from 1 (least significant) to 32.
3862 *
3863 * @returns index [1..32] of the first set bit.
3864 * @returns 0 if all bits are cleared.
3865 * @param i32 Integer to search for set bits.
3866 * @remark Similar to ffs() in BSD.
3867 */
3868DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3869{
3870 return ASMBitFirstSetU32((uint32_t)i32);
3871}
3872
3873
3874/**
3875 * Finds the last bit which is set in the given 32-bit integer.
3876 * Bits are numbered from 1 (least significant) to 32.
3877 *
3878 * @returns index [1..32] of the last set bit.
3879 * @returns 0 if all bits are cleared.
3880 * @param u32 Integer to search for set bits.
3881 * @remark Similar to fls() in BSD.
3882 */
3883#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3884DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
3885#else
3886DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3887{
3888# if RT_INLINE_ASM_USES_INTRIN
3889 unsigned long iBit;
3890 if (_BitScanReverse(&iBit, u32))
3891 iBit++;
3892 else
3893 iBit = 0;
3894# elif RT_INLINE_ASM_GNU_STYLE
3895 uint32_t iBit;
3896 __asm__ __volatile__("bsrl %1, %0\n\t"
3897 "jnz 1f\n\t"
3898 "xorl %0, %0\n\t"
3899 "jmp 2f\n"
3900 "1:\n\t"
3901 "incl %0\n"
3902 "2:\n\t"
3903 : "=r" (iBit)
3904 : "rm" (u32));
3905# else
3906 uint32_t iBit;
3907 _asm
3908 {
3909 bsr eax, [u32]
3910 jnz found
3911 xor eax, eax
3912 jmp done
3913 found:
3914 inc eax
3915 done:
3916 mov [iBit], eax
3917 }
3918# endif
3919 return iBit;
3920}
3921#endif
3922
3923
3924/**
3925 * Finds the last bit which is set in the given 32-bit integer.
3926 * Bits are numbered from 1 (least significant) to 32.
3927 *
3928 * @returns index [1..32] of the last set bit.
3929 * @returns 0 if all bits are cleared.
3930 * @param i32 Integer to search for set bits.
3931 * @remark Similar to fls() in BSD.
3932 */
3933DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
3934{
3935 return ASMBitLastSetU32((uint32_t)i32);
3936}
3937
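/*
 * A minimal usage sketch (not from the VirtualBox sources): the returned bit
 * index is 1-based, so floor(log2(u32)) of a non-zero value is
 * ASMBitLastSetU32(u32) - 1, and a power of two is a non-zero value whose
 * first and last set bits coincide.  The function names are made up.
 */
static unsigned ExampleLog2Floor(uint32_t u32)
{
    /* the caller guarantees u32 != 0 in this sketch */
    return ASMBitLastSetU32(u32) - 1;
}

static bool ExampleIsPowerOfTwo(uint32_t u32)
{
    return u32 && ASMBitFirstSetU32(u32) == ASMBitLastSetU32(u32);
}
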
3938/**
3939 * Reverse the byte order of the given 16-bit integer.
3940 *
3941 * @returns The value with the byte order reversed.
3942 * @param u16 16-bit integer value.
3943 */
3944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3945DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
3946#else
3947DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
3948{
3949# if RT_INLINE_ASM_USES_INTRIN
3950 u16 = _byteswap_ushort(u16);
3951# elif RT_INLINE_ASM_GNU_STYLE
3952 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
3953# else
3954 _asm
3955 {
3956 mov ax, [u16]
3957 ror ax, 8
3958 mov [u16], ax
3959 }
3960# endif
3961 return u16;
3962}
3963#endif
3964
3965
3966/**
3967 * Reverse the byte order of the given 32-bit integer.
3968 *
3969 * @returns The value with the byte order reversed.
3970 * @param u32 32-bit integer value.
3971 */
3972#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3973DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
3974#else
3975DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
3976{
3977# if RT_INLINE_ASM_USES_INTRIN
3978 u32 = _byteswap_ulong(u32);
3979# elif RT_INLINE_ASM_GNU_STYLE
3980 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
3981# else
3982 _asm
3983 {
3984 mov eax, [u32]
3985 bswap eax
3986 mov [u32], eax
3987 }
3988# endif
3989 return u32;
3990}
3991#endif
3992
3993
3994/**
3995 * Reverse the byte order of the given 64-bit integer.
3996 *
3997 * @returns The value with the byte order reversed.
3998 * @param u64 64-bit integer value.
3999 */
4000DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4001{
4002#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4003 u64 = _byteswap_uint64(u64);
4004#else
4005 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4006 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4007#endif
4008 return u64;
4009}
4010
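/*
 * A minimal usage sketch (not from the VirtualBox sources): on the little
 * endian x86/AMD64 hosts these helpers target, converting a big-endian
 * on-wire field to host order is a plain byte swap.  The function names are
 * made up; real code would typically go through IPRT's endian conversion
 * macros (RT_BE2H_U32 and friends) where available.
 */
static uint32_t ExampleWireToHost32(uint32_t u32Wire)
{
    return ASMByteSwapU32(u32Wire);         /* big endian <-> little endian */
}

static uint64_t ExampleHostToWire64(uint64_t u64Host)
{
    return ASMByteSwapU64(u64Host);
}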
4011
4012/** @} */
4013
4014
4015/** @} */
4016
4017/* KLUDGE: Play safe for now as I cannot test all solaris and linux usages. */
4018#if !defined(__cplusplus) && !defined(DEBUG)
4019# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4020# include <iprt/asm-amd64-x86.h>
4021# endif
4022# include <iprt/asm-math.h>
4023#endif
4024
4025#endif
4026