VirtualBox

source: vbox/trunk/include/iprt/asm.h@29263

Last change on this file since 29263 was 29263, checked in by vboxsync, 15 years ago

sparc adjustments.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 113.6 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2010 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400.
35 * Otherwise defined as 0.
36 */
37
38/* Solaris 10 header ugliness */
39#ifdef u
40# undef u
41#endif
42
43#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
44# include <intrin.h>
45 /* Emit the intrinsics at all optimization levels. */
46# pragma intrinsic(_ReadWriteBarrier)
47# pragma intrinsic(__cpuid)
48# pragma intrinsic(__stosd)
49# pragma intrinsic(__stosw)
50# pragma intrinsic(__stosb)
51# pragma intrinsic(_BitScanForward)
52# pragma intrinsic(_BitScanReverse)
53# pragma intrinsic(_bittest)
54# pragma intrinsic(_bittestandset)
55# pragma intrinsic(_bittestandreset)
56# pragma intrinsic(_bittestandcomplement)
57# pragma intrinsic(_byteswap_ushort)
58# pragma intrinsic(_byteswap_ulong)
59# pragma intrinsic(_interlockedbittestandset)
60# pragma intrinsic(_interlockedbittestandreset)
61# pragma intrinsic(_InterlockedAnd)
62# pragma intrinsic(_InterlockedOr)
63# pragma intrinsic(_InterlockedIncrement)
64# pragma intrinsic(_InterlockedDecrement)
65# pragma intrinsic(_InterlockedExchange)
66# pragma intrinsic(_InterlockedExchangeAdd)
67# pragma intrinsic(_InterlockedCompareExchange)
68# pragma intrinsic(_InterlockedCompareExchange64)
69# ifdef RT_ARCH_AMD64
70# pragma intrinsic(__stosq)
71# pragma intrinsic(_byteswap_uint64)
72# pragma intrinsic(_InterlockedExchange64)
73# endif
74#endif
75
76
77/** @defgroup grp_rt_asm ASM - Assembly Routines
78 * @ingroup grp_rt
79 *
80 * @remarks The difference between ordered and unordered atomic operations is that
81 * the former complete all outstanding reads and writes before continuing,
82 * while the latter make no promises about the order. Even ordered
83 * operations do not, it seems, make a 100% promise about whether
84 * the operation completes before any subsequent memory access.
85 * (Please correct if wrong.)
86 *
87 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
88 * are unordered (note the Uo).
89 *
90 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
91 * or even optimize assembler instructions away. For instance, in the following code
92 * the second rdmsr instruction is optimized away because gcc treats that instruction
93 * as deterministic:
94 *
95 * @code
96 * static inline uint64_t rdmsr_low(int idx)
97 * {
98 * uint32_t low;
99 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
100 * }
101 * ...
102 * uint32_t msr1 = rdmsr_low(1);
103 * foo(msr1);
104 * msr1 = rdmsr_low(1);
105 * bar(msr1);
106 * @endcode
107 *
108 * The input parameter of rdmsr_low is the same for both calls and therefore, without
109 * __volatile__, gcc would reuse the result of the first call as the argument to bar() as
110 * well. For rdmsr this is not acceptable, as the instruction is _not_ deterministic. The
111 * same applies to reading machine status information in general.
112 *
113 * @{
114 */
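/* A sketch of the ordered vs. unordered distinction described above; g_u32Payload,
 * g_fReady, Producer and Consumer are illustrative and not part of IPRT:
 * @code
 * static volatile uint32_t g_u32Payload;
 * static volatile uint32_t g_fReady;
 *
 * static void Producer(uint32_t u32Value)
 * {
 *     g_u32Payload = u32Value;             // plain store of the data
 *     ASMAtomicXchgU32(&g_fReady, 1);      // ordered: completes the store above first
 * }
 *
 * static uint32_t Consumer(void)
 * {
 *     while (!ASMAtomicReadU32(&g_fReady))         // ordered read of the flag
 *         ;
 *     return ASMAtomicUoReadU32(&g_u32Payload);    // Uo read suffices once the flag is seen (on x86)
 * }
 * @endcode
 */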
115
116
117/** @def RT_INLINE_ASM_GCC_4_3_X_X86
118 * Used to work around some 4.3.x register allocation issues in this version of
119 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
120#ifdef __GNUC__
121# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
122#endif
123#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
124# define RT_INLINE_ASM_GCC_4_3_X_X86 0
125#endif
126
127/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
128 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) miscompiles
129 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds (PIC
130 * mode, x86).
131 *
132 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
133 * when in PIC mode on x86.
134 */
135#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
136# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
137 ( (defined(PIC) || defined(__PIC__)) \
138 && defined(RT_ARCH_X86) \
139 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
140 || defined(RT_OS_DARWIN)) )
141#endif
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
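/* Usage sketch for ASMReturnAddress; SomeTrackedApi and g_pvLastCaller are
 * illustrative only:
 * @code
 * static void *g_pvLastCaller;
 *
 * void SomeTrackedApi(void)
 * {
 *     g_pvLastCaller = ASMReturnAddress(); // the code address we will return to, i.e. our caller
 *     // ... do the real work ...
 * }
 * @endcode
 */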
159
160
161/**
162 * Compiler memory barrier.
163 *
164 * Ensures that the compiler does not reuse any cached (register/temporary stack)
165 * memory values and does not defer any outstanding writes across this point.
166 *
167 * This function must be used if non-volatile data is modified by a
168 * device or the VMM. Typical cases are port access, MMIO access,
169 * trapping instructions, etc.
170 */
171#if RT_INLINE_ASM_GNU_STYLE
172# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
173#elif RT_INLINE_ASM_USES_INTRIN
174# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
175#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
176DECLINLINE(void) ASMCompilerBarrier(void)
177{
178 __asm
179 {
180 }
181}
182#endif
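/* A minimal sketch of why the compiler barrier matters; g_fStop is an illustrative
 * non-volatile flag that some other context (e.g. an interrupt handler) sets:
 * @code
 * static uint32_t g_fStop;
 *
 * static void WaitLoop(void)
 * {
 *     while (!g_fStop)
 *         ASMCompilerBarrier();   // forces g_fStop to be re-read from memory on every iteration
 * }
 * @endcode
 */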
183
184
185
186/**
187 * Atomically Exchange an unsigned 8-bit value, ordered.
188 *
189 * @returns Current *pu8 value
190 * @param pu8 Pointer to the 8-bit variable to update.
191 * @param u8 The 8-bit value to assign to *pu8.
192 */
193#if RT_INLINE_ASM_EXTERNAL
194DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
195#else
196DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
197{
198# if RT_INLINE_ASM_GNU_STYLE
199 __asm__ __volatile__("xchgb %0, %1\n\t"
200 : "=m" (*pu8),
201 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
202 : "1" (u8),
203 "m" (*pu8));
204# else
205 __asm
206 {
207# ifdef RT_ARCH_AMD64
208 mov rdx, [pu8]
209 mov al, [u8]
210 xchg [rdx], al
211 mov [u8], al
212# else
213 mov edx, [pu8]
214 mov al, [u8]
215 xchg [edx], al
216 mov [u8], al
217# endif
218 }
219# endif
220 return u8;
221}
222#endif
223
224
225/**
226 * Atomically Exchange a signed 8-bit value, ordered.
227 *
228 * @returns Current *pi8 value
229 * @param pi8 Pointer to the 8-bit variable to update.
230 * @param i8 The 8-bit value to assign to *pi8.
231 */
232DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
233{
234 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
235}
236
237
238/**
239 * Atomically Exchange a bool value, ordered.
240 *
241 * @returns Current *pf value
242 * @param pf Pointer to the boolean variable to update.
243 * @param f The boolean value to assign to *pf.
244 */
245DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
246{
247#ifdef _MSC_VER
248 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
249#else
250 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
251#endif
252}
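/* Example: claiming a one-shot action with ASMAtomicXchgBool; g_fDone and
 * InitSomething are illustrative, not IPRT APIs:
 * @code
 * static volatile bool g_fDone = false;
 *
 * static void DoOnce(void)
 * {
 *     if (!ASMAtomicXchgBool(&g_fDone, true))  // only the first caller sees the old 'false'
 *         InitSomething();                     // hypothetical one-time initialization
 * }
 * @endcode
 */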
253
254
255/**
256 * Atomically Exchange an unsigned 16-bit value, ordered.
257 *
258 * @returns Current *pu16 value
259 * @param pu16 Pointer to the 16-bit variable to update.
260 * @param u16 The 16-bit value to assign to *pu16.
261 */
262#if RT_INLINE_ASM_EXTERNAL
263DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
264#else
265DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
266{
267# if RT_INLINE_ASM_GNU_STYLE
268 __asm__ __volatile__("xchgw %0, %1\n\t"
269 : "=m" (*pu16),
270 "=r" (u16)
271 : "1" (u16),
272 "m" (*pu16));
273# else
274 __asm
275 {
276# ifdef RT_ARCH_AMD64
277 mov rdx, [pu16]
278 mov ax, [u16]
279 xchg [rdx], ax
280 mov [u16], ax
281# else
282 mov edx, [pu16]
283 mov ax, [u16]
284 xchg [edx], ax
285 mov [u16], ax
286# endif
287 }
288# endif
289 return u16;
290}
291#endif
292
293
294/**
295 * Atomically Exchange a signed 16-bit value, ordered.
296 *
297 * @returns Current *pi16 value
298 * @param pi16 Pointer to the 16-bit variable to update.
299 * @param i16 The 16-bit value to assign to *pi16.
300 */
301DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
302{
303 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
304}
305
306
307/**
308 * Atomically Exchange an unsigned 32-bit value, ordered.
309 *
310 * @returns Current *pu32 value
311 * @param pu32 Pointer to the 32-bit variable to update.
312 * @param u32 The 32-bit value to assign to *pu32.
313 */
314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
315DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
316#else
317DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
318{
319# if RT_INLINE_ASM_GNU_STYLE
320 __asm__ __volatile__("xchgl %0, %1\n\t"
321 : "=m" (*pu32),
322 "=r" (u32)
323 : "1" (u32),
324 "m" (*pu32));
325
326# elif RT_INLINE_ASM_USES_INTRIN
327 u32 = _InterlockedExchange((long *)pu32, u32);
328
329# else
330 __asm
331 {
332# ifdef RT_ARCH_AMD64
333 mov rdx, [pu32]
334 mov eax, u32
335 xchg [rdx], eax
336 mov [u32], eax
337# else
338 mov edx, [pu32]
339 mov eax, u32
340 xchg [edx], eax
341 mov [u32], eax
342# endif
343 }
344# endif
345 return u32;
346}
347#endif
348
349
350/**
351 * Atomically Exchange a signed 32-bit value, ordered.
352 *
353 * @returns Current *pi32 value
354 * @param pi32 Pointer to the 32-bit variable to update.
355 * @param i32 The 32-bit value to assign to *pi32.
356 */
357DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
358{
359 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
360}
361
362
363/**
364 * Atomically Exchange an unsigned 64-bit value, ordered.
365 *
366 * @returns Current *pu64 value
367 * @param pu64 Pointer to the 64-bit variable to update.
368 * @param u64 The 64-bit value to assign to *pu64.
369 */
370#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
371 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
372DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
373#else
374DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
375{
376# if defined(RT_ARCH_AMD64)
377# if RT_INLINE_ASM_USES_INTRIN
378 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
379
380# elif RT_INLINE_ASM_GNU_STYLE
381 __asm__ __volatile__("xchgq %0, %1\n\t"
382 : "=m" (*pu64),
383 "=r" (u64)
384 : "1" (u64),
385 "m" (*pu64));
386# else
387 __asm
388 {
389 mov rdx, [pu64]
390 mov rax, [u64]
391 xchg [rdx], rax
392 mov [u64], rax
393 }
394# endif
395# else /* !RT_ARCH_AMD64 */
396# if RT_INLINE_ASM_GNU_STYLE
397# if defined(PIC) || defined(__PIC__)
398 uint32_t u32EBX = (uint32_t)u64;
399 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
400 "xchgl %%ebx, %3\n\t"
401 "1:\n\t"
402 "lock; cmpxchg8b (%5)\n\t"
403 "jnz 1b\n\t"
404 "movl %3, %%ebx\n\t"
405 /*"xchgl %%esi, %5\n\t"*/
406 : "=A" (u64),
407 "=m" (*pu64)
408 : "0" (*pu64),
409 "m" ( u32EBX ),
410 "c" ( (uint32_t)(u64 >> 32) ),
411 "S" (pu64));
412# else /* !PIC */
413 __asm__ __volatile__("1:\n\t"
414 "lock; cmpxchg8b %1\n\t"
415 "jnz 1b\n\t"
416 : "=A" (u64),
417 "=m" (*pu64)
418 : "0" (*pu64),
419 "b" ( (uint32_t)u64 ),
420 "c" ( (uint32_t)(u64 >> 32) ));
421# endif
422# else
423 __asm
424 {
425 mov ebx, dword ptr [u64]
426 mov ecx, dword ptr [u64 + 4]
427 mov edi, pu64
428 mov eax, dword ptr [edi]
429 mov edx, dword ptr [edi + 4]
430 retry:
431 lock cmpxchg8b [edi]
432 jnz retry
433 mov dword ptr [u64], eax
434 mov dword ptr [u64 + 4], edx
435 }
436# endif
437# endif /* !RT_ARCH_AMD64 */
438 return u64;
439}
440#endif
441
442
443/**
444 * Atomically Exchange a signed 64-bit value, ordered.
445 *
446 * @returns Current *pi64 value
447 * @param pi64 Pointer to the 64-bit variable to update.
448 * @param i64 The 64-bit value to assign to *pi64.
449 */
450DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
451{
452 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
453}
454
455
456/**
457 * Atomically Exchange a pointer value, ordered.
458 *
459 * @returns Current *ppv value
460 * @param ppv Pointer to the pointer variable to update.
461 * @param pv The pointer value to assign to *ppv.
462 */
463DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
464{
465#if ARCH_BITS == 32
466 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
467#elif ARCH_BITS == 64
468 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
469#else
470# error "ARCH_BITS is bogus"
471#endif
472}
473
474
475/**
476 * Atomically Exchange a raw-mode context pointer value, ordered.
477 *
478 * @returns Current *ppvRC value
479 * @param ppvRC Pointer to the pointer variable to update.
480 * @param pvRC The pointer value to assign to *ppvRC.
481 */
482DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
483{
484 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
485}
486
487
488/**
489 * Atomically Exchange a ring-0 pointer value, ordered.
490 *
491 * @returns Current *ppvR0 value
492 * @param ppvR0 Pointer to the pointer variable to update.
493 * @param pvR0 The pointer value to assign to *ppvR0.
494 */
495DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
496{
497#if R0_ARCH_BITS == 32
498 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
499#elif R0_ARCH_BITS == 64
500 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
501#else
502# error "R0_ARCH_BITS is bogus"
503#endif
504}
505
506
507/**
508 * Atomically Exchange a ring-3 pointer value, ordered.
509 *
510 * @returns Current *ppvR3 value
511 * @param ppvR3 Pointer to the pointer variable to update.
512 * @param pvR3 The pointer value to assign to *ppvR3.
513 */
514DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
515{
516#if R3_ARCH_BITS == 32
517 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
518#elif R3_ARCH_BITS == 64
519 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
520#else
521# error "R3_ARCH_BITS is bogus"
522#endif
523}
524
525
526/** @def ASMAtomicXchgHandle
527 * Atomically Exchange a typical IPRT handle value, ordered.
528 *
529 * @param ph Pointer to the value to update.
530 * @param hNew The new value to assign to *ph.
531 * @param phRes Where to store the current *ph value.
532 *
533 * @remarks This doesn't currently work for all handles (like RTFILE).
534 */
535#if HC_ARCH_BITS == 32
536# define ASMAtomicXchgHandle(ph, hNew, phRes) \
537 do { \
538 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
539 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
540 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
541 } while (0)
542#elif HC_ARCH_BITS == 64
543# define ASMAtomicXchgHandle(ph, hNew, phRes) \
544 do { \
545 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
546 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
547 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
548 } while (0)
549#else
550# error HC_ARCH_BITS
551#endif
552
553
554/**
555 * Atomically Exchange a value whose size might differ
556 * between platforms or compilers, ordered.
557 *
558 * @param pu Pointer to the variable to update.
559 * @param uNew The value to assign to *pu.
560 * @todo This is busted as it's missing the result argument.
561 */
562#define ASMAtomicXchgSize(pu, uNew) \
563 do { \
564 switch (sizeof(*(pu))) { \
565 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
566 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
567 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
568 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
569 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
570 } \
571 } while (0)
572
573/**
574 * Atomically Exchange a value whose size might differ
575 * between platforms or compilers, ordered.
576 *
577 * @param pu Pointer to the variable to update.
578 * @param uNew The value to assign to *pu.
579 * @param puRes Where to store the current *pu value.
580 */
581#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
582 do { \
583 switch (sizeof(*(pu))) { \
584 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
585 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
586 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
587 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
588 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
589 } \
590 } while (0)
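/* Usage sketch for the size-generic exchange; MYCOUNTER and g_Counter are
 * illustrative:
 * @code
 * typedef uint32_t MYCOUNTER;
 * static volatile MYCOUNTER g_Counter;
 *
 * static MYCOUNTER ResetCounter(void)
 * {
 *     MYCOUNTER OldValue;
 *     ASMAtomicXchgSizeCorrect(&g_Counter, 0, &OldValue); // sizeof() selects the U32 variant here
 *     return OldValue;
 * }
 * @endcode
 */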
591
592
593
594/**
595 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
596 *
597 * @returns true if xchg was done.
598 * @returns false if xchg wasn't done.
599 *
600 * @param pu8 Pointer to the value to update.
601 * @param u8New The new value to assign to *pu8.
602 * @param u8Old The old value to compare *pu8 with.
603 */
604#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
605DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
606#else
607DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
608{
609 uint8_t u8Ret;
610 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
611 "setz %1\n\t"
612 : "=m" (*pu8),
613 "=qm" (u8Ret),
614 "=a" (u8Old)
615 : "q" (u8New),
616 "2" (u8Old),
617 "m" (*pu8));
618 return (bool)u8Ret;
619}
620#endif
621
622
623/**
624 * Atomically Compare and Exchange a signed 8-bit value, ordered.
625 *
626 * @returns true if xchg was done.
627 * @returns false if xchg wasn't done.
628 *
629 * @param pi8 Pointer to the value to update.
630 * @param i8New The new value to assign to *pi8.
631 * @param i8Old The old value to compare *pi8 with.
632 */
633DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
634{
635 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
636}
637
638
639/**
640 * Atomically Compare and Exchange a bool value, ordered.
641 *
642 * @returns true if xchg was done.
643 * @returns false if xchg wasn't done.
644 *
645 * @param pf Pointer to the value to update.
646 * @param fNew The new value to assign to *pf.
647 * @param fOld The old value to compare *pf with.
648 */
649DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
650{
651 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
652}
653
654
655/**
656 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
657 *
658 * @returns true if xchg was done.
659 * @returns false if xchg wasn't done.
660 *
661 * @param pu32 Pointer to the value to update.
662 * @param u32New The new value to assign to *pu32.
663 * @param u32Old The old value to compare *pu32 with.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
667#else
668DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
669{
670# if RT_INLINE_ASM_GNU_STYLE
671 uint8_t u8Ret;
672 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
673 "setz %1\n\t"
674 : "=m" (*pu32),
675 "=qm" (u8Ret),
676 "=a" (u32Old)
677 : "r" (u32New),
678 "2" (u32Old),
679 "m" (*pu32));
680 return (bool)u8Ret;
681
682# elif RT_INLINE_ASM_USES_INTRIN
683 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
684
685# else
686 uint32_t u32Ret;
687 __asm
688 {
689# ifdef RT_ARCH_AMD64
690 mov rdx, [pu32]
691# else
692 mov edx, [pu32]
693# endif
694 mov eax, [u32Old]
695 mov ecx, [u32New]
696# ifdef RT_ARCH_AMD64
697 lock cmpxchg [rdx], ecx
698# else
699 lock cmpxchg [edx], ecx
700# endif
701 setz al
702 movzx eax, al
703 mov [u32Ret], eax
704 }
705 return !!u32Ret;
706# endif
707}
708#endif
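/* The typical compare-and-exchange retry loop; AtomicUpdateMaxU32 is a sketch,
 * not an IPRT API:
 * @code
 * static void AtomicUpdateMaxU32(volatile uint32_t *pu32Max, uint32_t u32New)
 * {
 *     for (;;)
 *     {
 *         uint32_t u32Cur = ASMAtomicReadU32(pu32Max);
 *         if (u32New <= u32Cur)
 *             return;                                      // nothing to update
 *         if (ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur))
 *             return;                                      // we won the race
 *         // somebody else changed *pu32Max in the meantime; reload and retry
 *     }
 * }
 * @endcode
 */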
709
710
711/**
712 * Atomically Compare and Exchange a signed 32-bit value, ordered.
713 *
714 * @returns true if xchg was done.
715 * @returns false if xchg wasn't done.
716 *
717 * @param pi32 Pointer to the value to update.
718 * @param i32New The new value to assign to *pi32.
719 * @param i32Old The old value to compare *pi32 with.
720 */
721DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
722{
723 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
724}
725
726
727/**
728 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
729 *
730 * @returns true if xchg was done.
731 * @returns false if xchg wasn't done.
732 *
733 * @param pu64 Pointer to the 64-bit variable to update.
734 * @param u64New The 64-bit value to assign to *pu64.
735 * @param u64Old The value to compare with.
736 */
737#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
738 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
739DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
740#else
741DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
742{
743# if RT_INLINE_ASM_USES_INTRIN
744 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
745
746# elif defined(RT_ARCH_AMD64)
747# if RT_INLINE_ASM_GNU_STYLE
748 uint8_t u8Ret;
749 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
750 "setz %1\n\t"
751 : "=m" (*pu64),
752 "=qm" (u8Ret),
753 "=a" (u64Old)
754 : "r" (u64New),
755 "2" (u64Old),
756 "m" (*pu64));
757 return (bool)u8Ret;
758# else
759 bool fRet;
760 __asm
761 {
762 mov rdx, [pu32]
763 mov rax, [u64Old]
764 mov rcx, [u64New]
765 lock cmpxchg [rdx], rcx
766 setz al
767 mov [fRet], al
768 }
769 return fRet;
770# endif
771# else /* !RT_ARCH_AMD64 */
772 uint32_t u32Ret;
773# if RT_INLINE_ASM_GNU_STYLE
774# if defined(PIC) || defined(__PIC__)
775 uint32_t u32EBX = (uint32_t)u64New;
776 uint32_t u32Spill;
777 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
778 "lock; cmpxchg8b (%6)\n\t"
779 "setz %%al\n\t"
780 "movl %4, %%ebx\n\t"
781 "movzbl %%al, %%eax\n\t"
782 : "=a" (u32Ret),
783 "=d" (u32Spill),
784# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
785 "+m" (*pu64)
786# else
787 "=m" (*pu64)
788# endif
789 : "A" (u64Old),
790 "m" ( u32EBX ),
791 "c" ( (uint32_t)(u64New >> 32) ),
792 "S" (pu64));
793# else /* !PIC */
794 uint32_t u32Spill;
795 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
796 "setz %%al\n\t"
797 "movzbl %%al, %%eax\n\t"
798 : "=a" (u32Ret),
799 "=d" (u32Spill),
800 "+m" (*pu64)
801 : "A" (u64Old),
802 "b" ( (uint32_t)u64New ),
803 "c" ( (uint32_t)(u64New >> 32) ));
804# endif
805 return (bool)u32Ret;
806# else
807 __asm
808 {
809 mov ebx, dword ptr [u64New]
810 mov ecx, dword ptr [u64New + 4]
811 mov edi, [pu64]
812 mov eax, dword ptr [u64Old]
813 mov edx, dword ptr [u64Old + 4]
814 lock cmpxchg8b [edi]
815 setz al
816 movzx eax, al
817 mov dword ptr [u32Ret], eax
818 }
819 return !!u32Ret;
820# endif
821# endif /* !RT_ARCH_AMD64 */
822}
823#endif
824
825
826/**
827 * Atomically Compare and exchange a signed 64-bit value, ordered.
828 *
829 * @returns true if xchg was done.
830 * @returns false if xchg wasn't done.
831 *
832 * @param pi64 Pointer to the 64-bit variable to update.
833 * @param i64 The 64-bit value to assign to *pi64.
834 * @param i64Old The value to compare with.
835 */
836DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
837{
838 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
839}
840
841
842/**
843 * Atomically Compare and Exchange a pointer value, ordered.
844 *
845 * @returns true if xchg was done.
846 * @returns false if xchg wasn't done.
847 *
848 * @param ppv Pointer to the value to update.
849 * @param pvNew The new value to assign to *ppv.
850 * @param pvOld The old value to compare *ppv with.
851 */
852DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
853{
854#if ARCH_BITS == 32
855 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
856#elif ARCH_BITS == 64
857 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
858#else
859# error "ARCH_BITS is bogus"
860#endif
861}
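/* Sketch of a lock-free single-linked push built on ASMAtomicCmpXchgPtr;
 * MYNODE and g_pHead are illustrative (and the usual ABA caveats for the
 * corresponding pop are ignored here):
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 * static MYNODE * volatile g_pHead;
 *
 * static void PushNode(MYNODE *pNode)
 * {
 *     void *pvOld;
 *     do
 *     {
 *         pvOld = ASMAtomicReadPtr((void * volatile *)&g_pHead);
 *         pNode->pNext = (MYNODE *)pvOld;
 *     } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pHead, pNode, pvOld));
 * }
 * @endcode
 */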
862
863
864/** @def ASMAtomicCmpXchgHandle
865 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
866 *
867 * @param ph Pointer to the value to update.
868 * @param hNew The new value to assign to *ph.
869 * @param hOld The old value to compare *ph with.
870 * @param fRc Where to store the result.
871 *
872 * @remarks This doesn't currently work for all handles (like RTFILE).
873 */
874#if HC_ARCH_BITS == 32
875# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
876 do { \
877 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
878 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
879 } while (0)
880#elif HC_ARCH_BITS == 64
881# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
882 do { \
883 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
884 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
885 } while (0)
886#else
887# error HC_ARCH_BITS
888#endif
889
890
891/** @def ASMAtomicCmpXchgSize
892 * Atomically Compare and Exchange a value whose size might differ
893 * between platforms or compilers, ordered.
894 *
895 * @param pu Pointer to the value to update.
896 * @param uNew The new value to assign to *pu.
897 * @param uOld The old value to compare *pu with.
898 * @param fRc Where to store the result.
899 */
900#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
901 do { \
902 switch (sizeof(*(pu))) { \
903 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
904 break; \
905 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
906 break; \
907 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
908 (fRc) = false; \
909 break; \
910 } \
911 } while (0)
912
913
914/**
915 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
916 * passes back old value, ordered.
917 *
918 * @returns true if xchg was done.
919 * @returns false if xchg wasn't done.
920 *
921 * @param pu32 Pointer to the value to update.
922 * @param u32New The new value to assign to *pu32.
923 * @param u32Old The old value to compare *pu32 with.
924 * @param pu32Old Pointer to where to store the old value.
925 */
926#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
927DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
928#else
929DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
930{
931# if RT_INLINE_ASM_GNU_STYLE
932 uint8_t u8Ret;
933 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
934 "setz %1\n\t"
935 : "=m" (*pu32),
936 "=qm" (u8Ret),
937 "=a" (*pu32Old)
938 : "r" (u32New),
939 "a" (u32Old),
940 "m" (*pu32));
941 return (bool)u8Ret;
942
943# elif RT_INLINE_ASM_USES_INTRIN
944 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
945
946# else
947 uint32_t u32Ret;
948 __asm
949 {
950# ifdef RT_ARCH_AMD64
951 mov rdx, [pu32]
952# else
953 mov edx, [pu32]
954# endif
955 mov eax, [u32Old]
956 mov ecx, [u32New]
957# ifdef RT_ARCH_AMD64
958 lock cmpxchg [rdx], ecx
959 mov rdx, [pu32Old]
960 mov [rdx], eax
961# else
962 lock cmpxchg [edx], ecx
963 mov edx, [pu32Old]
964 mov [edx], eax
965# endif
966 setz al
967 movzx eax, al
968 mov [u32Ret], eax
969 }
970 return !!u32Ret;
971# endif
972}
973#endif
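/* The Ex variant hands back the value actually found, which saves an explicit
 * re-read in retry loops. AtomicFetchOrU32 below is a sketch, not an IPRT API:
 * @code
 * static uint32_t AtomicFetchOrU32(volatile uint32_t *pu32, uint32_t fBits)
 * {
 *     uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *     while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBits, u32Old, &u32Old))
 *         ;   // u32Old now holds the value that was actually there; retry with it
 *     return u32Old;   // the value before the bits were set
 * }
 * @endcode
 */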
974
975
976/**
977 * Atomically Compare and Exchange a signed 32-bit value, additionally
978 * passes back old value, ordered.
979 *
980 * @returns true if xchg was done.
981 * @returns false if xchg wasn't done.
982 *
983 * @param pi32 Pointer to the value to update.
984 * @param i32New The new value to assign to *pi32.
985 * @param i32Old The old value to compare *pi32 with.
986 * @param pi32Old Pointer to where to store the old value.
987 */
988DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
989{
990 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
991}
992
993
994/**
995 * Atomically Compare and exchange an unsigned 64-bit value, additionally
996 * passing back old value, ordered.
997 *
998 * @returns true if xchg was done.
999 * @returns false if xchg wasn't done.
1000 *
1001 * @param pu64 Pointer to the 64-bit variable to update.
1002 * @param u64New The 64-bit value to assign to *pu64.
1003 * @param u64Old The value to compare with.
1004 * @param pu64Old Pointer to where to store the old value.
1005 */
1006#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1007 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1008DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1009#else
1010DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1011{
1012# if RT_INLINE_ASM_USES_INTRIN
1013 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1014
1015# elif defined(RT_ARCH_AMD64)
1016# if RT_INLINE_ASM_GNU_STYLE
1017 uint8_t u8Ret;
1018 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1019 "setz %1\n\t"
1020 : "=m" (*pu64),
1021 "=qm" (u8Ret),
1022 "=a" (*pu64Old)
1023 : "r" (u64New),
1024 "a" (u64Old),
1025 "m" (*pu64));
1026 return (bool)u8Ret;
1027# else
1028 bool fRet;
1029 __asm
1030 {
1031 mov rdx, [pu32]
1032 mov rax, [u64Old]
1033 mov rcx, [u64New]
1034 lock cmpxchg [rdx], rcx
1035 mov rdx, [pu64Old]
1036 mov [rdx], rax
1037 setz al
1038 mov [fRet], al
1039 }
1040 return fRet;
1041# endif
1042# else /* !RT_ARCH_AMD64 */
1043# if RT_INLINE_ASM_GNU_STYLE
1044 uint64_t u64Ret;
1045# if defined(PIC) || defined(__PIC__)
1046 /* NB: this code uses a memory clobber description, because the clean
1047 * solution with an output value for *pu64 makes gcc run out of registers.
1048 * This will cause suboptimal code, and anyone with a better solution is
1049 * welcome to improve this. */
1050 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1051 "lock; cmpxchg8b %3\n\t"
1052 "xchgl %%ebx, %1\n\t"
1053 : "=A" (u64Ret)
1054 : "DS" ((uint32_t)u64New),
1055 "c" ((uint32_t)(u64New >> 32)),
1056 "m" (*pu64),
1057 "0" (u64Old)
1058 : "memory" );
1059# else /* !PIC */
1060 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1061 : "=A" (u64Ret),
1062 "=m" (*pu64)
1063 : "b" ((uint32_t)u64New),
1064 "c" ((uint32_t)(u64New >> 32)),
1065 "m" (*pu64),
1066 "0" (u64Old));
1067# endif
1068 *pu64Old = u64Ret;
1069 return u64Ret == u64Old;
1070# else
1071 uint32_t u32Ret;
1072 __asm
1073 {
1074 mov ebx, dword ptr [u64New]
1075 mov ecx, dword ptr [u64New + 4]
1076 mov edi, [pu64]
1077 mov eax, dword ptr [u64Old]
1078 mov edx, dword ptr [u64Old + 4]
1079 lock cmpxchg8b [edi]
1080 mov ebx, [pu64Old]
1081 mov [ebx], eax
1082 setz al
1083 movzx eax, al
1084 add ebx, 4
1085 mov [ebx], edx
1086 mov dword ptr [u32Ret], eax
1087 }
1088 return !!u32Ret;
1089# endif
1090# endif /* !RT_ARCH_AMD64 */
1091}
1092#endif
1093
1094
1095/**
1096 * Atomically Compare and exchange a signed 64-bit value, additionally
1097 * passing back old value, ordered.
1098 *
1099 * @returns true if xchg was done.
1100 * @returns false if xchg wasn't done.
1101 *
1102 * @param pi64 Pointer to the 64-bit variable to update.
1103 * @param i64 The 64-bit value to assign to *pi64.
1104 * @param i64Old The value to compare with.
1105 * @param pi64Old Pointer to where to store the old value.
1106 */
1107DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1108{
1109 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1110}
1111
1112/** @def ASMAtomicCmpXchgExHandle
1113 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1114 *
1115 * @param ph Pointer to the value to update.
1116 * @param hNew The new value to assign to *ph.
1117 * @param hOld The old value to compare *ph with.
1118 * @param fRc Where to store the result.
1119 * @param phOldVal Pointer to where to store the old value.
1120 *
1121 * @remarks This doesn't currently work for all handles (like RTFILE).
1122 */
1123#if HC_ARCH_BITS == 32
1124# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1125 do { \
1126 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1127 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1128 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1129 } while (0)
1130#elif HC_ARCH_BITS == 64
1131# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1132 do { \
1133 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1134 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1135 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1136 } while (0)
1137#else
1138# error HC_ARCH_BITS
1139#endif
1140
1141
1142/** @def ASMAtomicCmpXchgExSize
1143 * Atomically Compare and Exchange a value whose size might differ
1144 * between platforms or compilers. Additionally passes back old value.
1145 *
1146 * @param pu Pointer to the value to update.
1147 * @param uNew The new value to assign to *pu.
1148 * @param uOld The old value to compare *pu with.
1149 * @param fRc Where to store the result.
1150 * @param puOldVal Pointer to where to store the old value.
1151 */
1152#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1153 do { \
1154 switch (sizeof(*(pu))) { \
1155 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1156 break; \
1157 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1158 break; \
1159 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1160 (fRc) = false; \
1161 *(puOldVal) = 0; \
1162 break; \
1163 } \
1164 } while (0)
1165
1166
1167/**
1168 * Atomically Compare and Exchange a pointer value, additionally
1169 * passing back old value, ordered.
1170 *
1171 * @returns true if xchg was done.
1172 * @returns false if xchg wasn't done.
1173 *
1174 * @param ppv Pointer to the value to update.
1175 * @param pvNew The new value to assign to *ppv.
1176 * @param pvOld The old value to compare *ppv with.
1177 * @param ppvOld Pointer to where to store the old value.
1178 */
1179DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1180{
1181#if ARCH_BITS == 32
1182 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1183#elif ARCH_BITS == 64
1184 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1185#else
1186# error "ARCH_BITS is bogus"
1187#endif
1188}
1189
1190
1191/**
1192 * Atomically exchanges and adds to a 32-bit value, ordered.
1193 *
1194 * @returns The old value.
1195 * @param pu32 Pointer to the value.
1196 * @param u32 Number to add.
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
1200#else
1201DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
1202{
1203# if RT_INLINE_ASM_USES_INTRIN
1204 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
1205 return u32;
1206
1207# elif RT_INLINE_ASM_GNU_STYLE
1208 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1209 : "=r" (u32),
1210 "=m" (*pu32)
1211 : "0" (u32),
1212 "m" (*pu32)
1213 : "memory");
1214 return u32;
1215# else
1216 __asm
1217 {
1218 mov eax, [u32]
1219# ifdef RT_ARCH_AMD64
1220 mov rdx, [pu32]
1221 lock xadd [rdx], eax
1222# else
1223 mov edx, [pu32]
1224 lock xadd [edx], eax
1225# endif
1226 mov [u32], eax
1227 }
1228 return u32;
1229# endif
1230}
1231#endif
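/* ASMAtomicAddU32 returns the value *before* the addition (fetch-and-add),
 * which is handy for handing out sequence numbers; g_uNextTicket is illustrative:
 * @code
 * static volatile uint32_t g_uNextTicket;
 *
 * static uint32_t TakeTicket(void)
 * {
 *     return ASMAtomicAddU32(&g_uNextTicket, 1); // the old value is our ticket number
 * }
 * @endcode
 */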
1232
1233
1234/**
1235 * Atomically exchanges and adds to a signed 32-bit value, ordered.
1236 *
1237 * @returns The old value.
1238 * @param pi32 Pointer to the value.
1239 * @param i32 Number to add.
1240 */
1241DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
1242{
1243 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
1244}
1245
1246
1247/**
1248 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
1249 *
1250 * @returns The old value.
1251 * @param pu32 Pointer to the value.
1252 * @param u32 Number to subtract.
1253 */
1254DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
1255{
1256 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
1257}
1258
1259
1260/**
1261 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
1262 *
1263 * @returns The old value.
1264 * @param pi32 Pointer to the value.
1265 * @param i32 Number to subtract.
1266 */
1267DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
1268{
1269 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
1270}
1271
1272
1273/**
1274 * Atomically increment a 32-bit value, ordered.
1275 *
1276 * @returns The new value.
1277 * @param pu32 Pointer to the value to increment.
1278 */
1279#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1280DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
1281#else
1282DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
1283{
1284 uint32_t u32;
1285# if RT_INLINE_ASM_USES_INTRIN
1286 u32 = _InterlockedIncrement((long *)pu32);
1287 return u32;
1288
1289# elif RT_INLINE_ASM_GNU_STYLE
1290 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1291 : "=r" (u32),
1292 "=m" (*pu32)
1293 : "0" (1),
1294 "m" (*pu32)
1295 : "memory");
1296 return u32+1;
1297# else
1298 __asm
1299 {
1300 mov eax, 1
1301# ifdef RT_ARCH_AMD64
1302 mov rdx, [pu32]
1303 lock xadd [rdx], eax
1304# else
1305 mov edx, [pu32]
1306 lock xadd [edx], eax
1307# endif
1308 mov u32, eax
1309 }
1310 return u32+1;
1311# endif
1312}
1313#endif
1314
1315
1316/**
1317 * Atomically increment a signed 32-bit value, ordered.
1318 *
1319 * @returns The new value.
1320 * @param pi32 Pointer to the value to increment.
1321 */
1322DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
1323{
1324 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
1325}
1326
1327
1328/**
1329 * Atomically decrement an unsigned 32-bit value, ordered.
1330 *
1331 * @returns The new value.
1332 * @param pu32 Pointer to the value to decrement.
1333 */
1334#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1335DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
1336#else
1337DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
1338{
1339 uint32_t u32;
1340# if RT_INLINE_ASM_USES_INTRIN
1341 u32 = _InterlockedDecrement((long *)pu32);
1342 return u32;
1343
1344# elif RT_INLINE_ASM_GNU_STYLE
1345 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
1346 : "=r" (u32),
1347 "=m" (*pu32)
1348 : "0" (-1),
1349 "m" (*pu32)
1350 : "memory");
1351 return u32-1;
1352# else
1353 __asm
1354 {
1355 mov eax, -1
1356# ifdef RT_ARCH_AMD64
1357 mov rdx, [pu32]
1358 lock xadd [rdx], eax
1359# else
1360 mov edx, [pu32]
1361 lock xadd [edx], eax
1362# endif
1363 mov u32, eax
1364 }
1365 return u32-1;
1366# endif
1367}
1368#endif
1369
1370
1371/**
1372 * Atomically decrement a signed 32-bit value, ordered.
1373 *
1374 * @returns The new value.
1375 * @param pi32 Pointer to the value to decrement.
1376 */
1377DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
1378{
1379 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
1380}
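/* Inc/Dec return the *new* value, which is exactly what reference counting needs;
 * MYOBJ and DestroyObj are illustrative:
 * @code
 * typedef struct MYOBJ { volatile uint32_t cRefs; } MYOBJ;
 *
 * static void ObjRetain(MYOBJ *pObj)
 * {
 *     ASMAtomicIncU32(&pObj->cRefs);
 * }
 *
 * static void ObjRelease(MYOBJ *pObj)
 * {
 *     if (ASMAtomicDecU32(&pObj->cRefs) == 0)  // new count is zero -> last reference gone
 *         DestroyObj(pObj);                    // hypothetical destructor
 * }
 * @endcode
 */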
1381
1382
1383/**
1384 * Atomically Or an unsigned 32-bit value, ordered.
1385 *
1386 * @param pu32 Pointer to the variable to OR u32 with.
1387 * @param u32 The value to OR *pu32 with.
1388 */
1389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1390DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
1391#else
1392DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
1393{
1394# if RT_INLINE_ASM_USES_INTRIN
1395 _InterlockedOr((long volatile *)pu32, (long)u32);
1396
1397# elif RT_INLINE_ASM_GNU_STYLE
1398 __asm__ __volatile__("lock; orl %1, %0\n\t"
1399 : "=m" (*pu32)
1400 : "ir" (u32),
1401 "m" (*pu32));
1402# else
1403 __asm
1404 {
1405 mov eax, [u32]
1406# ifdef RT_ARCH_AMD64
1407 mov rdx, [pu32]
1408 lock or [rdx], eax
1409# else
1410 mov edx, [pu32]
1411 lock or [edx], eax
1412# endif
1413 }
1414# endif
1415}
1416#endif
1417
1418
1419/**
1420 * Atomically Or a signed 32-bit value, ordered.
1421 *
1422 * @param pi32 Pointer to the variable to OR i32 with.
1423 * @param i32 The value to OR *pi32 with.
1424 */
1425DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
1426{
1427 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
1428}
1429
1430
1431/**
1432 * Atomically And an unsigned 32-bit value, ordered.
1433 *
1434 * @param pu32 Pointer to the variable to AND u32 with.
1435 * @param u32 The value to AND *pu32 with.
1436 */
1437#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1438DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
1439#else
1440DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
1441{
1442# if RT_INLINE_ASM_USES_INTRIN
1443 _InterlockedAnd((long volatile *)pu32, u32);
1444
1445# elif RT_INLINE_ASM_GNU_STYLE
1446 __asm__ __volatile__("lock; andl %1, %0\n\t"
1447 : "=m" (*pu32)
1448 : "ir" (u32),
1449 "m" (*pu32));
1450# else
1451 __asm
1452 {
1453 mov eax, [u32]
1454# ifdef RT_ARCH_AMD64
1455 mov rdx, [pu32]
1456 lock and [rdx], eax
1457# else
1458 mov edx, [pu32]
1459 lock and [edx], eax
1460# endif
1461 }
1462# endif
1463}
1464#endif
1465
1466
1467/**
1468 * Atomically And a signed 32-bit value, ordered.
1469 *
1470 * @param pi32 Pointer to the variable to AND i32 with.
1471 * @param i32 The value to AND *pi32 with.
1472 */
1473DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
1474{
1475 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
1476}
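/* Setting and clearing individual flag bits with the Or/And pair; g_fFlags and
 * the bit value are illustrative:
 * @code
 * static volatile uint32_t g_fFlags;
 *
 * static void SetBusy(void)   { ASMAtomicOrU32(&g_fFlags,  UINT32_C(0x00000001)); }
 * static void ClearBusy(void) { ASMAtomicAndU32(&g_fFlags, ~UINT32_C(0x00000001)); }
 * @endcode
 */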
1477
1478
1479/**
1480 * Serialize Instruction.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1483DECLASM(void) ASMSerializeInstruction(void);
1484#else
1485DECLINLINE(void) ASMSerializeInstruction(void)
1486{
1487# if RT_INLINE_ASM_GNU_STYLE
1488 RTCCUINTREG xAX = 0;
1489# ifdef RT_ARCH_AMD64
1490 __asm__ ("cpuid"
1491 : "=a" (xAX)
1492 : "0" (xAX)
1493 : "rbx", "rcx", "rdx");
1494# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1495 __asm__ ("push %%ebx\n\t"
1496 "cpuid\n\t"
1497 "pop %%ebx\n\t"
1498 : "=a" (xAX)
1499 : "0" (xAX)
1500 : "ecx", "edx");
1501# else
1502 __asm__ ("cpuid"
1503 : "=a" (xAX)
1504 : "0" (xAX)
1505 : "ebx", "ecx", "edx");
1506# endif
1507
1508# elif RT_INLINE_ASM_USES_INTRIN
1509 int aInfo[4];
1510 __cpuid(aInfo, 0);
1511
1512# else
1513 __asm
1514 {
1515 push ebx
1516 xor eax, eax
1517 cpuid
1518 pop ebx
1519 }
1520# endif
1521}
1522#endif
1523
1524
1525/**
1526 * Memory fence, waits for any pending writes and reads to complete.
1527 */
1528DECLINLINE(void) ASMMemoryFence(void)
1529{
1530 /** @todo use mfence? check if all cpus we care for support it. */
1531 uint32_t volatile u32;
1532 ASMAtomicXchgU32(&u32, 0);
1533}
1534
1535
1536/**
1537 * Write fence, waits for any pending writes to complete.
1538 */
1539DECLINLINE(void) ASMWriteFence(void)
1540{
1541 /** @todo use sfence? check if all cpus we care for support it. */
1542 ASMMemoryFence();
1543}
1544
1545
1546/**
1547 * Read fence, waits for any pending reads to complete.
1548 */
1549DECLINLINE(void) ASMReadFence(void)
1550{
1551 /** @todo use lfence? check if all cpus we care for support it. */
1552 ASMMemoryFence();
1553}
1554
1555
1556/**
1557 * Atomically reads an unsigned 8-bit value, ordered.
1558 *
1559 * @returns Current *pu8 value
1560 * @param pu8 Pointer to the 8-bit variable to read.
1561 */
1562DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1563{
1564 ASMMemoryFence();
1565 return *pu8; /* byte reads are atomic on x86 */
1566}
1567
1568
1569/**
1570 * Atomically reads an unsigned 8-bit value, unordered.
1571 *
1572 * @returns Current *pu8 value
1573 * @param pu8 Pointer to the 8-bit variable to read.
1574 */
1575DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1576{
1577 return *pu8; /* byte reads are atomic on x86 */
1578}
1579
1580
1581/**
1582 * Atomically reads a signed 8-bit value, ordered.
1583 *
1584 * @returns Current *pi8 value
1585 * @param pi8 Pointer to the 8-bit variable to read.
1586 */
1587DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1588{
1589 ASMMemoryFence();
1590 return *pi8; /* byte reads are atomic on x86 */
1591}
1592
1593
1594/**
1595 * Atomically reads a signed 8-bit value, unordered.
1596 *
1597 * @returns Current *pi8 value
1598 * @param pi8 Pointer to the 8-bit variable to read.
1599 */
1600DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1601{
1602 return *pi8; /* byte reads are atomic on x86 */
1603}
1604
1605
1606/**
1607 * Atomically reads an unsigned 16-bit value, ordered.
1608 *
1609 * @returns Current *pu16 value
1610 * @param pu16 Pointer to the 16-bit variable to read.
1611 */
1612DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1613{
1614 ASMMemoryFence();
1615 Assert(!((uintptr_t)pu16 & 1));
1616 return *pu16;
1617}
1618
1619
1620/**
1621 * Atomically reads an unsigned 16-bit value, unordered.
1622 *
1623 * @returns Current *pu16 value
1624 * @param pu16 Pointer to the 16-bit variable to read.
1625 */
1626DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1627{
1628 Assert(!((uintptr_t)pu16 & 1));
1629 return *pu16;
1630}
1631
1632
1633/**
1634 * Atomically reads a signed 16-bit value, ordered.
1635 *
1636 * @returns Current *pi16 value
1637 * @param pi16 Pointer to the 16-bit variable to read.
1638 */
1639DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1640{
1641 ASMMemoryFence();
1642 Assert(!((uintptr_t)pi16 & 1));
1643 return *pi16;
1644}
1645
1646
1647/**
1648 * Atomically reads a signed 16-bit value, unordered.
1649 *
1650 * @returns Current *pi16 value
1651 * @param pi16 Pointer to the 16-bit variable to read.
1652 */
1653DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1654{
1655 Assert(!((uintptr_t)pi16 & 1));
1656 return *pi16;
1657}
1658
1659
1660/**
1661 * Atomically reads an unsigned 32-bit value, ordered.
1662 *
1663 * @returns Current *pu32 value
1664 * @param pu32 Pointer to the 32-bit variable to read.
1665 */
1666DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1667{
1668 ASMMemoryFence();
1669 Assert(!((uintptr_t)pu32 & 3));
1670 return *pu32;
1671}
1672
1673
1674/**
1675 * Atomically reads an unsigned 32-bit value, unordered.
1676 *
1677 * @returns Current *pu32 value
1678 * @param pu32 Pointer to the 32-bit variable to read.
1679 */
1680DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1681{
1682 Assert(!((uintptr_t)pu32 & 3));
1683 return *pu32;
1684}
1685
1686
1687/**
1688 * Atomically reads a signed 32-bit value, ordered.
1689 *
1690 * @returns Current *pi32 value
1691 * @param pi32 Pointer to the 32-bit variable to read.
1692 */
1693DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1694{
1695 ASMMemoryFence();
1696 Assert(!((uintptr_t)pi32 & 3));
1697 return *pi32;
1698}
1699
1700
1701/**
1702 * Atomically reads a signed 32-bit value, unordered.
1703 *
1704 * @returns Current *pi32 value
1705 * @param pi32 Pointer to the 32-bit variable to read.
1706 */
1707DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1708{
1709 Assert(!((uintptr_t)pi32 & 3));
1710 return *pi32;
1711}
1712
1713
1714/**
1715 * Atomically reads an unsigned 64-bit value, ordered.
1716 *
1717 * @returns Current *pu64 value
1718 * @param pu64 Pointer to the 64-bit variable to read.
1719 * The memory pointed to must be writable.
1720 * @remark This will fault if the memory is read-only!
1721 */
1722#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1723 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1724DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1725#else
1726DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1727{
1728 uint64_t u64;
1729# ifdef RT_ARCH_AMD64
1730 Assert(!((uintptr_t)pu64 & 7));
1731/*# if RT_INLINE_ASM_GNU_STYLE
1732 __asm__ __volatile__( "mfence\n\t"
1733 "movq %1, %0\n\t"
1734 : "=r" (u64)
1735 : "m" (*pu64));
1736# else
1737 __asm
1738 {
1739 mfence
1740 mov rdx, [pu64]
1741 mov rax, [rdx]
1742 mov [u64], rax
1743 }
1744# endif*/
1745 ASMMemoryFence();
1746 u64 = *pu64;
1747# else /* !RT_ARCH_AMD64 */
1748# if RT_INLINE_ASM_GNU_STYLE
1749# if defined(PIC) || defined(__PIC__)
1750 uint32_t u32EBX = 0;
1751 Assert(!((uintptr_t)pu64 & 7));
1752 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1753 "lock; cmpxchg8b (%5)\n\t"
1754 "movl %3, %%ebx\n\t"
1755 : "=A" (u64),
1756# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1757 "+m" (*pu64)
1758# else
1759 "=m" (*pu64)
1760# endif
1761 : "0" (0),
1762 "m" (u32EBX),
1763 "c" (0),
1764 "S" (pu64));
1765# else /* !PIC */
1766 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1767 : "=A" (u64),
1768 "+m" (*pu64)
1769 : "0" (0),
1770 "b" (0),
1771 "c" (0));
1772# endif
1773# else
1774 Assert(!((uintptr_t)pu64 & 7));
1775 __asm
1776 {
1777 xor eax, eax
1778 xor edx, edx
1779 mov edi, pu64
1780 xor ecx, ecx
1781 xor ebx, ebx
1782 lock cmpxchg8b [edi]
1783 mov dword ptr [u64], eax
1784 mov dword ptr [u64 + 4], edx
1785 }
1786# endif
1787# endif /* !RT_ARCH_AMD64 */
1788 return u64;
1789}
1790#endif
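/* On 32-bit hosts the read above is implemented with 'lock cmpxchg8b', which
 * performs a locked write cycle even when the value is left unchanged; that is
 * why the memory must be writable. Typical use (g_u64Stamp is illustrative):
 * @code
 * static volatile uint64_t g_u64Stamp;
 *
 * static uint64_t GetStamp(void)
 * {
 *     return ASMAtomicReadU64(&g_u64Stamp); // works on both 32-bit and 64-bit hosts
 * }
 * @endcode
 */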
1791
1792
1793/**
1794 * Atomically reads an unsigned 64-bit value, unordered.
1795 *
1796 * @returns Current *pu64 value
1797 * @param pu64 Pointer to the 64-bit variable to read.
1798 * The memory pointed to must be writable.
1799 * @remark This will fault if the memory is read-only!
1800 */
1801#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1802 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1803DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1804#else
1805DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1806{
1807 uint64_t u64;
1808# ifdef RT_ARCH_AMD64
1809 Assert(!((uintptr_t)pu64 & 7));
1810/*# if RT_INLINE_ASM_GNU_STYLE
1811 Assert(!((uintptr_t)pu64 & 7));
1812 __asm__ __volatile__("movq %1, %0\n\t"
1813 : "=r" (u64)
1814 : "m" (*pu64));
1815# else
1816 __asm
1817 {
1818 mov rdx, [pu64]
1819 mov rax, [rdx]
1820 mov [u64], rax
1821 }
1822# endif */
1823 u64 = *pu64;
1824# else /* !RT_ARCH_AMD64 */
1825# if RT_INLINE_ASM_GNU_STYLE
1826# if defined(PIC) || defined(__PIC__)
1827 uint32_t u32EBX = 0;
1828 uint32_t u32Spill;
1829 Assert(!((uintptr_t)pu64 & 7));
1830 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1831 "xor %%ecx,%%ecx\n\t"
1832 "xor %%edx,%%edx\n\t"
1833 "xchgl %%ebx, %3\n\t"
1834 "lock; cmpxchg8b (%4)\n\t"
1835 "movl %3, %%ebx\n\t"
1836 : "=A" (u64),
1837# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1838 "+m" (*pu64),
1839# else
1840 "=m" (*pu64),
1841# endif
1842 "=c" (u32Spill)
1843 : "m" (u32EBX),
1844 "S" (pu64));
1845# else /* !PIC */
1846 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1847 : "=A" (u64),
1848 "+m" (*pu64)
1849 : "0" (0),
1850 "b" (0),
1851 "c" (0));
1852# endif
1853# else
1854 Assert(!((uintptr_t)pu64 & 7));
1855 __asm
1856 {
1857 xor eax, eax
1858 xor edx, edx
1859 mov edi, pu64
1860 xor ecx, ecx
1861 xor ebx, ebx
1862 lock cmpxchg8b [edi]
1863 mov dword ptr [u64], eax
1864 mov dword ptr [u64 + 4], edx
1865 }
1866# endif
1867# endif /* !RT_ARCH_AMD64 */
1868 return u64;
1869}
1870#endif
1871
1872
1873/**
1874 * Atomically reads a signed 64-bit value, ordered.
1875 *
1876 * @returns Current *pi64 value
1877 * @param pi64 Pointer to the 64-bit variable to read.
1878 * The memory pointed to must be writable.
1879 * @remark This will fault if the memory is read-only!
1880 */
1881DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1882{
1883 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1884}
1885
1886
1887/**
1888 * Atomically reads a signed 64-bit value, unordered.
1889 *
1890 * @returns Current *pi64 value
1891 * @param pi64 Pointer to the 64-bit variable to read.
1892 * The memory pointed to must be writable.
1893 * @remark This will fault if the memory is read-only!
1894 */
1895DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1896{
1897 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1898}
1899
1900
1901/**
1902 * Atomically reads a pointer value, ordered.
1903 *
1904 * @returns Current *pv value
1905 * @param ppv Pointer to the pointer variable to read.
1906 */
1907DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1908{
1909#if ARCH_BITS == 32
1910 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1911#elif ARCH_BITS == 64
1912 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1913#else
1914# error "ARCH_BITS is bogus"
1915#endif
1916}
1917
1918
1919/**
1920 * Atomically reads a pointer value, unordered.
1921 *
1922 * @returns Current *pv value
1923 * @param ppv Pointer to the pointer variable to read.
1924 */
1925DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1926{
1927#if ARCH_BITS == 32
1928 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1929#elif ARCH_BITS == 64
1930 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1931#else
1932# error "ARCH_BITS is bogus"
1933#endif
1934}
1935
1936
1937/**
1938 * Atomically reads a boolean value, ordered.
1939 *
1940 * @returns Current *pf value
1941 * @param pf Pointer to the boolean variable to read.
1942 */
1943DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1944{
1945 ASMMemoryFence();
1946 return *pf; /* byte reads are atomic on x86 */
1947}
1948
1949
1950/**
1951 * Atomically reads a boolean value, unordered.
1952 *
1953 * @returns Current *pf value
1954 * @param pf Pointer to the boolean variable to read.
1955 */
1956DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1957{
1958 return *pf; /* byte reads are atomic on x86 */
1959}
1960
1961
1962/**
1963 * Atomically read a typical IPRT handle value, ordered.
1964 *
1965 * @param ph Pointer to the handle variable to read.
1966 * @param phRes Where to store the result.
1967 *
1968 * @remarks This doesn't currently work for all handles (like RTFILE).
1969 */
1970#if HC_ARCH_BITS == 32
1971# define ASMAtomicReadHandle(ph, phRes) \
1972 do { \
1973 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1974 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1975 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1976 } while (0)
1977#elif HC_ARCH_BITS == 64
1978# define ASMAtomicReadHandle(ph, phRes) \
1979 do { \
1980 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1981 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1982 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1983 } while (0)
1984#else
1985# error HC_ARCH_BITS
1986#endif
1987
1988
1989/**
1990 * Atomically read a typical IPRT handle value, unordered.
1991 *
1992 * @param ph Pointer to the handle variable to read.
1993 * @param phRes Where to store the result.
1994 *
1995 * @remarks This doesn't currently work for all handles (like RTFILE).
1996 */
1997#if HC_ARCH_BITS == 32
1998# define ASMAtomicUoReadHandle(ph, phRes) \
1999 do { \
2000 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2001 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2002 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2003 } while (0)
2004#elif HC_ARCH_BITS == 64
2005# define ASMAtomicUoReadHandle(ph, phRes) \
2006 do { \
2007 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2008 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2009 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2010 } while (0)
2011#else
2012# error HC_ARCH_BITS
2013#endif
2014
2015
2016/**
2017 * Atomically read a value whose size might differ
2018 * between platforms or compilers, ordered.
2019 *
2020 * @param pu Pointer to the variable to read.
2021 * @param puRes Where to store the result.
2022 */
2023#define ASMAtomicReadSize(pu, puRes) \
2024 do { \
2025 switch (sizeof(*(pu))) { \
2026 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2027 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2028 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2029 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2030 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2031 } \
2032 } while (0)
2033
2034
2035/**
2036 * Atomically read a value whose size might differ
2037 * between platforms or compilers, unordered.
2038 *
2039 * @param pu Pointer to the variable to read.
2040 * @param puRes Where to store the result.
2041 */
2042#define ASMAtomicUoReadSize(pu, puRes) \
2043 do { \
2044 switch (sizeof(*(pu))) { \
2045 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2046 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2047 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2048 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2049 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2050 } \
2051 } while (0)
2052
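/*
 * Illustrative usage sketch (hypothetical variable names): reading a value
 * whose width depends on the target, e.g. a size_t that is 32-bit on 32-bit
 * hosts and 64-bit on 64-bit hosts.
 * @code
 *     static volatile size_t g_cbInUse;
 *     size_t cbInUse;
 *
 *     ASMAtomicReadSize(&g_cbInUse, &cbInUse);  // dispatches on sizeof(*pu)
 * @endcode
 */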
2053
2054/**
2055 * Atomically writes an unsigned 8-bit value, ordered.
2056 *
2057 * @param pu8 Pointer to the 8-bit variable.
2058 * @param u8 The 8-bit value to assign to *pu8.
2059 */
2060DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2061{
2062 ASMAtomicXchgU8(pu8, u8);
2063}
2064
2065
2066/**
2067 * Atomically writes an unsigned 8-bit value, unordered.
2068 *
2069 * @param pu8 Pointer to the 8-bit variable.
2070 * @param u8 The 8-bit value to assign to *pu8.
2071 */
2072DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2073{
2074 *pu8 = u8; /* byte writes are atomic on x86 */
2075}
2076
2077
2078/**
2079 * Atomically writes a signed 8-bit value, ordered.
2080 *
2081 * @param pi8 Pointer to the 8-bit variable to read.
2082 * @param i8 The 8-bit value to assign to *pi8.
2083 */
2084DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2085{
2086 ASMAtomicXchgS8(pi8, i8);
2087}
2088
2089
2090/**
2091 * Atomically writes a signed 8-bit value, unordered.
2092 *
2093 * @param pi8 Pointer to the 8-bit variable to read.
2094 * @param i8 The 8-bit value to assign to *pi8.
2095 */
2096DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2097{
2098 *pi8 = i8; /* byte writes are atomic on x86 */
2099}
2100
2101
2102/**
2103 * Atomically writes an unsigned 16-bit value, ordered.
2104 *
2105 * @param pu16 Pointer to the 16-bit variable.
2106 * @param u16 The 16-bit value to assign to *pu16.
2107 */
2108DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2109{
2110 ASMAtomicXchgU16(pu16, u16);
2111}
2112
2113
2114/**
2115 * Atomically writes an unsigned 16-bit value, unordered.
2116 *
2117 * @param pu16 Pointer to the 16-bit variable.
2118 * @param u16 The 16-bit value to assign to *pu16.
2119 */
2120DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2121{
2122 Assert(!((uintptr_t)pu16 & 1));
2123 *pu16 = u16;
2124}
2125
2126
2127/**
2128 * Atomically writes a signed 16-bit value, ordered.
2129 *
2130 * @param pi16 Pointer to the 16-bit variable to read.
2131 * @param i16 The 16-bit value to assign to *pi16.
2132 */
2133DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2134{
2135 ASMAtomicXchgS16(pi16, i16);
2136}
2137
2138
2139/**
2140 * Atomically writes a signed 16-bit value, unordered.
2141 *
2142 * @param pi16 Pointer to the 16-bit variable to read.
2143 * @param i16 The 16-bit value to assign to *pi16.
2144 */
2145DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2146{
2147 Assert(!((uintptr_t)pi16 & 1));
2148 *pi16 = i16;
2149}
2150
2151
2152/**
2153 * Atomically writes an unsigned 32-bit value, ordered.
2154 *
2155 * @param pu32 Pointer to the 32-bit variable.
2156 * @param u32 The 32-bit value to assign to *pu32.
2157 */
2158DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2159{
2160 ASMAtomicXchgU32(pu32, u32);
2161}
2162
2163
2164/**
2165 * Atomically writes an unsigned 32-bit value, unordered.
2166 *
2167 * @param pu32 Pointer to the 32-bit variable.
2168 * @param u32 The 32-bit value to assign to *pu32.
2169 */
2170DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2171{
2172 Assert(!((uintptr_t)pu32 & 3));
2173 *pu32 = u32;
2174}
2175
2176
2177/**
2178 * Atomically writes a signed 32-bit value, ordered.
2179 *
2180 * @param pi32 Pointer to the 32-bit variable to read.
2181 * @param i32 The 32-bit value to assign to *pi32.
2182 */
2183DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2184{
2185 ASMAtomicXchgS32(pi32, i32);
2186}
2187
2188
2189/**
2190 * Atomically writes a signed 32-bit value, unordered.
2191 *
2192 * @param pi32 Pointer to the 32-bit variable to read.
2193 * @param i32 The 32-bit value to assign to *pi32.
2194 */
2195DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2196{
2197 Assert(!((uintptr_t)pi32 & 3));
2198 *pi32 = i32;
2199}
2200
2201
2202/**
2203 * Atomically writes an unsigned 64-bit value, ordered.
2204 *
2205 * @param pu64 Pointer to the 64-bit variable.
2206 * @param u64 The 64-bit value to assign to *pu64.
2207 */
2208DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2209{
2210 ASMAtomicXchgU64(pu64, u64);
2211}
2212
2213
2214/**
2215 * Atomically writes an unsigned 64-bit value, unordered.
2216 *
2217 * @param pu64 Pointer to the 64-bit variable.
2218 * @param u64 The 64-bit value to assign to *pu64.
2219 */
2220DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2221{
2222 Assert(!((uintptr_t)pu64 & 7));
2223#if ARCH_BITS == 64
2224 *pu64 = u64;
2225#else
2226 ASMAtomicXchgU64(pu64, u64);
2227#endif
2228}
2229
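/*
 * Illustrative usage sketch (hypothetical names): updating a 64-bit timestamp
 * that other threads may sample concurrently.  Note from the code above that
 * on 32-bit hosts both the ordered and the unordered writer go through
 * ASMAtomicXchgU64, so the store is still a single 64-bit operation.
 * @code
 *     static volatile uint64_t g_u64LastSeen;
 *
 *     ASMAtomicWriteU64(&g_u64LastSeen, u64Now);            // u64Now: hypothetical input
 *     uint64_t u64Sample = ASMAtomicReadU64(&g_u64LastSeen);
 * @endcode
 */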
2230
2231/**
2232 * Atomically writes a signed 64-bit value, ordered.
2233 *
2234 * @param pi64 Pointer to the 64-bit variable.
2235 * @param i64 The 64-bit value to assign to *pi64.
2236 */
2237DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2238{
2239 ASMAtomicXchgS64(pi64, i64);
2240}
2241
2242
2243/**
2244 * Atomically writes a signed 64-bit value, unordered.
2245 *
2246 * @param pi64 Pointer to the 64-bit variable.
2247 * @param i64 The 64-bit value to assign to *pi64.
2248 */
2249DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2250{
2251 Assert(!((uintptr_t)pi64 & 7));
2252#if ARCH_BITS == 64
2253 *pi64 = i64;
2254#else
2255 ASMAtomicXchgS64(pi64, i64);
2256#endif
2257}
2258
2259
2260/**
2261 * Atomically writes a boolean value, ordered.
2262 *
2263 * @param pf Pointer to the boolean variable.
2264 * @param f The boolean value to assign to *pf.
2265 */
2266DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2267{
2268 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2269}
2270
2271
2272/**
2273 * Atomically writes a boolean value, unordered.
2274 *
2275 * @param pf Pointer to the boolean variable.
2276 * @param f The boolean value to assign to *pf.
2277 */
2278DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2279{
2280 *pf = f; /* byte writes are atomic on x86 */
2281}
2282
2283
2284/**
2285 * Atomically writes a pointer value, ordered.
2286 *
2288 * @param ppv Pointer to the pointer variable.
2289 * @param pv The pointer value to assign to *ppv.
2290 */
2291DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
2292{
2293#if ARCH_BITS == 32
2294 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2295#elif ARCH_BITS == 64
2296 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2297#else
2298# error "ARCH_BITS is bogus"
2299#endif
2300}
2301
2302
2303/**
2304 * Atomically writes a pointer value, unordered.
2305 *
2307 * @param ppv Pointer to the pointer variable.
2308 * @param pv The pointer value to assign to *ppv.
2309 */
2310DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
2311{
2312#if ARCH_BITS == 32
2313 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2314#elif ARCH_BITS == 64
2315 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2316#else
2317# error "ARCH_BITS is bogus"
2318#endif
2319}
2320
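/*
 * Illustrative usage sketch (hypothetical type and variables): publishing a
 * fully initialised structure to other threads.  The ordered write is intended
 * to make the structure contents visible before the pointer itself.
 * @code
 *     typedef struct MYCFG { uint32_t cItems; } MYCFG;  // hypothetical type
 *     static MYCFG            s_Cfg;
 *     static MYCFG * volatile g_pCfg;
 *
 *     s_Cfg.cItems = 42;
 *     ASMAtomicWritePtr((void * volatile *)&g_pCfg, &s_Cfg);
 *
 *     MYCFG *pCfg = (MYCFG *)ASMAtomicReadPtr((void * volatile *)&g_pCfg);
 * @endcode
 */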
2321
2322/**
2323 * Atomically write a typical IPRT handle value, ordered.
2324 *
2325 * @param ph Pointer to the variable to update.
2326 * @param hNew The value to assign to *ph.
2327 *
2328 * @remarks This doesn't currently work for all handles (like RTFILE).
2329 */
2330#if HC_ARCH_BITS == 32
2331# define ASMAtomicWriteHandle(ph, hNew) \
2332 do { \
2333 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2334 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2335 } while (0)
2336#elif HC_ARCH_BITS == 64
2337# define ASMAtomicWriteHandle(ph, hNew) \
2338 do { \
2339 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2340 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2341 } while (0)
2342#else
2343# error HC_ARCH_BITS
2344#endif
2345
2346
2347/**
2348 * Atomically write a typical IPRT handle value, unordered.
2349 *
2350 * @param ph Pointer to the variable to update.
2351 * @param hNew The value to assign to *ph.
2352 *
2353 * @remarks This doesn't currently work for all handles (like RTFILE).
2354 */
2355#if HC_ARCH_BITS == 32
2356# define ASMAtomicUoWriteHandle(ph, hNew) \
2357 do { \
2358 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2359 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2360 } while (0)
2361#elif HC_ARCH_BITS == 64
2362# define ASMAtomicUoWriteHandle(ph, hNew) \
2363 do { \
2364 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2365 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2366 } while (0)
2367#else
2368# error HC_ARCH_BITS
2369#endif
2370
2371
2372/**
2373 * Atomically write a value whose size might differ
2374 * between platforms or compilers, ordered.
2375 *
2376 * @param pu Pointer to the variable to update.
2377 * @param uNew The value to assign to *pu.
2378 */
2379#define ASMAtomicWriteSize(pu, uNew) \
2380 do { \
2381 switch (sizeof(*(pu))) { \
2382 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2383 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2384 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2385 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2386 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2387 } \
2388 } while (0)
2389
2390/**
2391 * Atomically write a value whose size might differ
2392 * between platforms or compilers, unordered.
2393 *
2394 * @param pu Pointer to the variable to update.
2395 * @param uNew The value to assign to *pu.
2396 */
2397#define ASMAtomicUoWriteSize(pu, uNew) \
2398 do { \
2399 switch (sizeof(*(pu))) { \
2400 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2401 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2402 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2403 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2404 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2405 } \
2406 } while (0)
2407
2408
2409
2410
2411#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2412# if PAGE_SIZE != 0x1000
2413# error "PAGE_SIZE is not 0x1000!"
2414# endif
2415#endif
2416
2417/**
2418 * Zeros a 4K memory page.
2419 *
2420 * @param pv Pointer to the memory block. This must be page aligned.
2421 */
2422#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2423DECLASM(void) ASMMemZeroPage(volatile void *pv);
2424# else
2425DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2426{
2427# if RT_INLINE_ASM_USES_INTRIN
2428# ifdef RT_ARCH_AMD64
2429 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2430# else
2431 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2432# endif
2433
2434# elif RT_INLINE_ASM_GNU_STYLE
2435 RTCCUINTREG uDummy;
2436# ifdef RT_ARCH_AMD64
2437 __asm__ __volatile__("rep stosq"
2438 : "=D" (pv),
2439 "=c" (uDummy)
2440 : "0" (pv),
2441 "c" (0x1000 >> 3),
2442 "a" (0)
2443 : "memory");
2444# else
2445 __asm__ __volatile__("rep stosl"
2446 : "=D" (pv),
2447 "=c" (uDummy)
2448 : "0" (pv),
2449 "c" (0x1000 >> 2),
2450 "a" (0)
2451 : "memory");
2452# endif
2453# else
2454 __asm
2455 {
2456# ifdef RT_ARCH_AMD64
2457 xor rax, rax
2458 mov ecx, 0200h
2459 mov rdi, [pv]
2460 rep stosq
2461# else
2462 xor eax, eax
2463 mov ecx, 0400h
2464 mov edi, [pv]
2465 rep stosd
2466# endif
2467 }
2468# endif
2469}
2470# endif
2471
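/*
 * Illustrative usage sketch (hypothetical buffer): the pointer passed to
 * ASMMemZeroPage must be 4K page aligned and exactly one page is cleared.
 * The gcc-style alignment attribute below is only for illustration.
 * @code
 *     static uint8_t g_abPage[0x1000] __attribute__((aligned(0x1000)));
 *
 *     ASMMemZeroPage(g_abPage);
 * @endcode
 */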
2472
2473/**
2474 * Zeros a memory block with a 32-bit aligned size.
2475 *
2476 * @param pv Pointer to the memory block.
2477 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2478 */
2479#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2480DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2481#else
2482DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2483{
2484# if RT_INLINE_ASM_USES_INTRIN
2485# ifdef RT_ARCH_AMD64
2486 if (!(cb & 7))
2487 __stosq((unsigned __int64 *)pv, 0, cb / 8);
2488 else
2489# endif
2490 __stosd((unsigned long *)pv, 0, cb / 4);
2491
2492# elif RT_INLINE_ASM_GNU_STYLE
2493 __asm__ __volatile__("rep stosl"
2494 : "=D" (pv),
2495 "=c" (cb)
2496 : "0" (pv),
2497 "1" (cb >> 2),
2498 "a" (0)
2499 : "memory");
2500# else
2501 __asm
2502 {
2503 xor eax, eax
2504# ifdef RT_ARCH_AMD64
2505 mov rcx, [cb]
2506 shr rcx, 2
2507 mov rdi, [pv]
2508# else
2509 mov ecx, [cb]
2510 shr ecx, 2
2511 mov edi, [pv]
2512# endif
2513 rep stosd
2514 }
2515# endif
2516}
2517#endif
2518
2519
2520/**
2521 * Fills a memory block with a 32-bit aligned size.
2522 *
2523 * @param pv Pointer to the memory block.
2524 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2525 * @param u32 The value to fill with.
2526 */
2527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2528DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2529#else
2530DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2531{
2532# if RT_INLINE_ASM_USES_INTRIN
2533# ifdef RT_ARCH_AMD64
2534 if (!(cb & 7))
2535 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
2536 else
2537# endif
2538 __stosd((unsigned long *)pv, u32, cb / 4);
2539
2540# elif RT_INLINE_ASM_GNU_STYLE
2541 __asm__ __volatile__("rep stosl"
2542 : "=D" (pv),
2543 "=c" (cb)
2544 : "0" (pv),
2545 "1" (cb >> 2),
2546 "a" (u32)
2547 : "memory");
2548# else
2549 __asm
2550 {
2551# ifdef RT_ARCH_AMD64
2552 mov rcx, [cb]
2553 shr rcx, 2
2554 mov rdi, [pv]
2555# else
2556 mov ecx, [cb]
2557 shr ecx, 2
2558 mov edi, [pv]
2559# endif
2560 mov eax, [u32]
2561 rep stosd
2562 }
2563# endif
2564}
2565#endif
2566
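/*
 * Illustrative usage sketch (hypothetical array): both helpers require the
 * byte count to be a multiple of 4, so sizeof() of a uint32_t array fits
 * naturally.
 * @code
 *     uint32_t au32Table[64];
 *
 *     ASMMemZero32(au32Table, sizeof(au32Table));
 *     ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xdeadbeef));
 * @endcode
 */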
2567
2568/**
2569 * Checks if a memory page is all zeros.
2570 *
2571 * @returns true / false.
2572 *
2573 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
2574 * boundary.
2575 */
2576DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
2577{
2578# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
2579 union { RTCCUINTREG r; bool f; } uAX;
2580 RTCCUINTREG xCX, xDI;
2581 Assert(!((uintptr_t)pvPage & 15));
2582 __asm__ __volatile__("repe; "
2583# ifdef RT_ARCH_AMD64
2584 "scasq\n\t"
2585# else
2586 "scasl\n\t"
2587# endif
2588 "setnc %%al\n\t"
2589 : "=&c" (xCX),
2590 "=&D" (xDI),
2591 "=&a" (uAX.r)
2592 : "mr" (pvPage),
2593# ifdef RT_ARCH_AMD64
2594 "0" (0x1000/8),
2595# else
2596 "0" (0x1000/4),
2597# endif
2598 "1" (pvPage),
2599 "2" (0));
2600 return uAX.f;
2601# else
2602 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
2603 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
2604 Assert(!((uintptr_t)pvPage & 15));
2605 for (;;)
2606 {
2607 if (puPtr[0]) return false;
2608 if (puPtr[4]) return false;
2609
2610 if (puPtr[2]) return false;
2611 if (puPtr[6]) return false;
2612
2613 if (puPtr[1]) return false;
2614 if (puPtr[5]) return false;
2615
2616 if (puPtr[3]) return false;
2617 if (puPtr[7]) return false;
2618
2619 if (!--cLeft)
2620 return true;
2621 puPtr += 8;
2622 }
2623 return true;
2624# endif
2625}
2626
2627
2628/**
2629 * Checks if a memory block is filled with the specified byte.
2630 *
2631 * This is a sort of inverted memchr.
2632 *
2633 * @returns Pointer to the byte which doesn't equal u8.
2634 * @returns NULL if all equal to u8.
2635 *
2636 * @param pv Pointer to the memory block.
2637 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2638 * @param u8 The value it's supposed to be filled with.
2639 *
2640 * @todo Fix name, it is a predicate function but it's not returning boolean!
2641 */
2642#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2643DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
2644#else
2645DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
2646{
2647/** @todo rewrite this in inline assembly? */
2648 uint8_t const *pb = (uint8_t const *)pv;
2649 for (; cb; cb--, pb++)
2650 if (RT_UNLIKELY(*pb != u8))
2651 return (void *)pb;
2652 return NULL;
2653}
2654#endif
2655
2656
2657/**
2658 * Checks if a memory block is filled with the specified 32-bit value.
2659 *
2660 * This is a sort of inverted memchr.
2661 *
2662 * @returns Pointer to the first value which doesn't equal u32.
2663 * @returns NULL if all equal to u32.
2664 *
2665 * @param pv Pointer to the memory block.
2666 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2667 * @param u32 The value it's supposed to be filled with.
2668 *
2669 * @todo Fix name, it is a predicate function but it's not returning boolean!
2670 */
2671#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2672DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
2673#else
2674DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
2675{
2676/** @todo rewrite this in inline assembly? */
2677 uint32_t const *pu32 = (uint32_t const *)pv;
2678 for (; cb; cb -= 4, pu32++)
2679 if (RT_UNLIKELY(*pu32 != u32))
2680 return (uint32_t *)pu32;
2681 return NULL;
2682}
2683#endif
2684
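/*
 * Illustrative usage sketch (hypothetical buffer): despite the predicate-like
 * names, ASMMemIsAll8 and ASMMemIsAllU32 return NULL on success and a pointer
 * to the first mismatching element otherwise (see the @todo notes above).
 * @code
 *     uint8_t abBuf[256];
 *     memset(abBuf, 0xff, sizeof(abBuf));                  // from <string.h>
 *
 *     void *pvMismatch = ASMMemIsAll8(abBuf, sizeof(abBuf), 0xff);
 *     bool  fAllSet    = (pvMismatch == NULL);             // NULL means all bytes are 0xff
 * @endcode
 */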
2685
2686/**
2687 * Probes a byte pointer for read access.
2688 *
2689 * While the function will fault if the byte is not read accessible,
2690 * the idea is to do this in a safe place like before acquiring locks
2691 * and such like.
2692 *
2693 * Also, this function guarantees that an eager compiler is not going
2694 * to optimize the probing away.
2695 *
2696 * @param pvByte Pointer to the byte.
2697 */
2698#if RT_INLINE_ASM_EXTERNAL
2699DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2700#else
2701DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2702{
2703 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2704 uint8_t u8;
2705# if RT_INLINE_ASM_GNU_STYLE
2706 __asm__ __volatile__("movb (%1), %0\n\t"
2707 : "=r" (u8)
2708 : "r" (pvByte));
2709# else
2710 __asm
2711 {
2712# ifdef RT_ARCH_AMD64
2713 mov rax, [pvByte]
2714 mov al, [rax]
2715# else
2716 mov eax, [pvByte]
2717 mov al, [eax]
2718# endif
2719 mov [u8], al
2720 }
2721# endif
2722 return u8;
2723}
2724#endif
2725
2726/**
2727 * Probes a buffer for read access page by page.
2728 *
2729 * While the function will fault if the buffer is not fully read
2730 * accessible, the idea is to do this in a safe place like before
2731 * acquiring locks and such like.
2732 *
2733 * Also, this function guarantees that an eager compiler is not going
2734 * to optimize the probing away.
2735 *
2736 * @param pvBuf Pointer to the buffer.
2737 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2738 */
2739DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2740{
2741 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2742 /* the first byte */
2743 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2744 ASMProbeReadByte(pu8);
2745
2746 /* the whole pages in between. */
2747 while (cbBuf > /*PAGE_SIZE*/0x1000)
2748 {
2749 ASMProbeReadByte(pu8);
2750 cbBuf -= /*PAGE_SIZE*/0x1000;
2751 pu8 += /*PAGE_SIZE*/0x1000;
2752 }
2753
2754 /* the last byte */
2755 ASMProbeReadByte(pu8 + cbBuf - 1);
2756}
2757
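/*
 * Illustrative usage sketch (hypothetical function and parameters): touch every
 * page of a caller supplied buffer before entering a critical section so that
 * any page fault is taken while no locks are held.
 * @code
 *     void ProcessRequest(const void *pvReq, size_t cbReq)  // hypothetical
 *     {
 *         ASMProbeReadBuffer(pvReq, cbReq);  // faults here, not under the lock
 *         // ... acquire the lock and process pvReq ...
 *     }
 * @endcode
 */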
2758
2759/** @def ASMBreakpoint
2760 * Debugger Breakpoint.
2761 * @remark In the gnu world we add a nop instruction after the int3 to
2762 * force gdb to remain at the int3 source line.
2763 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
2764 * @internal
2765 */
2766#if RT_INLINE_ASM_GNU_STYLE
2767# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
2768# ifndef __L4ENV__
2769# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
2770# else
2771# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
2772# endif
2773# elif defined(RT_ARCH_SPARC64)
2774 # define ASMBreakpoint() do { __asm__ __volatile__("illtrap $0\n\t"); } while (0) /** @todo Sparc64: this is just a wild guess. */
2775# elif defined(RT_ARCH_SPARC)
2776# define ASMBreakpoint() do { __asm__ __volatile__("unimp 0\n\t"); } while (0) /** @todo Sparc: this is just a wild guess (same as Sparc64, just different name). */
2777# else
2778# error "PORTME"
2779# endif
2780#else
2781# define ASMBreakpoint() __debugbreak()
2782#endif
2783
2784
2785/**
2786 * Spinloop hint for platforms that have these, empty function on the other
2787 * platforms.
2788 *
2789 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
2790 * spin locks.
2791 */
2792#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
2793DECLASM(void) ASMNopPause(void);
2794#else
2795DECLINLINE(void) ASMNopPause(void)
2796{
2797# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
2798# if RT_INLINE_ASM_GNU_STYLE
2799 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
2800# else
2801 __asm {
2802 _emit 0f3h
2803 _emit 090h
2804 }
2805# endif
2806# else
2807 /* dummy */
2808# endif
2809}
2810#endif
2811
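/*
 * Illustrative usage sketch (hypothetical lock variable): a minimal
 * test-and-set spin loop using ASMAtomicXchgU32 (declared earlier in this
 * file) with ASMNopPause in the busy-wait path.
 * @code
 *     static volatile uint32_t g_fLocked = 0;
 *
 *     while (ASMAtomicXchgU32(&g_fLocked, 1) != 0)
 *         ASMNopPause();                      // be nice to the sibling hw thread
 *     // ... critical section ...
 *     ASMAtomicWriteU32(&g_fLocked, 0);       // release
 * @endcode
 */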
2812
2813
2814/** @defgroup grp_inline_bits Bit Operations
2815 * @{
2816 */
2817
2818
2819/**
2820 * Sets a bit in a bitmap.
2821 *
2822 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
2823 * @param iBit The bit to set.
2824 *
2825 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2826 * However, doing so will yield better performance as well as avoiding
2827 * traps accessing the last bits in the bitmap.
2828 */
2829#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2830DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
2831#else
2832DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
2833{
2834# if RT_INLINE_ASM_USES_INTRIN
2835 _bittestandset((long *)pvBitmap, iBit);
2836
2837# elif RT_INLINE_ASM_GNU_STYLE
2838 __asm__ __volatile__("btsl %1, %0"
2839 : "=m" (*(volatile long *)pvBitmap)
2840 : "Ir" (iBit),
2841 "m" (*(volatile long *)pvBitmap)
2842 : "memory");
2843# else
2844 __asm
2845 {
2846# ifdef RT_ARCH_AMD64
2847 mov rax, [pvBitmap]
2848 mov edx, [iBit]
2849 bts [rax], edx
2850# else
2851 mov eax, [pvBitmap]
2852 mov edx, [iBit]
2853 bts [eax], edx
2854# endif
2855 }
2856# endif
2857}
2858#endif
2859
2860
2861/**
2862 * Atomically sets a bit in a bitmap, ordered.
2863 *
2864 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
2865 * the memory access isn't atomic!
2866 * @param iBit The bit to set.
2867 */
2868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2869DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
2870#else
2871DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
2872{
2873 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
2874# if RT_INLINE_ASM_USES_INTRIN
2875 _interlockedbittestandset((long *)pvBitmap, iBit);
2876# elif RT_INLINE_ASM_GNU_STYLE
2877 __asm__ __volatile__("lock; btsl %1, %0"
2878 : "=m" (*(volatile long *)pvBitmap)
2879 : "Ir" (iBit),
2880 "m" (*(volatile long *)pvBitmap)
2881 : "memory");
2882# else
2883 __asm
2884 {
2885# ifdef RT_ARCH_AMD64
2886 mov rax, [pvBitmap]
2887 mov edx, [iBit]
2888 lock bts [rax], edx
2889# else
2890 mov eax, [pvBitmap]
2891 mov edx, [iBit]
2892 lock bts [eax], edx
2893# endif
2894 }
2895# endif
2896}
2897#endif
2898
2899
2900/**
2901 * Clears a bit in a bitmap.
2902 *
2903 * @param pvBitmap Pointer to the bitmap.
2904 * @param iBit The bit to clear.
2905 *
2906 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2907 * However, doing so will yield better performance as well as avoiding
2908 * traps accessing the last bits in the bitmap.
2909 */
2910#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2911DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
2912#else
2913DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
2914{
2915# if RT_INLINE_ASM_USES_INTRIN
2916 _bittestandreset((long *)pvBitmap, iBit);
2917
2918# elif RT_INLINE_ASM_GNU_STYLE
2919 __asm__ __volatile__("btrl %1, %0"
2920 : "=m" (*(volatile long *)pvBitmap)
2921 : "Ir" (iBit),
2922 "m" (*(volatile long *)pvBitmap)
2923 : "memory");
2924# else
2925 __asm
2926 {
2927# ifdef RT_ARCH_AMD64
2928 mov rax, [pvBitmap]
2929 mov edx, [iBit]
2930 btr [rax], edx
2931# else
2932 mov eax, [pvBitmap]
2933 mov edx, [iBit]
2934 btr [eax], edx
2935# endif
2936 }
2937# endif
2938}
2939#endif
2940
2941
2942/**
2943 * Atomically clears a bit in a bitmap, ordered.
2944 *
2945 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
2946 * the memory access isn't atomic!
2947 * @param iBit The bit to clear.
2948 * @remarks No memory barrier, take care on smp.
2949 */
2950#if RT_INLINE_ASM_EXTERNAL
2951DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
2952#else
2953DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
2954{
2955 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
2956# if RT_INLINE_ASM_GNU_STYLE
2957 __asm__ __volatile__("lock; btrl %1, %0"
2958 : "=m" (*(volatile long *)pvBitmap)
2959 : "Ir" (iBit),
2960 "m" (*(volatile long *)pvBitmap)
2961 : "memory");
2962# else
2963 __asm
2964 {
2965# ifdef RT_ARCH_AMD64
2966 mov rax, [pvBitmap]
2967 mov edx, [iBit]
2968 lock btr [rax], edx
2969# else
2970 mov eax, [pvBitmap]
2971 mov edx, [iBit]
2972 lock btr [eax], edx
2973# endif
2974 }
2975# endif
2976}
2977#endif
2978
2979
2980/**
2981 * Toggles a bit in a bitmap.
2982 *
2983 * @param pvBitmap Pointer to the bitmap.
2984 * @param iBit The bit to toggle.
2985 *
2986 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
2987 * However, doing so will yield better performance as well as avoiding
2988 * traps accessing the last bits in the bitmap.
2989 */
2990#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2991DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
2992#else
2993DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
2994{
2995# if RT_INLINE_ASM_USES_INTRIN
2996 _bittestandcomplement((long *)pvBitmap, iBit);
2997# elif RT_INLINE_ASM_GNU_STYLE
2998 __asm__ __volatile__("btcl %1, %0"
2999 : "=m" (*(volatile long *)pvBitmap)
3000 : "Ir" (iBit),
3001 "m" (*(volatile long *)pvBitmap)
3002 : "memory");
3003# else
3004 __asm
3005 {
3006# ifdef RT_ARCH_AMD64
3007 mov rax, [pvBitmap]
3008 mov edx, [iBit]
3009 btc [rax], edx
3010# else
3011 mov eax, [pvBitmap]
3012 mov edx, [iBit]
3013 btc [eax], edx
3014# endif
3015 }
3016# endif
3017}
3018#endif
3019
3020
3021/**
3022 * Atomically toggles a bit in a bitmap, ordered.
3023 *
3024 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3025 * the memory access isn't atomic!
3026 * @param iBit The bit to toggle.
3027 */
3028#if RT_INLINE_ASM_EXTERNAL
3029DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3030#else
3031DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3032{
3033 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3034# if RT_INLINE_ASM_GNU_STYLE
3035 __asm__ __volatile__("lock; btcl %1, %0"
3036 : "=m" (*(volatile long *)pvBitmap)
3037 : "Ir" (iBit),
3038 "m" (*(volatile long *)pvBitmap)
3039 : "memory");
3040# else
3041 __asm
3042 {
3043# ifdef RT_ARCH_AMD64
3044 mov rax, [pvBitmap]
3045 mov edx, [iBit]
3046 lock btc [rax], edx
3047# else
3048 mov eax, [pvBitmap]
3049 mov edx, [iBit]
3050 lock btc [eax], edx
3051# endif
3052 }
3053# endif
3054}
3055#endif
3056
3057
3058/**
3059 * Tests and sets a bit in a bitmap.
3060 *
3061 * @returns true if the bit was set.
3062 * @returns false if the bit was clear.
3063 *
3064 * @param pvBitmap Pointer to the bitmap.
3065 * @param iBit The bit to test and set.
3066 *
3067 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3068 * However, doing so will yield better performance as well as avoiding
3069 * traps accessing the last bits in the bitmap.
3070 */
3071#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3072DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3073#else
3074DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3075{
3076 union { bool f; uint32_t u32; uint8_t u8; } rc;
3077# if RT_INLINE_ASM_USES_INTRIN
3078 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3079
3080# elif RT_INLINE_ASM_GNU_STYLE
3081 __asm__ __volatile__("btsl %2, %1\n\t"
3082 "setc %b0\n\t"
3083 "andl $1, %0\n\t"
3084 : "=q" (rc.u32),
3085 "=m" (*(volatile long *)pvBitmap)
3086 : "Ir" (iBit),
3087 "m" (*(volatile long *)pvBitmap)
3088 : "memory");
3089# else
3090 __asm
3091 {
3092 mov edx, [iBit]
3093# ifdef RT_ARCH_AMD64
3094 mov rax, [pvBitmap]
3095 bts [rax], edx
3096# else
3097 mov eax, [pvBitmap]
3098 bts [eax], edx
3099# endif
3100 setc al
3101 and eax, 1
3102 mov [rc.u32], eax
3103 }
3104# endif
3105 return rc.f;
3106}
3107#endif
3108
3109
3110/**
3111 * Atomically tests and sets a bit in a bitmap, ordered.
3112 *
3113 * @returns true if the bit was set.
3114 * @returns false if the bit was clear.
3115 *
3116 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3117 * the memory access isn't atomic!
3118 * @param iBit The bit to test and set.
3119 */
3120#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3121DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3122#else
3123DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3124{
3125 union { bool f; uint32_t u32; uint8_t u8; } rc;
3126 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3127# if RT_INLINE_ASM_USES_INTRIN
3128 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3129# elif RT_INLINE_ASM_GNU_STYLE
3130 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3131 "setc %b0\n\t"
3132 "andl $1, %0\n\t"
3133 : "=q" (rc.u32),
3134 "=m" (*(volatile long *)pvBitmap)
3135 : "Ir" (iBit),
3136 "m" (*(volatile long *)pvBitmap)
3137 : "memory");
3138# else
3139 __asm
3140 {
3141 mov edx, [iBit]
3142# ifdef RT_ARCH_AMD64
3143 mov rax, [pvBitmap]
3144 lock bts [rax], edx
3145# else
3146 mov eax, [pvBitmap]
3147 lock bts [eax], edx
3148# endif
3149 setc al
3150 and eax, 1
3151 mov [rc.u32], eax
3152 }
3153# endif
3154 return rc.f;
3155}
3156#endif
3157
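/*
 * Illustrative usage sketch (hypothetical bitmap and index): claiming a slot
 * in a shared allocation bitmap.  The test-and-set is atomic, so two threads
 * cannot both observe the bit as clear for the same slot.
 * @code
 *     static uint32_t g_bmSlots[8];           // 256 slots, 32-bit aligned
 *
 *     int32_t iSlot  = 17;                    // hypothetical slot index
 *     bool    fTaken = ASMAtomicBitTestAndSet(g_bmSlots, iSlot);
 *     bool    fMine  = !fTaken;               // bit was clear => this thread owns the slot
 * @endcode
 */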
3158
3159/**
3160 * Tests and clears a bit in a bitmap.
3161 *
3162 * @returns true if the bit was set.
3163 * @returns false if the bit was clear.
3164 *
3165 * @param pvBitmap Pointer to the bitmap.
3166 * @param iBit The bit to test and clear.
3167 *
3168 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3169 * However, doing so will yield better performance as well as avoiding
3170 * traps accessing the last bits in the bitmap.
3171 */
3172#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3173DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3174#else
3175DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3176{
3177 union { bool f; uint32_t u32; uint8_t u8; } rc;
3178# if RT_INLINE_ASM_USES_INTRIN
3179 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3180
3181# elif RT_INLINE_ASM_GNU_STYLE
3182 __asm__ __volatile__("btrl %2, %1\n\t"
3183 "setc %b0\n\t"
3184 "andl $1, %0\n\t"
3185 : "=q" (rc.u32),
3186 "=m" (*(volatile long *)pvBitmap)
3187 : "Ir" (iBit),
3188 "m" (*(volatile long *)pvBitmap)
3189 : "memory");
3190# else
3191 __asm
3192 {
3193 mov edx, [iBit]
3194# ifdef RT_ARCH_AMD64
3195 mov rax, [pvBitmap]
3196 btr [rax], edx
3197# else
3198 mov eax, [pvBitmap]
3199 btr [eax], edx
3200# endif
3201 setc al
3202 and eax, 1
3203 mov [rc.u32], eax
3204 }
3205# endif
3206 return rc.f;
3207}
3208#endif
3209
3210
3211/**
3212 * Atomically tests and clears a bit in a bitmap, ordered.
3213 *
3214 * @returns true if the bit was set.
3215 * @returns false if the bit was clear.
3216 *
3217 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3218 * the memory access isn't atomic!
3219 * @param iBit The bit to test and clear.
3220 *
3221 * @remarks No memory barrier, take care on smp.
3222 */
3223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3224DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3225#else
3226DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3227{
3228 union { bool f; uint32_t u32; uint8_t u8; } rc;
3229 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3230# if RT_INLINE_ASM_USES_INTRIN
3231 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3232
3233# elif RT_INLINE_ASM_GNU_STYLE
3234 __asm__ __volatile__("lock; btrl %2, %1\n\t"
3235 "setc %b0\n\t"
3236 "andl $1, %0\n\t"
3237 : "=q" (rc.u32),
3238 "=m" (*(volatile long *)pvBitmap)
3239 : "Ir" (iBit),
3240 "m" (*(volatile long *)pvBitmap)
3241 : "memory");
3242# else
3243 __asm
3244 {
3245 mov edx, [iBit]
3246# ifdef RT_ARCH_AMD64
3247 mov rax, [pvBitmap]
3248 lock btr [rax], edx
3249# else
3250 mov eax, [pvBitmap]
3251 lock btr [eax], edx
3252# endif
3253 setc al
3254 and eax, 1
3255 mov [rc.u32], eax
3256 }
3257# endif
3258 return rc.f;
3259}
3260#endif
3261
3262
3263/**
3264 * Tests and toggles a bit in a bitmap.
3265 *
3266 * @returns true if the bit was set.
3267 * @returns false if the bit was clear.
3268 *
3269 * @param pvBitmap Pointer to the bitmap.
3270 * @param iBit The bit to test and toggle.
3271 *
3272 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3273 * However, doing so will yield better performance as well as avoiding
3274 * traps accessing the last bits in the bitmap.
3275 */
3276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3277DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3278#else
3279DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3280{
3281 union { bool f; uint32_t u32; uint8_t u8; } rc;
3282# if RT_INLINE_ASM_USES_INTRIN
3283 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3284
3285# elif RT_INLINE_ASM_GNU_STYLE
3286 __asm__ __volatile__("btcl %2, %1\n\t"
3287 "setc %b0\n\t"
3288 "andl $1, %0\n\t"
3289 : "=q" (rc.u32),
3290 "=m" (*(volatile long *)pvBitmap)
3291 : "Ir" (iBit),
3292 "m" (*(volatile long *)pvBitmap)
3293 : "memory");
3294# else
3295 __asm
3296 {
3297 mov edx, [iBit]
3298# ifdef RT_ARCH_AMD64
3299 mov rax, [pvBitmap]
3300 btc [rax], edx
3301# else
3302 mov eax, [pvBitmap]
3303 btc [eax], edx
3304# endif
3305 setc al
3306 and eax, 1
3307 mov [rc.u32], eax
3308 }
3309# endif
3310 return rc.f;
3311}
3312#endif
3313
3314
3315/**
3316 * Atomically tests and toggles a bit in a bitmap, ordered.
3317 *
3318 * @returns true if the bit was set.
3319 * @returns false if the bit was clear.
3320 *
3321 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3322 * the memory access isn't atomic!
3323 * @param iBit The bit to test and toggle.
3324 */
3325#if RT_INLINE_ASM_EXTERNAL
3326DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3327#else
3328DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3329{
3330 union { bool f; uint32_t u32; uint8_t u8; } rc;
3331 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3332# if RT_INLINE_ASM_GNU_STYLE
3333 __asm__ __volatile__("lock; btcl %2, %1\n\t"
3334 "setc %b0\n\t"
3335 "andl $1, %0\n\t"
3336 : "=q" (rc.u32),
3337 "=m" (*(volatile long *)pvBitmap)
3338 : "Ir" (iBit),
3339 "m" (*(volatile long *)pvBitmap)
3340 : "memory");
3341# else
3342 __asm
3343 {
3344 mov edx, [iBit]
3345# ifdef RT_ARCH_AMD64
3346 mov rax, [pvBitmap]
3347 lock btc [rax], edx
3348# else
3349 mov eax, [pvBitmap]
3350 lock btc [eax], edx
3351# endif
3352 setc al
3353 and eax, 1
3354 mov [rc.u32], eax
3355 }
3356# endif
3357 return rc.f;
3358}
3359#endif
3360
3361
3362/**
3363 * Tests if a bit in a bitmap is set.
3364 *
3365 * @returns true if the bit is set.
3366 * @returns false if the bit is clear.
3367 *
3368 * @param pvBitmap Pointer to the bitmap.
3369 * @param iBit The bit to test.
3370 *
3371 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3372 * However, doing so will yield better performance as well as avoiding
3373 * traps accessing the last bits in the bitmap.
3374 */
3375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3376DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
3377#else
3378DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
3379{
3380 union { bool f; uint32_t u32; uint8_t u8; } rc;
3381# if RT_INLINE_ASM_USES_INTRIN
3382 rc.u32 = _bittest((long *)pvBitmap, iBit);
3383# elif RT_INLINE_ASM_GNU_STYLE
3384
3385 __asm__ __volatile__("btl %2, %1\n\t"
3386 "setc %b0\n\t"
3387 "andl $1, %0\n\t"
3388 : "=q" (rc.u32)
3389 : "m" (*(const volatile long *)pvBitmap),
3390 "Ir" (iBit)
3391 : "memory");
3392# else
3393 __asm
3394 {
3395 mov edx, [iBit]
3396# ifdef RT_ARCH_AMD64
3397 mov rax, [pvBitmap]
3398 bt [rax], edx
3399# else
3400 mov eax, [pvBitmap]
3401 bt [eax], edx
3402# endif
3403 setc al
3404 and eax, 1
3405 mov [rc.u32], eax
3406 }
3407# endif
3408 return rc.f;
3409}
3410#endif
3411
3412
3413/**
3414 * Clears a bit range within a bitmap.
3415 *
3416 * @param pvBitmap Pointer to the bitmap.
3417 * @param iBitStart The first bit to clear.
3418 * @param iBitEnd The first bit not to clear.
3419 */
3420DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3421{
3422 if (iBitStart < iBitEnd)
3423 {
3424 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3425 int iStart = iBitStart & ~31;
3426 int iEnd = iBitEnd & ~31;
3427 if (iStart == iEnd)
3428 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3429 else
3430 {
3431 /* bits in first dword. */
3432 if (iBitStart & 31)
3433 {
3434 *pu32 &= (1 << (iBitStart & 31)) - 1;
3435 pu32++;
3436 iBitStart = iStart + 32;
3437 }
3438
3439 /* whole dword. */
3440 if (iBitStart != iEnd)
3441 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3442
3443 /* bits in last dword. */
3444 if (iBitEnd & 31)
3445 {
3446 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3447 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3448 }
3449 }
3450 }
3451}
3452
3453
3454/**
3455 * Sets a bit range within a bitmap.
3456 *
3457 * @param pvBitmap Pointer to the bitmap.
3458 * @param iBitStart The first bit to set.
3459 * @param iBitEnd The first bit not to set.
3460 */
3461DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3462{
3463 if (iBitStart < iBitEnd)
3464 {
3465 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3466 int iStart = iBitStart & ~31;
3467 int iEnd = iBitEnd & ~31;
3468 if (iStart == iEnd)
3469 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
3470 else
3471 {
3472 /* bits in first dword. */
3473 if (iBitStart & 31)
3474 {
3475 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
3476 pu32++;
3477 iBitStart = iStart + 32;
3478 }
3479
3480 /* whole dword. */
3481 if (iBitStart != iEnd)
3482 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
3483
3484 /* bits in last dword. */
3485 if (iBitEnd & 31)
3486 {
3487 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3488 *pu32 |= (1 << (iBitEnd & 31)) - 1;
3489 }
3490 }
3491 }
3492}
3493
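/*
 * Illustrative usage sketch (hypothetical bitmap): marking a run of entries as
 * used and releasing them again.  Note that iBitEnd is exclusive, i.e. the
 * first bit NOT to touch.
 * @code
 *     uint32_t bmPages[1024 / 32];            // 1024-bit bitmap
 *
 *     ASMMemZero32(bmPages, sizeof(bmPages));
 *     ASMBitSetRange(bmPages, 10, 20);        // sets bits 10..19
 *     ASMBitClearRange(bmPages, 10, 20);      // clears them again
 * @endcode
 */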
3494
3495/**
3496 * Finds the first clear bit in a bitmap.
3497 *
3498 * @returns Index of the first zero bit.
3499 * @returns -1 if no clear bit was found.
3500 * @param pvBitmap Pointer to the bitmap.
3501 * @param cBits The number of bits in the bitmap. Multiple of 32.
3502 */
3503#if RT_INLINE_ASM_EXTERNAL
3504DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
3505#else
3506DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
3507{
3508 if (cBits)
3509 {
3510 int32_t iBit;
3511# if RT_INLINE_ASM_GNU_STYLE
3512 RTCCUINTREG uEAX, uECX, uEDI;
3513 cBits = RT_ALIGN_32(cBits, 32);
3514 __asm__ __volatile__("repe; scasl\n\t"
3515 "je 1f\n\t"
3516# ifdef RT_ARCH_AMD64
3517 "lea -4(%%rdi), %%rdi\n\t"
3518 "xorl (%%rdi), %%eax\n\t"
3519 "subq %5, %%rdi\n\t"
3520# else
3521 "lea -4(%%edi), %%edi\n\t"
3522 "xorl (%%edi), %%eax\n\t"
3523 "subl %5, %%edi\n\t"
3524# endif
3525 "shll $3, %%edi\n\t"
3526 "bsfl %%eax, %%edx\n\t"
3527 "addl %%edi, %%edx\n\t"
3528 "1:\t\n"
3529 : "=d" (iBit),
3530 "=&c" (uECX),
3531 "=&D" (uEDI),
3532 "=&a" (uEAX)
3533 : "0" (0xffffffff),
3534 "mr" (pvBitmap),
3535 "1" (cBits >> 5),
3536 "2" (pvBitmap),
3537 "3" (0xffffffff));
3538# else
3539 cBits = RT_ALIGN_32(cBits, 32);
3540 __asm
3541 {
3542# ifdef RT_ARCH_AMD64
3543 mov rdi, [pvBitmap]
3544 mov rbx, rdi
3545# else
3546 mov edi, [pvBitmap]
3547 mov ebx, edi
3548# endif
3549 mov edx, 0ffffffffh
3550 mov eax, edx
3551 mov ecx, [cBits]
3552 shr ecx, 5
3553 repe scasd
3554 je done
3555
3556# ifdef RT_ARCH_AMD64
3557 lea rdi, [rdi - 4]
3558 xor eax, [rdi]
3559 sub rdi, rbx
3560# else
3561 lea edi, [edi - 4]
3562 xor eax, [edi]
3563 sub edi, ebx
3564# endif
3565 shl edi, 3
3566 bsf edx, eax
3567 add edx, edi
3568 done:
3569 mov [iBit], edx
3570 }
3571# endif
3572 return iBit;
3573 }
3574 return -1;
3575}
3576#endif
3577
3578
3579/**
3580 * Finds the next clear bit in a bitmap.
3581 *
3582 * @returns Index of the first zero bit.
3583 * @returns -1 if no clear bit was found.
3584 * @param pvBitmap Pointer to the bitmap.
3585 * @param cBits The number of bits in the bitmap. Multiple of 32.
3586 * @param iBitPrev The bit returned from the last search.
3587 * The search will start at iBitPrev + 1.
3588 */
3589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3590DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3591#else
3592DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3593{
3594 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
3595 int iBit = ++iBitPrev & 31;
3596 if (iBit)
3597 {
3598 /*
3599 * Inspect the 32-bit word containing the unaligned bit.
3600 */
3601 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
3602
3603# if RT_INLINE_ASM_USES_INTRIN
3604 unsigned long ulBit = 0;
3605 if (_BitScanForward(&ulBit, u32))
3606 return ulBit + iBitPrev;
3607# else
3608# if RT_INLINE_ASM_GNU_STYLE
3609 __asm__ __volatile__("bsf %1, %0\n\t"
3610 "jnz 1f\n\t"
3611 "movl $-1, %0\n\t"
3612 "1:\n\t"
3613 : "=r" (iBit)
3614 : "r" (u32));
3615# else
3616 __asm
3617 {
3618 mov edx, [u32]
3619 bsf eax, edx
3620 jnz done
3621 mov eax, 0ffffffffh
3622 done:
3623 mov [iBit], eax
3624 }
3625# endif
3626 if (iBit >= 0)
3627 return iBit + iBitPrev;
3628# endif
3629
3630 /*
3631 * Skip ahead and see if there is anything left to search.
3632 */
3633 iBitPrev |= 31;
3634 iBitPrev++;
3635 if (cBits <= (uint32_t)iBitPrev)
3636 return -1;
3637 }
3638
3639 /*
3640 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
3641 */
3642 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
3643 if (iBit >= 0)
3644 iBit += iBitPrev;
3645 return iBit;
3646}
3647#endif
3648
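/*
 * Illustrative usage sketch (hypothetical bitmap): enumerating all clear bits,
 * e.g. the free slots of an allocation bitmap.  Remember that cBits must be a
 * multiple of 32.
 * @code
 *     uint32_t bmUsed[4] = {0};               // 128 bits
 *     uint32_t cBits     = 128;
 *
 *     int iBit = ASMBitFirstClear(bmUsed, cBits);
 *     while (iBit >= 0)
 *     {
 *         // ... iBit is a free slot ...
 *         iBit = ASMBitNextClear(bmUsed, cBits, iBit);
 *     }
 * @endcode
 */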
3649
3650/**
3651 * Finds the first set bit in a bitmap.
3652 *
3653 * @returns Index of the first set bit.
3654 * @returns -1 if no set bit was found.
3655 * @param pvBitmap Pointer to the bitmap.
3656 * @param cBits The number of bits in the bitmap. Multiple of 32.
3657 */
3658#if RT_INLINE_ASM_EXTERNAL
3659DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
3660#else
3661DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
3662{
3663 if (cBits)
3664 {
3665 int32_t iBit;
3666# if RT_INLINE_ASM_GNU_STYLE
3667 RTCCUINTREG uEAX, uECX, uEDI;
3668 cBits = RT_ALIGN_32(cBits, 32);
3669 __asm__ __volatile__("repe; scasl\n\t"
3670 "je 1f\n\t"
3671# ifdef RT_ARCH_AMD64
3672 "lea -4(%%rdi), %%rdi\n\t"
3673 "movl (%%rdi), %%eax\n\t"
3674 "subq %5, %%rdi\n\t"
3675# else
3676 "lea -4(%%edi), %%edi\n\t"
3677 "movl (%%edi), %%eax\n\t"
3678 "subl %5, %%edi\n\t"
3679# endif
3680 "shll $3, %%edi\n\t"
3681 "bsfl %%eax, %%edx\n\t"
3682 "addl %%edi, %%edx\n\t"
3683 "1:\t\n"
3684 : "=d" (iBit),
3685 "=&c" (uECX),
3686 "=&D" (uEDI),
3687 "=&a" (uEAX)
3688 : "0" (0xffffffff),
3689 "mr" (pvBitmap),
3690 "1" (cBits >> 5),
3691 "2" (pvBitmap),
3692 "3" (0));
3693# else
3694 cBits = RT_ALIGN_32(cBits, 32);
3695 __asm
3696 {
3697# ifdef RT_ARCH_AMD64
3698 mov rdi, [pvBitmap]
3699 mov rbx, rdi
3700# else
3701 mov edi, [pvBitmap]
3702 mov ebx, edi
3703# endif
3704 mov edx, 0ffffffffh
3705 xor eax, eax
3706 mov ecx, [cBits]
3707 shr ecx, 5
3708 repe scasd
3709 je done
3710# ifdef RT_ARCH_AMD64
3711 lea rdi, [rdi - 4]
3712 mov eax, [rdi]
3713 sub rdi, rbx
3714# else
3715 lea edi, [edi - 4]
3716 mov eax, [edi]
3717 sub edi, ebx
3718# endif
3719 shl edi, 3
3720 bsf edx, eax
3721 add edx, edi
3722 done:
3723 mov [iBit], edx
3724 }
3725# endif
3726 return iBit;
3727 }
3728 return -1;
3729}
3730#endif
3731
3732
3733/**
3734 * Finds the next set bit in a bitmap.
3735 *
3736 * @returns Index of the next set bit.
3737 * @returns -1 if no set bit was found.
3738 * @param pvBitmap Pointer to the bitmap.
3739 * @param cBits The number of bits in the bitmap. Multiple of 32.
3740 * @param iBitPrev The bit returned from the last search.
3741 * The search will start at iBitPrev + 1.
3742 */
3743#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3744DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3745#else
3746DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3747{
3748 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
3749 int iBit = ++iBitPrev & 31;
3750 if (iBit)
3751 {
3752 /*
3753 * Inspect the 32-bit word containing the unaligned bit.
3754 */
3755 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
3756
3757# if RT_INLINE_ASM_USES_INTRIN
3758 unsigned long ulBit = 0;
3759 if (_BitScanForward(&ulBit, u32))
3760 return ulBit + iBitPrev;
3761# else
3762# if RT_INLINE_ASM_GNU_STYLE
3763 __asm__ __volatile__("bsf %1, %0\n\t"
3764 "jnz 1f\n\t"
3765 "movl $-1, %0\n\t"
3766 "1:\n\t"
3767 : "=r" (iBit)
3768 : "r" (u32));
3769# else
3770 __asm
3771 {
3772 mov edx, [u32]
3773 bsf eax, edx
3774 jnz done
3775 mov eax, 0ffffffffh
3776 done:
3777 mov [iBit], eax
3778 }
3779# endif
3780 if (iBit >= 0)
3781 return iBit + iBitPrev;
3782# endif
3783
3784 /*
3785 * Skip ahead and see if there is anything left to search.
3786 */
3787 iBitPrev |= 31;
3788 iBitPrev++;
3789 if (cBits <= (uint32_t)iBitPrev)
3790 return -1;
3791 }
3792
3793 /*
3794 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
3795 */
3796 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
3797 if (iBit >= 0)
3798 iBit += iBitPrev;
3799 return iBit;
3800}
3801#endif
3802
3803
3804/**
3805 * Finds the first bit which is set in the given 32-bit integer.
3806 * Bits are numbered from 1 (least significant) to 32.
3807 *
3808 * @returns index [1..32] of the first set bit.
3809 * @returns 0 if all bits are cleared.
3810 * @param u32 Integer to search for set bits.
3811 * @remark Similar to ffs() in BSD.
3812 */
3813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3814DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
3815#else
3816DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3817{
3818# if RT_INLINE_ASM_USES_INTRIN
3819 unsigned long iBit;
3820 if (_BitScanForward(&iBit, u32))
3821 iBit++;
3822 else
3823 iBit = 0;
3824# elif RT_INLINE_ASM_GNU_STYLE
3825 uint32_t iBit;
3826 __asm__ __volatile__("bsf %1, %0\n\t"
3827 "jnz 1f\n\t"
3828 "xorl %0, %0\n\t"
3829 "jmp 2f\n"
3830 "1:\n\t"
3831 "incl %0\n"
3832 "2:\n\t"
3833 : "=r" (iBit)
3834 : "rm" (u32));
3835# else
3836 uint32_t iBit;
3837 _asm
3838 {
3839 bsf eax, [u32]
3840 jnz found
3841 xor eax, eax
3842 jmp done
3843 found:
3844 inc eax
3845 done:
3846 mov [iBit], eax
3847 }
3848# endif
3849 return iBit;
3850}
3851#endif
3852
3853
3854/**
3855 * Finds the first bit which is set in the given 32-bit integer.
3856 * Bits are numbered from 1 (least significant) to 32.
3857 *
3858 * @returns index [1..32] of the first set bit.
3859 * @returns 0 if all bits are cleared.
3860 * @param i32 Integer to search for set bits.
3861 * @remark Similar to ffs() in BSD.
3862 */
3863DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3864{
3865 return ASMBitFirstSetU32((uint32_t)i32);
3866}
3867
3868
3869/**
3870 * Finds the last bit which is set in the given 32-bit integer.
3871 * Bits are numbered from 1 (least significant) to 32.
3872 *
3873 * @returns index [1..32] of the last set bit.
3874 * @returns 0 if all bits are cleared.
3875 * @param u32 Integer to search for set bits.
3876 * @remark Similar to fls() in BSD.
3877 */
3878#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3879DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
3880#else
3881DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3882{
3883# if RT_INLINE_ASM_USES_INTRIN
3884 unsigned long iBit;
3885 if (_BitScanReverse(&iBit, u32))
3886 iBit++;
3887 else
3888 iBit = 0;
3889# elif RT_INLINE_ASM_GNU_STYLE
3890 uint32_t iBit;
3891 __asm__ __volatile__("bsrl %1, %0\n\t"
3892 "jnz 1f\n\t"
3893 "xorl %0, %0\n\t"
3894 "jmp 2f\n"
3895 "1:\n\t"
3896 "incl %0\n"
3897 "2:\n\t"
3898 : "=r" (iBit)
3899 : "rm" (u32));
3900# else
3901 uint32_t iBit;
3902 _asm
3903 {
3904 bsr eax, [u32]
3905 jnz found
3906 xor eax, eax
3907 jmp done
3908 found:
3909 inc eax
3910 done:
3911 mov [iBit], eax
3912 }
3913# endif
3914 return iBit;
3915}
3916#endif
3917
3918
3919/**
3920 * Finds the last bit which is set in the given 32-bit integer.
3921 * Bits are numbered from 1 (least significant) to 32.
3922 *
3923 * @returns index [1..32] of the last set bit.
3924 * @returns 0 if all bits are cleared.
3925 * @param i32 Integer to search for set bits.
3926 * @remark Similar to fls() in BSD.
3927 */
3928DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
3929{
3930 return ASMBitLastSetU32((uint32_t)i32);
3931}
3932
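/*
 * Illustrative usage sketch: because the returned index is 1-based, the
 * position of the most significant set bit, i.e. floor(log2(u32)), is the
 * return value minus one; a zero input must be handled separately.
 * @code
 *     unsigned iMsb  = ASMBitLastSetU32(0x00008001);  // returns 16
 *     unsigned uLog2 = iMsb - 1;                      // 15 == floor(log2(0x8001))
 * @endcode
 */
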
3933/**
3934 * Reverse the byte order of the given 16-bit integer.
3935 *
3936 * @returns The byte swapped value.
3937 * @param u16 16-bit integer value.
3938 */
3939#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3940DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
3941#else
3942DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
3943{
3944# if RT_INLINE_ASM_USES_INTRIN
3945 u16 = _byteswap_ushort(u16);
3946# elif RT_INLINE_ASM_GNU_STYLE
3947 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
3948# else
3949 _asm
3950 {
3951 mov ax, [u16]
3952 ror ax, 8
3953 mov [u16], ax
3954 }
3955# endif
3956 return u16;
3957}
3958#endif
3959
3960
3961/**
3962 * Reverse the byte order of the given 32-bit integer.
3963 *
3964 * @returns The byte swapped value.
3965 * @param u32 32-bit integer value.
3966 */
3967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3968DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
3969#else
3970DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
3971{
3972# if RT_INLINE_ASM_USES_INTRIN
3973 u32 = _byteswap_ulong(u32);
3974# elif RT_INLINE_ASM_GNU_STYLE
3975 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
3976# else
3977 _asm
3978 {
3979 mov eax, [u32]
3980 bswap eax
3981 mov [u32], eax
3982 }
3983# endif
3984 return u32;
3985}
3986#endif
3987
3988
3989/**
3990 * Reverse the byte order of the given 64-bit integer.
3991 *
3992 * @returns The byte swapped value.
3993 * @param u64 64-bit integer value.
3994 */
3995DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
3996{
3997#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
3998 u64 = _byteswap_uint64(u64);
3999#else
4000 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4001 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4002#endif
4003 return u64;
4004}
4005
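/*
 * Illustrative usage sketch (hypothetical wire value): converting a big-endian
 * 32-bit field read from a network or on-disk structure to host order on a
 * little-endian machine.  Higher level endian conversion macros would normally
 * decide whether a swap is needed at all; this shows only the raw swap.
 * @code
 *     uint32_t u32Wire = UINT32_C(0x12345678);     // big-endian on the wire
 *     uint32_t u32Host = ASMByteSwapU32(u32Wire);  // 0x78563412
 * @endcode
 */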
4006
4007/** @} */
4008
4009
4010/** @} */
4011
4012#if 0 /* fallback if stuff does not work right. */
4013# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4014# include <iprt/asm-amd64-x86.h>
4015# endif
4016# include <iprt/asm-math.h>
4017#endif
4018
4019#endif
4020