VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 7103

Last change on this file since 7103 was 7103, checked in by vboxsync, 17 years ago

backed out r28333 as it didn't fix anything

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 130.0 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo #include <iprt/param.h> for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler.
35 * Otherwise defined as 0.
36 */
37
38#ifdef _MSC_VER
39# if _MSC_VER >= 1400
40# define RT_INLINE_ASM_USES_INTRIN 1
41# include <intrin.h>
42 /* Emit the intrinsics at all optimization levels. */
43# pragma intrinsic(_ReadWriteBarrier)
44# pragma intrinsic(__cpuid)
45# pragma intrinsic(_enable)
46# pragma intrinsic(_disable)
47# pragma intrinsic(__rdtsc)
48# pragma intrinsic(__readmsr)
49# pragma intrinsic(__writemsr)
50# pragma intrinsic(__outbyte)
51# pragma intrinsic(__outword)
52# pragma intrinsic(__outdword)
53# pragma intrinsic(__inbyte)
54# pragma intrinsic(__inword)
55# pragma intrinsic(__indword)
56# pragma intrinsic(__invlpg)
57# pragma intrinsic(__stosd)
58# pragma intrinsic(__stosw)
59# pragma intrinsic(__stosb)
60# pragma intrinsic(__readcr0)
61# pragma intrinsic(__readcr2)
62# pragma intrinsic(__readcr3)
63# pragma intrinsic(__readcr4)
64# pragma intrinsic(__writecr0)
65# pragma intrinsic(__writecr3)
66# pragma intrinsic(__writecr4)
67# pragma intrinsic(_BitScanForward)
68# pragma intrinsic(_BitScanReverse)
69# pragma intrinsic(_bittest)
70# pragma intrinsic(_bittestandset)
71# pragma intrinsic(_bittestandreset)
72# pragma intrinsic(_bittestandcomplement)
73# pragma intrinsic(_byteswap_ushort)
74# pragma intrinsic(_byteswap_ulong)
75# pragma intrinsic(_interlockedbittestandset)
76# pragma intrinsic(_interlockedbittestandreset)
77# pragma intrinsic(_InterlockedAnd)
78# pragma intrinsic(_InterlockedOr)
79# pragma intrinsic(_InterlockedIncrement)
80# pragma intrinsic(_InterlockedDecrement)
81# pragma intrinsic(_InterlockedExchange)
82# pragma intrinsic(_InterlockedCompareExchange)
83# pragma intrinsic(_InterlockedCompareExchange64)
84# ifdef RT_ARCH_AMD64
85# pragma intrinsic(__stosq)
86# pragma intrinsic(__readcr8)
87# pragma intrinsic(__writecr8)
88# pragma intrinsic(_byteswap_uint64)
89# pragma intrinsic(_InterlockedExchange64)
90# endif
91# endif
92#endif
93#ifndef RT_INLINE_ASM_USES_INTRIN
94# define RT_INLINE_ASM_USES_INTRIN 0
95#endif
96
97
98
99/** @defgroup grp_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is that
103 * the former will complete outstanding reads and writes before continuing,
104 * while the latter makes no promises about the order. Even ordered
105 * operations do not, it seems, make any 100% promise as to whether
106 * the operation will complete before any subsequent memory access.
107 * (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @{
113 */
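
/* Usage sketch for the ordered operations described above: a writer fills in a
   payload and then publishes it with an ordered exchange, so the payload write
   is globally visible before the ready flag is. The variable and function names
   below are illustrative assumptions only; the functions used
   (ASMAtomicXchgU32, ASMAtomicCmpXchgU32) are declared further down. */
#if 0 /* illustrative sketch, not compiled */
static uint32_t          g_uExamplePayload;
static volatile uint32_t g_fExampleReady;

DECLINLINE(void) rtAsmExamplePublish(uint32_t uValue)
{
    g_uExamplePayload = uValue;
    ASMAtomicXchgU32(&g_fExampleReady, 1);  /* ordered: the payload write completes first */
}

DECLINLINE(bool) rtAsmExampleTryConsume(uint32_t *puValue)
{
    if (!ASMAtomicCmpXchgU32(&g_fExampleReady, 0 /* new */, 1 /* expected old */))
        return false;
    *puValue = g_uExamplePayload;
    return true;
}
#endif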
114
115/** @def RT_INLINE_ASM_EXTERNAL
116 * Defined as 1 if the compiler does not support inline assembly.
117 * The ASM* functions will then be implemented in an external .asm file.
118 *
119 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
120 * inline assembly in their AMD64 compiler.
121 */
122#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
123# define RT_INLINE_ASM_EXTERNAL 1
124#else
125# define RT_INLINE_ASM_EXTERNAL 0
126#endif
127
128/** @def RT_INLINE_ASM_GNU_STYLE
129 * Defined as 1 if the compiler understands GNU-style inline assembly.
130 */
131#if defined(_MSC_VER)
132# define RT_INLINE_ASM_GNU_STYLE 0
133#else
134# define RT_INLINE_ASM_GNU_STYLE 1
135#endif
136
137
138/** @todo find a more proper place for this structure? */
139#pragma pack(1)
140/** IDTR */
141typedef struct RTIDTR
142{
143 /** Size of the IDT. */
144 uint16_t cbIdt;
145 /** Address of the IDT. */
146 uintptr_t pIdt;
147} RTIDTR, *PRTIDTR;
148#pragma pack()
149
150#pragma pack(1)
151/** GDTR */
152typedef struct RTGDTR
153{
154 /** Size of the GDT. */
155 uint16_t cbGdt;
156 /** Address of the GDT. */
157 uintptr_t pGdt;
158} RTGDTR, *PRTGDTR;
159#pragma pack()
160
161
162/** @def ASMReturnAddress
163 * Gets the return address of the current (or calling if you like) function or method.
164 */
165#ifdef _MSC_VER
166# ifdef __cplusplus
167extern "C"
168# endif
169void * _ReturnAddress(void);
170# pragma intrinsic(_ReturnAddress)
171# define ASMReturnAddress() _ReturnAddress()
172#elif defined(__GNUC__) || defined(__DOXYGEN__)
173# define ASMReturnAddress() __builtin_return_address(0)
174#else
175# error "Unsupported compiler."
176#endif
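
/* Usage sketch: capturing the caller's return address, e.g. for logging or
   allocation tracking. The helper name and the tracking idea are illustrative
   assumptions, not part of this header. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExampleNoteCaller(void)
{
    void *pvCaller = ASMReturnAddress();    /* return address of whoever called us */
    /* ... record pvCaller together with whatever is being tracked ... */
    (void)pvCaller;
}
#endif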
177
178
179/**
180 * Gets the content of the IDTR CPU register.
181 * @param pIdtr Where to store the IDTR contents.
182 */
183#if RT_INLINE_ASM_EXTERNAL
184DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
185#else
186DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
187{
188# if RT_INLINE_ASM_GNU_STYLE
189 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
190# else
191 __asm
192 {
193# ifdef RT_ARCH_AMD64
194 mov rax, [pIdtr]
195 sidt [rax]
196# else
197 mov eax, [pIdtr]
198 sidt [eax]
199# endif
200 }
201# endif
202}
203#endif
204
205
206/**
207 * Sets the content of the IDTR CPU register.
208 * @param pIdtr Where to load the IDTR contents from.
209 */
210#if RT_INLINE_ASM_EXTERNAL
211DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
212#else
213DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
214{
215# if RT_INLINE_ASM_GNU_STYLE
216 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
217# else
218 __asm
219 {
220# ifdef RT_ARCH_AMD64
221 mov rax, [pIdtr]
222 lidt [rax]
223# else
224 mov eax, [pIdtr]
225 lidt [eax]
226# endif
227 }
228# endif
229}
230#endif
231
232
233/**
234 * Gets the content of the GDTR CPU register.
235 * @param pGdtr Where to store the GDTR contents.
236 */
237#if RT_INLINE_ASM_EXTERNAL
238DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
239#else
240DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
241{
242# if RT_INLINE_ASM_GNU_STYLE
243 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
244# else
245 __asm
246 {
247# ifdef RT_ARCH_AMD64
248 mov rax, [pGdtr]
249 sgdt [rax]
250# else
251 mov eax, [pGdtr]
252 sgdt [eax]
253# endif
254 }
255# endif
256}
257#endif
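
/* Usage sketch: reading the descriptor table registers into the packed
   RTIDTR/RTGDTR structures defined above. Variable names are illustrative
   only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExampleDumpTableRegs(void)
{
    RTIDTR Idtr;
    RTGDTR Gdtr;
    ASMGetIDTR(&Idtr);      /* Idtr.cbIdt = limit, Idtr.pIdt = linear base address */
    ASMGetGDTR(&Gdtr);      /* Gdtr.cbGdt = limit, Gdtr.pGdt = linear base address */
    /* ... log or inspect the values ... */
}
#endif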
258
259/**
260 * Get the cs register.
261 * @returns cs.
262 */
263#if RT_INLINE_ASM_EXTERNAL
264DECLASM(RTSEL) ASMGetCS(void);
265#else
266DECLINLINE(RTSEL) ASMGetCS(void)
267{
268 RTSEL SelCS;
269# if RT_INLINE_ASM_GNU_STYLE
270 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
271# else
272 __asm
273 {
274 mov ax, cs
275 mov [SelCS], ax
276 }
277# endif
278 return SelCS;
279}
280#endif
281
282
283/**
284 * Get the DS register.
285 * @returns DS.
286 */
287#if RT_INLINE_ASM_EXTERNAL
288DECLASM(RTSEL) ASMGetDS(void);
289#else
290DECLINLINE(RTSEL) ASMGetDS(void)
291{
292 RTSEL SelDS;
293# if RT_INLINE_ASM_GNU_STYLE
294 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
295# else
296 __asm
297 {
298 mov ax, ds
299 mov [SelDS], ax
300 }
301# endif
302 return SelDS;
303}
304#endif
305
306
307/**
308 * Get the ES register.
309 * @returns ES.
310 */
311#if RT_INLINE_ASM_EXTERNAL
312DECLASM(RTSEL) ASMGetES(void);
313#else
314DECLINLINE(RTSEL) ASMGetES(void)
315{
316 RTSEL SelES;
317# if RT_INLINE_ASM_GNU_STYLE
318 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
319# else
320 __asm
321 {
322 mov ax, es
323 mov [SelES], ax
324 }
325# endif
326 return SelES;
327}
328#endif
329
330
331/**
332 * Get the FS register.
333 * @returns FS.
334 */
335#if RT_INLINE_ASM_EXTERNAL
336DECLASM(RTSEL) ASMGetFS(void);
337#else
338DECLINLINE(RTSEL) ASMGetFS(void)
339{
340 RTSEL SelFS;
341# if RT_INLINE_ASM_GNU_STYLE
342 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
343# else
344 __asm
345 {
346 mov ax, fs
347 mov [SelFS], ax
348 }
349# endif
350 return SelFS;
351}
352#endif
353
354
355/**
356 * Get the GS register.
357 * @returns GS.
358 */
359#if RT_INLINE_ASM_EXTERNAL
360DECLASM(RTSEL) ASMGetGS(void);
361#else
362DECLINLINE(RTSEL) ASMGetGS(void)
363{
364 RTSEL SelGS;
365# if RT_INLINE_ASM_GNU_STYLE
366 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
367# else
368 __asm
369 {
370 mov ax, gs
371 mov [SelGS], ax
372 }
373# endif
374 return SelGS;
375}
376#endif
377
378
379/**
380 * Get the SS register.
381 * @returns SS.
382 */
383#if RT_INLINE_ASM_EXTERNAL
384DECLASM(RTSEL) ASMGetSS(void);
385#else
386DECLINLINE(RTSEL) ASMGetSS(void)
387{
388 RTSEL SelSS;
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
391# else
392 __asm
393 {
394 mov ax, ss
395 mov [SelSS], ax
396 }
397# endif
398 return SelSS;
399}
400#endif
401
402
403/**
404 * Get the TR register.
405 * @returns TR.
406 */
407#if RT_INLINE_ASM_EXTERNAL
408DECLASM(RTSEL) ASMGetTR(void);
409#else
410DECLINLINE(RTSEL) ASMGetTR(void)
411{
412 RTSEL SelTR;
413# if RT_INLINE_ASM_GNU_STYLE
414 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
415# else
416 __asm
417 {
418 str ax
419 mov [SelTR], ax
420 }
421# endif
422 return SelTR;
423}
424#endif
425
426
427/**
428 * Get the [RE]FLAGS register.
429 * @returns [RE]FLAGS.
430 */
431#if RT_INLINE_ASM_EXTERNAL
432DECLASM(RTCCUINTREG) ASMGetFlags(void);
433#else
434DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
435{
436 RTCCUINTREG uFlags;
437# if RT_INLINE_ASM_GNU_STYLE
438# ifdef RT_ARCH_AMD64
439 __asm__ __volatile__("pushfq\n\t"
440 "popq %0\n\t"
441 : "=g" (uFlags));
442# else
443 __asm__ __volatile__("pushfl\n\t"
444 "popl %0\n\t"
445 : "=g" (uFlags));
446# endif
447# else
448 __asm
449 {
450# ifdef RT_ARCH_AMD64
451 pushfq
452 pop [uFlags]
453# else
454 pushfd
455 pop [uFlags]
456# endif
457 }
458# endif
459 return uFlags;
460}
461#endif
462
463
464/**
465 * Set the [RE]FLAGS register.
466 * @param uFlags The new [RE]FLAGS value.
467 */
468#if RT_INLINE_ASM_EXTERNAL
469DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
470#else
471DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
472{
473# if RT_INLINE_ASM_GNU_STYLE
474# ifdef RT_ARCH_AMD64
475 __asm__ __volatile__("pushq %0\n\t"
476 "popfq\n\t"
477 : : "g" (uFlags));
478# else
479 __asm__ __volatile__("pushl %0\n\t"
480 "popfl\n\t"
481 : : "g" (uFlags));
482# endif
483# else
484 __asm
485 {
486# ifdef RT_ARCH_AMD64
487 push [uFlags]
488 popfq
489# else
490 push [uFlags]
491 popfd
492# endif
493 }
494# endif
495}
496#endif
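
/* Usage sketch: saving and restoring [RE]FLAGS around code that changes flags
   as a side effect. The function name is illustrative only; for the common
   "disable interrupts" case see ASMIntDisableFlags further down, which does
   the save and cli in one operation. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExampleWithFlagsRestored(void (*pfnWork)(void))
{
    RTCCUINTREG const fSavedFlags = ASMGetFlags();
    pfnWork();                  /* may clobber EFLAGS (IF, DF, ...) */
    ASMSetFlags(fSavedFlags);
}
#endif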
497
498
499/**
500 * Gets the content of the CPU timestamp counter register.
501 *
502 * @returns TSC.
503 */
504#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
505DECLASM(uint64_t) ASMReadTSC(void);
506#else
507DECLINLINE(uint64_t) ASMReadTSC(void)
508{
509 RTUINT64U u;
510# if RT_INLINE_ASM_GNU_STYLE
511 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
512# else
513# if RT_INLINE_ASM_USES_INTRIN
514 u.u = __rdtsc();
515# else
516 __asm
517 {
518 rdtsc
519 mov [u.s.Lo], eax
520 mov [u.s.Hi], edx
521 }
522# endif
523# endif
524 return u.u;
525}
526#endif
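
/* Usage sketch: a crude cycle measurement with the TSC. This does not
   serialize the pipeline and assumes the code stays on one CPU; it is a
   sketch, not a calibrated timing facility. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(uint64_t) rtAsmExampleMeasureTicks(void (*pfnWork)(void))
{
    uint64_t const uStart = ASMReadTSC();
    pfnWork();
    return ASMReadTSC() - uStart;   /* elapsed TSC ticks */
}
#endif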
527
528
529/**
530 * Performs the cpuid instruction returning all registers.
531 *
532 * @param uOperator CPUID operation (eax).
533 * @param pvEAX Where to store eax.
534 * @param pvEBX Where to store ebx.
535 * @param pvECX Where to store ecx.
536 * @param pvEDX Where to store edx.
537 * @remark We're using void pointers to ease the use of special bitfield structures and such.
538 */
539#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
540DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
541#else
542DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
543{
544# if RT_INLINE_ASM_GNU_STYLE
545# ifdef RT_ARCH_AMD64
546 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
547 __asm__ ("cpuid\n\t"
548 : "=a" (uRAX),
549 "=b" (uRBX),
550 "=c" (uRCX),
551 "=d" (uRDX)
552 : "0" (uOperator));
553 *(uint32_t *)pvEAX = (uint32_t)uRAX;
554 *(uint32_t *)pvEBX = (uint32_t)uRBX;
555 *(uint32_t *)pvECX = (uint32_t)uRCX;
556 *(uint32_t *)pvEDX = (uint32_t)uRDX;
557# else
558 __asm__ ("xchgl %%ebx, %1\n\t"
559 "cpuid\n\t"
560 "xchgl %%ebx, %1\n\t"
561 : "=a" (*(uint32_t *)pvEAX),
562 "=r" (*(uint32_t *)pvEBX),
563 "=c" (*(uint32_t *)pvECX),
564 "=d" (*(uint32_t *)pvEDX)
565 : "0" (uOperator));
566# endif
567
568# elif RT_INLINE_ASM_USES_INTRIN
569 int aInfo[4];
570 __cpuid(aInfo, uOperator);
571 *(uint32_t *)pvEAX = aInfo[0];
572 *(uint32_t *)pvEBX = aInfo[1];
573 *(uint32_t *)pvECX = aInfo[2];
574 *(uint32_t *)pvEDX = aInfo[3];
575
576# else
577 uint32_t uEAX;
578 uint32_t uEBX;
579 uint32_t uECX;
580 uint32_t uEDX;
581 __asm
582 {
583 push ebx
584 mov eax, [uOperator]
585 cpuid
586 mov [uEAX], eax
587 mov [uEBX], ebx
588 mov [uECX], ecx
589 mov [uEDX], edx
590 pop ebx
591 }
592 *(uint32_t *)pvEAX = uEAX;
593 *(uint32_t *)pvEBX = uEBX;
594 *(uint32_t *)pvECX = uECX;
595 *(uint32_t *)pvEDX = uEDX;
596# endif
597}
598#endif
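
/* Usage sketch: querying standard leaf 1 and testing a feature bit. Bit 4 of
   EDX is the architectural TSC feature flag in the standard feature leaf; the
   helper name is illustrative only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(bool) rtAsmExampleHasTsc(void)
{
    uint32_t uEAX, uEBX, uECX, uEDX;
    ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
    return !!(uEDX & RT_BIT(4));    /* CPUID.01h:EDX[4] = TSC */
}
#endif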
599
600
601/**
602 * Performs the cpuid instruction returning all registers.
603 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
604 *
605 * @param uOperator CPUID operation (eax).
606 * @param uIdxECX ecx index
607 * @param pvEAX Where to store eax.
608 * @param pvEBX Where to store ebx.
609 * @param pvECX Where to store ecx.
610 * @param pvEDX Where to store edx.
611 * @remark We're using void pointers to ease the use of special bitfield structures and such.
612 */
613#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
614DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
615#else
616DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
617{
618# if RT_INLINE_ASM_GNU_STYLE
619# ifdef RT_ARCH_AMD64
620 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
621 __asm__ ("cpuid\n\t"
622 : "=a" (uRAX),
623 "=b" (uRBX),
624 "=c" (uRCX),
625 "=d" (uRDX)
626 : "0" (uOperator),
627 "2" (uIdxECX));
628 *(uint32_t *)pvEAX = (uint32_t)uRAX;
629 *(uint32_t *)pvEBX = (uint32_t)uRBX;
630 *(uint32_t *)pvECX = (uint32_t)uRCX;
631 *(uint32_t *)pvEDX = (uint32_t)uRDX;
632# else
633 __asm__ ("xchgl %%ebx, %1\n\t"
634 "cpuid\n\t"
635 "xchgl %%ebx, %1\n\t"
636 : "=a" (*(uint32_t *)pvEAX),
637 "=r" (*(uint32_t *)pvEBX),
638 "=c" (*(uint32_t *)pvECX),
639 "=d" (*(uint32_t *)pvEDX)
640 : "0" (uOperator),
641 "2" (uIdxECX));
642# endif
643
644# elif RT_INLINE_ASM_USES_INTRIN
645 int aInfo[4];
646 /* ??? another intrinsic ??? Note: this path does not pass uIdxECX to cpuid. */
647 __cpuid(aInfo, uOperator);
648 *(uint32_t *)pvEAX = aInfo[0];
649 *(uint32_t *)pvEBX = aInfo[1];
650 *(uint32_t *)pvECX = aInfo[2];
651 *(uint32_t *)pvEDX = aInfo[3];
652
653# else
654 uint32_t uEAX;
655 uint32_t uEBX;
656 uint32_t uECX;
657 uint32_t uEDX;
658 __asm
659 {
660 push ebx
661 mov eax, [uOperator]
662 mov ecx, [uIdxECX]
663 cpuid
664 mov [uEAX], eax
665 mov [uEBX], ebx
666 mov [uECX], ecx
667 mov [uEDX], edx
668 pop ebx
669 }
670 *(uint32_t *)pvEAX = uEAX;
671 *(uint32_t *)pvEBX = uEBX;
672 *(uint32_t *)pvECX = uECX;
673 *(uint32_t *)pvEDX = uEDX;
674# endif
675}
676#endif
677
678
679/**
680 * Performs the cpuid instruction returning ecx and edx.
681 *
682 * @param uOperator CPUID operation (eax).
683 * @param pvECX Where to store ecx.
684 * @param pvEDX Where to store edx.
685 * @remark We're using void pointers to ease the use of special bitfield structures and such.
686 */
687#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
688DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
689#else
690DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
691{
692 uint32_t uEBX;
693 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
694}
695#endif
696
697
698/**
699 * Performs the cpuid instruction returning edx.
700 *
701 * @param uOperator CPUID operation (eax).
702 * @returns EDX after cpuid operation.
703 */
704#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
705DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
706#else
707DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
708{
709 RTCCUINTREG xDX;
710# if RT_INLINE_ASM_GNU_STYLE
711# ifdef RT_ARCH_AMD64
712 RTCCUINTREG uSpill;
713 __asm__ ("cpuid"
714 : "=a" (uSpill),
715 "=d" (xDX)
716 : "0" (uOperator)
717 : "rbx", "rcx");
718# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
719 __asm__ ("push %%ebx\n\t"
720 "cpuid\n\t"
721 "pop %%ebx\n\t"
722 : "=a" (uOperator),
723 "=d" (xDX)
724 : "0" (uOperator)
725 : "ecx");
726# else
727 __asm__ ("cpuid"
728 : "=a" (uOperator),
729 "=d" (xDX)
730 : "0" (uOperator)
731 : "ebx", "ecx");
732# endif
733
734# elif RT_INLINE_ASM_USES_INTRIN
735 int aInfo[4];
736 __cpuid(aInfo, uOperator);
737 xDX = aInfo[3];
738
739# else
740 __asm
741 {
742 push ebx
743 mov eax, [uOperator]
744 cpuid
745 mov [xDX], edx
746 pop ebx
747 }
748# endif
749 return (uint32_t)xDX;
750}
751#endif
752
753
754/**
755 * Performs the cpuid instruction returning ecx.
756 *
757 * @param uOperator CPUID operation (eax).
758 * @returns ECX after cpuid operation.
759 */
760#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
761DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
762#else
763DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
764{
765 RTCCUINTREG xCX;
766# if RT_INLINE_ASM_GNU_STYLE
767# ifdef RT_ARCH_AMD64
768 RTCCUINTREG uSpill;
769 __asm__ ("cpuid"
770 : "=a" (uSpill),
771 "=c" (xCX)
772 : "0" (uOperator)
773 : "rbx", "rdx");
774# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
775 __asm__ ("push %%ebx\n\t"
776 "cpuid\n\t"
777 "pop %%ebx\n\t"
778 : "=a" (uOperator),
779 "=c" (xCX)
780 : "0" (uOperator)
781 : "edx");
782# else
783 __asm__ ("cpuid"
784 : "=a" (uOperator),
785 "=c" (xCX)
786 : "0" (uOperator)
787 : "ebx", "edx");
788
789# endif
790
791# elif RT_INLINE_ASM_USES_INTRIN
792 int aInfo[4];
793 __cpuid(aInfo, uOperator);
794 xCX = aInfo[2];
795
796# else
797 __asm
798 {
799 push ebx
800 mov eax, [uOperator]
801 cpuid
802 mov [xCX], ecx
803 pop ebx
804 }
805# endif
806 return (uint32_t)xCX;
807}
808#endif
809
810
811/**
812 * Checks if the current CPU supports CPUID.
813 *
814 * @returns true if CPUID is supported.
815 */
816DECLINLINE(bool) ASMHasCpuId(void)
817{
818#ifdef RT_ARCH_AMD64
819 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
820#else /* !RT_ARCH_AMD64 */
821 bool fRet = false;
822# if RT_INLINE_ASM_GNU_STYLE
823 uint32_t u1;
824 uint32_t u2;
825 __asm__ ("pushf\n\t"
826 "pop %1\n\t"
827 "mov %1, %2\n\t"
828 "xorl $0x200000, %1\n\t"
829 "push %1\n\t"
830 "popf\n\t"
831 "pushf\n\t"
832 "pop %1\n\t"
833 "cmpl %1, %2\n\t"
834 "setne %0\n\t"
835 "push %2\n\t"
836 "popf\n\t"
837 : "=m" (fRet), "=r" (u1), "=r" (u2));
838# else
839 __asm
840 {
841 pushfd
842 pop eax
843 mov ebx, eax
844 xor eax, 0200000h
845 push eax
846 popfd
847 pushfd
848 pop eax
849 cmp eax, ebx
850 setne fRet
851 push ebx
852 popfd
853 }
854# endif
855 return fRet;
856#endif /* !RT_ARCH_AMD64 */
857}
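
/* Usage sketch: guarding CPUID use on 32-bit hosts where very old CPUs may
   lack the instruction. The helper name is illustrative only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(uint32_t) rtAsmExampleStdFeaturesEdx(void)
{
    if (!ASMHasCpuId())
        return 0;               /* pre-CPUID CPU: report no features */
    return ASMCpuId_EDX(1);
}
#endif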
858
859
860/**
861 * Gets the APIC ID of the current CPU.
862 *
863 * @returns the APIC ID.
864 */
865#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
866DECLASM(uint8_t) ASMGetApicId(void);
867#else
868DECLINLINE(uint8_t) ASMGetApicId(void)
869{
870 RTCCUINTREG xBX;
871# if RT_INLINE_ASM_GNU_STYLE
872# ifdef RT_ARCH_AMD64
873 RTCCUINTREG uSpill;
874 __asm__ ("cpuid"
875 : "=a" (uSpill),
876 "=b" (xBX)
877 : "0" (1)
878 : "rcx", "rdx");
879# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
880 RTCCUINTREG uSpill;
881 __asm__ ("mov %%ebx,%1\n\t"
882 "cpuid\n\t"
883 "xchgl %%ebx,%1\n\t"
884 : "=a" (uSpill),
885 "=r" (xBX)
886 : "0" (1)
887 : "ecx", "edx");
888# else
889 RTCCUINTREG uSpill;
890 __asm__ ("cpuid"
891 : "=a" (uSpill),
892 "=b" (xBX)
893 : "0" (1)
894 : "ecx", "edx");
895# endif
896
897# elif RT_INLINE_ASM_USES_INTRIN
898 int aInfo[4];
899 __cpuid(aInfo, 1);
900 xBX = aInfo[1];
901
902# else
903 __asm
904 {
905 push ebx
906 mov eax, 1
907 cpuid
908 mov [xBX], ebx
909 pop ebx
910 }
911# endif
912 return (uint8_t)(xBX >> 24);
913}
914#endif
915
916/**
917 * Get cr0.
918 * @returns cr0.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(RTCCUINTREG) ASMGetCR0(void);
922#else
923DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
924{
925 RTCCUINTREG uCR0;
926# if RT_INLINE_ASM_USES_INTRIN
927 uCR0 = __readcr0();
928
929# elif RT_INLINE_ASM_GNU_STYLE
930# ifdef RT_ARCH_AMD64
931 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
932# else
933 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
934# endif
935# else
936 __asm
937 {
938# ifdef RT_ARCH_AMD64
939 mov rax, cr0
940 mov [uCR0], rax
941# else
942 mov eax, cr0
943 mov [uCR0], eax
944# endif
945 }
946# endif
947 return uCR0;
948}
949#endif
950
951
952/**
953 * Sets the CR0 register.
954 * @param uCR0 The new CR0 value.
955 */
956#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
957DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
958#else
959DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
960{
961# if RT_INLINE_ASM_USES_INTRIN
962 __writecr0(uCR0);
963
964# elif RT_INLINE_ASM_GNU_STYLE
965# ifdef RT_ARCH_AMD64
966 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
967# else
968 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
969# endif
970# else
971 __asm
972 {
973# ifdef RT_ARCH_AMD64
974 mov rax, [uCR0]
975 mov cr0, rax
976# else
977 mov eax, [uCR0]
978 mov cr0, eax
979# endif
980 }
981# endif
982}
983#endif
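
/* Usage sketch: temporarily clearing CR0.WP (architecturally bit 16) so
   ring-0 code can write to read-only pages, then restoring the original
   value. Doing this safely also requires interrupts to be disabled, which is
   omitted here; the helper name is illustrative only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExamplePatchWithWpCleared(void (*pfnPatch)(void))
{
    RTCCUINTREG const uCr0 = ASMGetCR0();
    ASMSetCR0(uCr0 & ~(RTCCUINTREG)RT_BIT(16));     /* clear CR0.WP */
    pfnPatch();
    ASMSetCR0(uCr0);                                /* restore */
}
#endif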
984
985
986/**
987 * Get cr2.
988 * @returns cr2.
989 */
990#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
991DECLASM(RTCCUINTREG) ASMGetCR2(void);
992#else
993DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
994{
995 RTCCUINTREG uCR2;
996# if RT_INLINE_ASM_USES_INTRIN
997 uCR2 = __readcr2();
998
999# elif RT_INLINE_ASM_GNU_STYLE
1000# ifdef RT_ARCH_AMD64
1001 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1002# else
1003 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1004# endif
1005# else
1006 __asm
1007 {
1008# ifdef RT_ARCH_AMD64
1009 mov rax, cr2
1010 mov [uCR2], rax
1011# else
1012 mov eax, cr2
1013 mov [uCR2], eax
1014# endif
1015 }
1016# endif
1017 return uCR2;
1018}
1019#endif
1020
1021
1022/**
1023 * Sets the CR2 register.
1024 * @param uCR2 The new CR2 value.
1025 */
1026#if RT_INLINE_ASM_EXTERNAL
1027DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1028#else
1029DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1030{
1031# if RT_INLINE_ASM_GNU_STYLE
1032# ifdef RT_ARCH_AMD64
1033 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1034# else
1035 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1036# endif
1037# else
1038 __asm
1039 {
1040# ifdef RT_ARCH_AMD64
1041 mov rax, [uCR2]
1042 mov cr2, rax
1043# else
1044 mov eax, [uCR2]
1045 mov cr2, eax
1046# endif
1047 }
1048# endif
1049}
1050#endif
1051
1052
1053/**
1054 * Get cr3.
1055 * @returns cr3.
1056 */
1057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1058DECLASM(RTCCUINTREG) ASMGetCR3(void);
1059#else
1060DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1061{
1062 RTCCUINTREG uCR3;
1063# if RT_INLINE_ASM_USES_INTRIN
1064 uCR3 = __readcr3();
1065
1066# elif RT_INLINE_ASM_GNU_STYLE
1067# ifdef RT_ARCH_AMD64
1068 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1069# else
1070 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1071# endif
1072# else
1073 __asm
1074 {
1075# ifdef RT_ARCH_AMD64
1076 mov rax, cr3
1077 mov [uCR3], rax
1078# else
1079 mov eax, cr3
1080 mov [uCR3], eax
1081# endif
1082 }
1083# endif
1084 return uCR3;
1085}
1086#endif
1087
1088
1089/**
1090 * Sets the CR3 register.
1091 *
1092 * @param uCR3 New CR3 value.
1093 */
1094#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1095DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1096#else
1097DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1098{
1099# if RT_INLINE_ASM_USES_INTRIN
1100 __writecr3(uCR3);
1101
1102# elif RT_INLINE_ASM_GNU_STYLE
1103# ifdef RT_ARCH_AMD64
1104 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1105# else
1106 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1107# endif
1108# else
1109 __asm
1110 {
1111# ifdef RT_ARCH_AMD64
1112 mov rax, [uCR3]
1113 mov cr3, rax
1114# else
1115 mov eax, [uCR3]
1116 mov cr3, eax
1117# endif
1118 }
1119# endif
1120}
1121#endif
1122
1123
1124/**
1125 * Reloads the CR3 register.
1126 */
1127#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1128DECLASM(void) ASMReloadCR3(void);
1129#else
1130DECLINLINE(void) ASMReloadCR3(void)
1131{
1132# if RT_INLINE_ASM_USES_INTRIN
1133 __writecr3(__readcr3());
1134
1135# elif RT_INLINE_ASM_GNU_STYLE
1136 RTCCUINTREG u;
1137# ifdef RT_ARCH_AMD64
1138 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1139 "movq %0, %%cr3\n\t"
1140 : "=r" (u));
1141# else
1142 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1143 "movl %0, %%cr3\n\t"
1144 : "=r" (u));
1145# endif
1146# else
1147 __asm
1148 {
1149# ifdef RT_ARCH_AMD64
1150 mov rax, cr3
1151 mov cr3, rax
1152# else
1153 mov eax, cr3
1154 mov cr3, eax
1155# endif
1156 }
1157# endif
1158}
1159#endif
1160
1161
1162/**
1163 * Get cr4.
1164 * @returns cr4.
1165 */
1166#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1167DECLASM(RTCCUINTREG) ASMGetCR4(void);
1168#else
1169DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1170{
1171 RTCCUINTREG uCR4;
1172# if RT_INLINE_ASM_USES_INTRIN
1173 uCR4 = __readcr4();
1174
1175# elif RT_INLINE_ASM_GNU_STYLE
1176# ifdef RT_ARCH_AMD64
1177 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1178# else
1179 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1180# endif
1181# else
1182 __asm
1183 {
1184# ifdef RT_ARCH_AMD64
1185 mov rax, cr4
1186 mov [uCR4], rax
1187# else
1188 push eax /* just in case */
1189 /*mov eax, cr4*/
1190 _emit 0x0f
1191 _emit 0x20
1192 _emit 0xe0
1193 mov [uCR4], eax
1194 pop eax
1195# endif
1196 }
1197# endif
1198 return uCR4;
1199}
1200#endif
1201
1202
1203/**
1204 * Sets the CR4 register.
1205 *
1206 * @param uCR4 New CR4 value.
1207 */
1208#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1209DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1210#else
1211DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1212{
1213# if RT_INLINE_ASM_USES_INTRIN
1214 __writecr4(uCR4);
1215
1216# elif RT_INLINE_ASM_GNU_STYLE
1217# ifdef RT_ARCH_AMD64
1218 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1219# else
1220 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1221# endif
1222# else
1223 __asm
1224 {
1225# ifdef RT_ARCH_AMD64
1226 mov rax, [uCR4]
1227 mov cr4, rax
1228# else
1229 mov eax, [uCR4]
1230 _emit 0x0F
1231 _emit 0x22
1232 _emit 0xE0 /* mov cr4, eax */
1233# endif
1234 }
1235# endif
1236}
1237#endif
1238
1239
1240/**
1241 * Get cr8.
1242 * @returns cr8.
1243 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1244 */
1245#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1246DECLASM(RTCCUINTREG) ASMGetCR8(void);
1247#else
1248DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1249{
1250# ifdef RT_ARCH_AMD64
1251 RTCCUINTREG uCR8;
1252# if RT_INLINE_ASM_USES_INTRIN
1253 uCR8 = __readcr8();
1254
1255# elif RT_INLINE_ASM_GNU_STYLE
1256 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1257# else
1258 __asm
1259 {
1260 mov rax, cr8
1261 mov [uCR8], rax
1262 }
1263# endif
1264 return uCR8;
1265# else /* !RT_ARCH_AMD64 */
1266 return 0;
1267# endif /* !RT_ARCH_AMD64 */
1268}
1269#endif
1270
1271
1272/**
1273 * Enables interrupts (EFLAGS.IF).
1274 */
1275#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1276DECLASM(void) ASMIntEnable(void);
1277#else
1278DECLINLINE(void) ASMIntEnable(void)
1279{
1280# if RT_INLINE_ASM_GNU_STYLE
1281 __asm("sti\n");
1282# elif RT_INLINE_ASM_USES_INTRIN
1283 _enable();
1284# else
1285 __asm sti
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Disables interrupts (!EFLAGS.IF).
1293 */
1294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1295DECLASM(void) ASMIntDisable(void);
1296#else
1297DECLINLINE(void) ASMIntDisable(void)
1298{
1299# if RT_INLINE_ASM_GNU_STYLE
1300 __asm("cli\n");
1301# elif RT_INLINE_ASM_USES_INTRIN
1302 _disable();
1303# else
1304 __asm cli
1305# endif
1306}
1307#endif
1308
1309
1310/**
1311 * Disables interrupts and returns previous xFLAGS.
1312 */
1313#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1314DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1315#else
1316DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1317{
1318 RTCCUINTREG xFlags;
1319# if RT_INLINE_ASM_GNU_STYLE
1320# ifdef RT_ARCH_AMD64
1321 __asm__ __volatile__("pushfq\n\t"
1322 "cli\n\t"
1323 "popq %0\n\t"
1324 : "=rm" (xFlags));
1325# else
1326 __asm__ __volatile__("pushfl\n\t"
1327 "cli\n\t"
1328 "popl %0\n\t"
1329 : "=rm" (xFlags));
1330# endif
1331# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1332 xFlags = ASMGetFlags();
1333 _disable();
1334# else
1335 __asm {
1336 pushfd
1337 cli
1338 pop [xFlags]
1339 }
1340# endif
1341 return xFlags;
1342}
1343#endif
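
/* Usage sketch: the usual pattern for a short interrupt-free section - save
   the flags and disable interrupts in one operation, then restore the saved
   flags (which re-enables interrupts only if they were enabled before). The
   helper name is illustrative only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExampleNoIrqSection(void (*pfnWork)(void))
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
    pfnWork();                  /* runs with interrupts disabled */
    ASMSetFlags(fSavedFlags);
}
#endif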
1344
1345
1346/**
1347 * Reads a machine specific register.
1348 *
1349 * @returns Register content.
1350 * @param uRegister Register to read.
1351 */
1352#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1353DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1354#else
1355DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1356{
1357 RTUINT64U u;
1358# if RT_INLINE_ASM_GNU_STYLE
1359 __asm__ ("rdmsr\n\t"
1360 : "=a" (u.s.Lo),
1361 "=d" (u.s.Hi)
1362 : "c" (uRegister));
1363
1364# elif RT_INLINE_ASM_USES_INTRIN
1365 u.u = __readmsr(uRegister);
1366
1367# else
1368 __asm
1369 {
1370 mov ecx, [uRegister]
1371 rdmsr
1372 mov [u.s.Lo], eax
1373 mov [u.s.Hi], edx
1374 }
1375# endif
1376
1377 return u.u;
1378}
1379#endif
1380
1381
1382/**
1383 * Writes a machine specific register.
1384 *
1385 * @returns Register content.
1386 * @param uRegister Register to write to.
1387 * @param u64Val Value to write.
1388 */
1389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1390DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1391#else
1392DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1393{
1394 RTUINT64U u;
1395
1396 u.u = u64Val;
1397# if RT_INLINE_ASM_GNU_STYLE
1398 __asm__ __volatile__("wrmsr\n\t"
1399 ::"a" (u.s.Lo),
1400 "d" (u.s.Hi),
1401 "c" (uRegister));
1402
1403# elif RT_INLINE_ASM_USES_INTRIN
1404 __writemsr(uRegister, u.u);
1405
1406# else
1407 __asm
1408 {
1409 mov ecx, [uRegister]
1410 mov edx, [u.s.Hi]
1411 mov eax, [u.s.Lo]
1412 wrmsr
1413 }
1414# endif
1415}
1416#endif
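
/* Usage sketch: a read-modify-write of a machine specific register. The MSR
   index 0x1b (IA32_APIC_BASE) and bit 11 (APIC global enable) are the
   architectural values for that particular register; treat them purely as an
   example and consult the CPU manuals for the MSR you actually need. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(void) rtAsmExampleEnableApic(void)
{
    uint64_t u64 = ASMRdMsr(0x1b);  /* IA32_APIC_BASE */
    u64 |= RT_BIT(11);              /* APIC global enable */
    ASMWrMsr(0x1b, u64);
}
#endif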
1417
1418
1419/**
1420 * Reads low part of a machine specific register.
1421 *
1422 * @returns Register content.
1423 * @param uRegister Register to read.
1424 */
1425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1426DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1427#else
1428DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1429{
1430 uint32_t u32;
1431# if RT_INLINE_ASM_GNU_STYLE
1432 __asm__ ("rdmsr\n\t"
1433 : "=a" (u32)
1434 : "c" (uRegister)
1435 : "edx");
1436
1437# elif RT_INLINE_ASM_USES_INTRIN
1438 u32 = (uint32_t)__readmsr(uRegister);
1439
1440# else
1441 __asm
1442 {
1443 mov ecx, [uRegister]
1444 rdmsr
1445 mov [u32], eax
1446 }
1447# endif
1448
1449 return u32;
1450}
1451#endif
1452
1453
1454/**
1455 * Reads high part of a machine specific register.
1456 *
1457 * @returns Register content.
1458 * @param uRegister Register to read.
1459 */
1460#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1461DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1462#else
1463DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1464{
1465 uint32_t u32;
1466# if RT_INLINE_ASM_GNU_STYLE
1467 __asm__ ("rdmsr\n\t"
1468 : "=d" (u32)
1469 : "c" (uRegister)
1470 : "eax");
1471
1472# elif RT_INLINE_ASM_USES_INTRIN
1473 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1474
1475# else
1476 __asm
1477 {
1478 mov ecx, [uRegister]
1479 rdmsr
1480 mov [u32], edx
1481 }
1482# endif
1483
1484 return u32;
1485}
1486#endif
1487
1488
1489/**
1490 * Gets dr7.
1491 *
1492 * @returns dr7.
1493 */
1494#if RT_INLINE_ASM_EXTERNAL
1495DECLASM(RTCCUINTREG) ASMGetDR7(void);
1496#else
1497DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1498{
1499 RTCCUINTREG uDR7;
1500# if RT_INLINE_ASM_GNU_STYLE
1501# ifdef RT_ARCH_AMD64
1502 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1503# else
1504 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1505# endif
1506# else
1507 __asm
1508 {
1509# ifdef RT_ARCH_AMD64
1510 mov rax, dr7
1511 mov [uDR7], rax
1512# else
1513 mov eax, dr7
1514 mov [uDR7], eax
1515# endif
1516 }
1517# endif
1518 return uDR7;
1519}
1520#endif
1521
1522
1523/**
1524 * Gets dr6.
1525 *
1526 * @returns dr6.
1527 */
1528#if RT_INLINE_ASM_EXTERNAL
1529DECLASM(RTCCUINTREG) ASMGetDR6(void);
1530#else
1531DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1532{
1533 RTCCUINTREG uDR6;
1534# if RT_INLINE_ASM_GNU_STYLE
1535# ifdef RT_ARCH_AMD64
1536 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1537# else
1538 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1539# endif
1540# else
1541 __asm
1542 {
1543# ifdef RT_ARCH_AMD64
1544 mov rax, dr6
1545 mov [uDR6], rax
1546# else
1547 mov eax, dr6
1548 mov [uDR6], eax
1549# endif
1550 }
1551# endif
1552 return uDR6;
1553}
1554#endif
1555
1556
1557/**
1558 * Reads and clears DR6.
1559 *
1560 * @returns DR6.
1561 */
1562#if RT_INLINE_ASM_EXTERNAL
1563DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1564#else
1565DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1566{
1567 RTCCUINTREG uDR6;
1568# if RT_INLINE_ASM_GNU_STYLE
1569 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1570# ifdef RT_ARCH_AMD64
1571 __asm__ ("movq %%dr6, %0\n\t"
1572 "movq %1, %%dr6\n\t"
1573 : "=r" (uDR6)
1574 : "r" (uNewValue));
1575# else
1576 __asm__ ("movl %%dr6, %0\n\t"
1577 "movl %1, %%dr6\n\t"
1578 : "=r" (uDR6)
1579 : "r" (uNewValue));
1580# endif
1581# else
1582 __asm
1583 {
1584# ifdef RT_ARCH_AMD64
1585 mov rax, dr6
1586 mov [uDR6], rax
1587 mov rcx, rax
1588 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1589 mov dr6, rcx
1590# else
1591 mov eax, dr6
1592 mov [uDR6], eax
1593 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1594 mov dr6, ecx
1595# endif
1596 }
1597# endif
1598 return uDR6;
1599}
1600#endif
1601
1602
1603/**
1604 * Compiler memory barrier.
1605 *
1606 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1607 * values or any outstanding writes when returning from this function.
1608 *
1609 * This function must be used if non-volatile data is modified by a
1610 * device or the VMM. Typical cases are port access, MMIO access,
1611 * trapping instructions, etc.
1612 */
1613#if RT_INLINE_ASM_GNU_STYLE
1614# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1615#elif RT_INLINE_ASM_USES_INTRIN
1616# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1617#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1618DECLINLINE(void) ASMCompilerBarrier(void)
1619{
1620 __asm
1621 {
1622 }
1623}
1624#endif
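
/* Usage sketch: forcing the compiler to re-read a flag that an interrupt
   handler or another agent may change behind its back. The variable and
   helper names are illustrative assumptions only. */
#if 0 /* illustrative sketch, not compiled */
static uint32_t g_fExampleStopRequested;

DECLINLINE(void) rtAsmExamplePollStop(void)
{
    while (!g_fExampleStopRequested)
    {
        /* ... do a unit of work ... */
        ASMCompilerBarrier();   /* don't keep g_fExampleStopRequested cached in a register */
    }
}
#endif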
1625
1626
1627/**
1628 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1629 *
1630 * @param Port I/O port to write to.
1631 * @param u8 8-bit integer to write.
1632 */
1633#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1634DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1635#else
1636DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1637{
1638# if RT_INLINE_ASM_GNU_STYLE
1639 __asm__ __volatile__("outb %b1, %w0\n\t"
1640 :: "Nd" (Port),
1641 "a" (u8));
1642
1643# elif RT_INLINE_ASM_USES_INTRIN
1644 __outbyte(Port, u8);
1645
1646# else
1647 __asm
1648 {
1649 mov dx, [Port]
1650 mov al, [u8]
1651 out dx, al
1652 }
1653# endif
1654}
1655#endif
1656
1657
1658/**
1659 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1660 *
1661 * @returns 8-bit integer.
1662 * @param Port I/O port to read from.
1663 */
1664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1665DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1666#else
1667DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1668{
1669 uint8_t u8;
1670# if RT_INLINE_ASM_GNU_STYLE
1671 __asm__ __volatile__("inb %w1, %b0\n\t"
1672 : "=a" (u8)
1673 : "Nd" (Port));
1674
1675# elif RT_INLINE_ASM_USES_INTRIN
1676 u8 = __inbyte(Port);
1677
1678# else
1679 __asm
1680 {
1681 mov dx, [Port]
1682 in al, dx
1683 mov [u8], al
1684 }
1685# endif
1686 return u8;
1687}
1688#endif
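
/* Usage sketch: simple port I/O - writing a progress byte to the conventional
   POST/diagnostic port 0x80 and reading CMOS status register A via the RTC
   index/data ports 0x70/0x71. The port numbers are the conventional PC values
   and are given for illustration only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(uint8_t) rtAsmExamplePortIo(void)
{
    ASMOutU8(0x80, 0x42);       /* POST code / tiny delay */
    ASMOutU8(0x70, 0x0a);       /* select CMOS register A */
    return ASMInU8(0x71);       /* read it back */
}
#endif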
1689
1690
1691/**
1692 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1693 *
1694 * @param Port I/O port to write to.
1695 * @param u16 16-bit integer to write.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1699#else
1700DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1701{
1702# if RT_INLINE_ASM_GNU_STYLE
1703 __asm__ __volatile__("outw %w1, %w0\n\t"
1704 :: "Nd" (Port),
1705 "a" (u16));
1706
1707# elif RT_INLINE_ASM_USES_INTRIN
1708 __outword(Port, u16);
1709
1710# else
1711 __asm
1712 {
1713 mov dx, [Port]
1714 mov ax, [u16]
1715 out dx, ax
1716 }
1717# endif
1718}
1719#endif
1720
1721
1722/**
1723 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1724 *
1725 * @returns 16-bit integer.
1726 * @param Port I/O port to read from.
1727 */
1728#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1729DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1730#else
1731DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1732{
1733 uint16_t u16;
1734# if RT_INLINE_ASM_GNU_STYLE
1735 __asm__ __volatile__("inw %w1, %w0\n\t"
1736 : "=a" (u16)
1737 : "Nd" (Port));
1738
1739# elif RT_INLINE_ASM_USES_INTRIN
1740 u16 = __inword(Port);
1741
1742# else
1743 __asm
1744 {
1745 mov dx, [Port]
1746 in ax, dx
1747 mov [u16], ax
1748 }
1749# endif
1750 return u16;
1751}
1752#endif
1753
1754
1755/**
1756 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1757 *
1758 * @param Port I/O port to write to.
1759 * @param u32 32-bit integer to write.
1760 */
1761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1762DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1763#else
1764DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1765{
1766# if RT_INLINE_ASM_GNU_STYLE
1767 __asm__ __volatile__("outl %1, %w0\n\t"
1768 :: "Nd" (Port),
1769 "a" (u32));
1770
1771# elif RT_INLINE_ASM_USES_INTRIN
1772 __outdword(Port, u32);
1773
1774# else
1775 __asm
1776 {
1777 mov dx, [Port]
1778 mov eax, [u32]
1779 out dx, eax
1780 }
1781# endif
1782}
1783#endif
1784
1785
1786/**
1787 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1788 *
1789 * @returns 32-bit integer.
1790 * @param Port I/O port to read from.
1791 */
1792#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1793DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1794#else
1795DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1796{
1797 uint32_t u32;
1798# if RT_INLINE_ASM_GNU_STYLE
1799 __asm__ __volatile__("inl %w1, %0\n\t"
1800 : "=a" (u32)
1801 : "Nd" (Port));
1802
1803# elif RT_INLINE_ASM_USES_INTRIN
1804 u32 = __indword(Port);
1805
1806# else
1807 __asm
1808 {
1809 mov dx, [Port]
1810 in eax, dx
1811 mov [u32], eax
1812 }
1813# endif
1814 return u32;
1815}
1816#endif
1817
1818/** @todo string i/o */
1819
1820
1821/**
1822 * Atomically Exchange an unsigned 8-bit value, ordered.
1823 *
1824 * @returns Current *pu8 value
1825 * @param pu8 Pointer to the 8-bit variable to update.
1826 * @param u8 The 8-bit value to assign to *pu8.
1827 */
1828#if RT_INLINE_ASM_EXTERNAL
1829DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1830#else
1831DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1832{
1833# if RT_INLINE_ASM_GNU_STYLE
1834 __asm__ __volatile__("xchgb %0, %1\n\t"
1835 : "=m" (*pu8),
1836 "=r" (u8)
1837 : "1" (u8));
1838# else
1839 __asm
1840 {
1841# ifdef RT_ARCH_AMD64
1842 mov rdx, [pu8]
1843 mov al, [u8]
1844 xchg [rdx], al
1845 mov [u8], al
1846# else
1847 mov edx, [pu8]
1848 mov al, [u8]
1849 xchg [edx], al
1850 mov [u8], al
1851# endif
1852 }
1853# endif
1854 return u8;
1855}
1856#endif
1857
1858
1859/**
1860 * Atomically Exchange a signed 8-bit value, ordered.
1861 *
1862 * @returns Current *pi8 value
1863 * @param pi8 Pointer to the 8-bit variable to update.
1864 * @param i8 The 8-bit value to assign to *pi8.
1865 */
1866DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1867{
1868 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1869}
1870
1871
1872/**
1873 * Atomically Exchange a bool value, ordered.
1874 *
1875 * @returns Current *pf value
1876 * @param pf Pointer to the 8-bit variable to update.
1877 * @param f The 8-bit value to assign to *pf.
1878 */
1879DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1880{
1881#ifdef _MSC_VER
1882 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1883#else
1884 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1885#endif
1886}
1887
1888
1889/**
1890 * Atomically Exchange an unsigned 16-bit value, ordered.
1891 *
1892 * @returns Current *pu16 value
1893 * @param pu16 Pointer to the 16-bit variable to update.
1894 * @param u16 The 16-bit value to assign to *pu16.
1895 */
1896#if RT_INLINE_ASM_EXTERNAL
1897DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1898#else
1899DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1900{
1901# if RT_INLINE_ASM_GNU_STYLE
1902 __asm__ __volatile__("xchgw %0, %1\n\t"
1903 : "=m" (*pu16),
1904 "=r" (u16)
1905 : "1" (u16));
1906# else
1907 __asm
1908 {
1909# ifdef RT_ARCH_AMD64
1910 mov rdx, [pu16]
1911 mov ax, [u16]
1912 xchg [rdx], ax
1913 mov [u16], ax
1914# else
1915 mov edx, [pu16]
1916 mov ax, [u16]
1917 xchg [edx], ax
1918 mov [u16], ax
1919# endif
1920 }
1921# endif
1922 return u16;
1923}
1924#endif
1925
1926
1927/**
1928 * Atomically Exchange a signed 16-bit value, ordered.
1929 *
1930 * @returns Current *pi16 value
1931 * @param pi16 Pointer to the 16-bit variable to update.
1932 * @param i16 The 16-bit value to assign to *pi16.
1933 */
1934DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1935{
1936 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1937}
1938
1939
1940/**
1941 * Atomically Exchange an unsigned 32-bit value, ordered.
1942 *
1943 * @returns Current *pu32 value
1944 * @param pu32 Pointer to the 32-bit variable to update.
1945 * @param u32 The 32-bit value to assign to *pu32.
1946 */
1947#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1948DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1949#else
1950DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1951{
1952# if RT_INLINE_ASM_GNU_STYLE
1953 __asm__ __volatile__("xchgl %0, %1\n\t"
1954 : "=m" (*pu32),
1955 "=r" (u32)
1956 : "1" (u32));
1957
1958# elif RT_INLINE_ASM_USES_INTRIN
1959 u32 = _InterlockedExchange((long *)pu32, u32);
1960
1961# else
1962 __asm
1963 {
1964# ifdef RT_ARCH_AMD64
1965 mov rdx, [pu32]
1966 mov eax, u32
1967 xchg [rdx], eax
1968 mov [u32], eax
1969# else
1970 mov edx, [pu32]
1971 mov eax, u32
1972 xchg [edx], eax
1973 mov [u32], eax
1974# endif
1975 }
1976# endif
1977 return u32;
1978}
1979#endif
1980
1981
1982/**
1983 * Atomically Exchange a signed 32-bit value, ordered.
1984 *
1985 * @returns Current *pi32 value
1986 * @param pi32 Pointer to the 32-bit variable to update.
1987 * @param i32 The 32-bit value to assign to *pi32.
1988 */
1989DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1990{
1991 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1992}
1993
1994
1995/**
1996 * Atomically Exchange an unsigned 64-bit value, ordered.
1997 *
1998 * @returns Current *pu64 value
1999 * @param pu64 Pointer to the 64-bit variable to update.
2000 * @param u64 The 64-bit value to assign to *pu64.
2001 */
2002#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2003DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2004#else
2005DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2006{
2007# if defined(RT_ARCH_AMD64)
2008# if RT_INLINE_ASM_USES_INTRIN
2009 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2010
2011# elif RT_INLINE_ASM_GNU_STYLE
2012 __asm__ __volatile__("xchgq %0, %1\n\t"
2013 : "=m" (*pu64),
2014 "=r" (u64)
2015 : "1" (u64));
2016# else
2017 __asm
2018 {
2019 mov rdx, [pu64]
2020 mov rax, [u64]
2021 xchg [rdx], rax
2022 mov [u64], rax
2023 }
2024# endif
2025# else /* !RT_ARCH_AMD64 */
2026# if RT_INLINE_ASM_GNU_STYLE
2027# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2028 uint32_t u32 = (uint32_t)u64;
2029 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2030 "xchgl %%ebx, %3\n\t"
2031 "1:\n\t"
2032 "lock; cmpxchg8b (%5)\n\t"
2033 "jnz 1b\n\t"
2034 "xchgl %%ebx, %3\n\t"
2035 /*"xchgl %%esi, %5\n\t"*/
2036 : "=A" (u64),
2037 "=m" (*pu64)
2038 : "0" (*pu64),
2039 "m" ( u32 ),
2040 "c" ( (uint32_t)(u64 >> 32) ),
2041 "S" (pu64) );
2042# else /* !PIC */
2043 __asm__ __volatile__("1:\n\t"
2044 "lock; cmpxchg8b %1\n\t"
2045 "jnz 1b\n\t"
2046 : "=A" (u64),
2047 "=m" (*pu64)
2048 : "0" (*pu64),
2049 "b" ( (uint32_t)u64 ),
2050 "c" ( (uint32_t)(u64 >> 32) ));
2051# endif
2052# else
2053 __asm
2054 {
2055 mov ebx, dword ptr [u64]
2056 mov ecx, dword ptr [u64 + 4]
2057 mov edi, pu64
2058 mov eax, dword ptr [edi]
2059 mov edx, dword ptr [edi + 4]
2060 retry:
2061 lock cmpxchg8b [edi]
2062 jnz retry
2063 mov dword ptr [u64], eax
2064 mov dword ptr [u64 + 4], edx
2065 }
2066# endif
2067# endif /* !RT_ARCH_AMD64 */
2068 return u64;
2069}
2070#endif
2071
2072
2073/**
2074 * Atomically Exchange a signed 64-bit value, ordered.
2075 *
2076 * @returns Current *pi64 value
2077 * @param pi64 Pointer to the 64-bit variable to update.
2078 * @param i64 The 64-bit value to assign to *pi64.
2079 */
2080DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2081{
2082 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2083}
2084
2085
2086#ifdef RT_ARCH_AMD64
2087/**
2088 * Atomically Exchange an unsigned 128-bit value, ordered.
2089 *
2090 * @returns Current *pu128.
2091 * @param pu128 Pointer to the 128-bit variable to update.
2092 * @param u128 The 128-bit value to assign to *pu128.
2093 *
2094 * @remark We cannot really assume that any hardware supports this. Nor do I have
2095 * GAS support for it. So, for the time being we'll BREAK the atomic
2096 * bit of this function and use two 64-bit exchanges instead.
2097 */
2098# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2099DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2100# else
2101DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2102{
2103 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2104 {
2105 /** @todo this is clumsy code */
2106 RTUINT128U u128Ret;
2107 u128Ret.u = u128;
2108 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2109 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2110 return u128Ret.u;
2111 }
2112#if 0 /* later? */
2113 else
2114 {
2115# if RT_INLINE_ASM_GNU_STYLE
2116 __asm__ __volatile__("1:\n\t"
2117 "lock; cmpxchg8b %1\n\t"
2118 "jnz 1b\n\t"
2119 : "=A" (u128),
2120 "=m" (*pu128)
2121 : "0" (*pu128),
2122 "b" ( (uint64_t)u128 ),
2123 "c" ( (uint64_t)(u128 >> 64) ));
2124# else
2125 __asm
2126 {
2127 mov rbx, dword ptr [u128]
2128 mov rcx, dword ptr [u128 + 8]
2129 mov rdi, pu128
2130 mov rax, dword ptr [rdi]
2131 mov rdx, dword ptr [rdi + 8]
2132 retry:
2133 lock cmpxchg16b [rdi]
2134 jnz retry
2135 mov dword ptr [u128], rax
2136 mov dword ptr [u128 + 8], rdx
2137 }
2138# endif
2139 }
2140 return u128;
2141#endif
2142}
2143# endif
2144#endif /* RT_ARCH_AMD64 */
2145
2146
2147/**
2148 * Atomically Exchange a value whose size might differ
2149 * between platforms or compilers, ordered.
2150 *
2151 * @param pu Pointer to the variable to update.
2152 * @param uNew The value to assign to *pu.
2153 */
2154#define ASMAtomicXchgSize(pu, uNew) \
2155 do { \
2156 switch (sizeof(*(pu))) { \
2157 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2158 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2159 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2160 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2161 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2162 } \
2163 } while (0)
2164
2165
2166/**
2167 * Atomically Exchange a pointer value, ordered.
2168 *
2169 * @returns Current *ppv value
2170 * @param ppv Pointer to the pointer variable to update.
2171 * @param pv The pointer value to assign to *ppv.
2172 */
2173DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2174{
2175#if ARCH_BITS == 32
2176 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2177#elif ARCH_BITS == 64
2178 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2179#else
2180# error "ARCH_BITS is bogus"
2181#endif
2182}
2183
2184
2185/**
2186 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2187 *
2188 * @returns true if xchg was done.
2189 * @returns false if xchg wasn't done.
2190 *
2191 * @param pu32 Pointer to the value to update.
2192 * @param u32New The new value to assign to *pu32.
2193 * @param u32Old The old value to compare *pu32 with.
2194 */
2195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2196DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2197#else
2198DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2199{
2200# if RT_INLINE_ASM_GNU_STYLE
2201 uint8_t u8Ret;
2202 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2203 "setz %1\n\t"
2204 : "=m" (*pu32),
2205 "=qm" (u8Ret)
2206 : "r" (u32New),
2207 "a" (u32Old));
2208 return (bool)u8Ret;
2209
2210# elif RT_INLINE_ASM_USES_INTRIN
2211 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2212
2213# else
2214 uint32_t u32Ret;
2215 __asm
2216 {
2217# ifdef RT_ARCH_AMD64
2218 mov rdx, [pu32]
2219# else
2220 mov edx, [pu32]
2221# endif
2222 mov eax, [u32Old]
2223 mov ecx, [u32New]
2224# ifdef RT_ARCH_AMD64
2225 lock cmpxchg [rdx], ecx
2226# else
2227 lock cmpxchg [edx], ecx
2228# endif
2229 setz al
2230 movzx eax, al
2231 mov [u32Ret], eax
2232 }
2233 return !!u32Ret;
2234# endif
2235}
2236#endif
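
/* Usage sketch: the classic compare-and-exchange try-lock. The lock layout
   (0 = free, 1 = taken) and the helper names are illustrative only. */
#if 0 /* illustrative sketch, not compiled */
DECLINLINE(bool) rtAsmExampleTryAcquire(volatile uint32_t *pu32Lock)
{
    return ASMAtomicCmpXchgU32(pu32Lock, 1 /* new */, 0 /* expected old */);
}

DECLINLINE(void) rtAsmExampleRelease(volatile uint32_t *pu32Lock)
{
    ASMAtomicXchgU32(pu32Lock, 0);  /* ordered store releases the lock */
}
#endif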
2237
2238
2239/**
2240 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2241 *
2242 * @returns true if xchg was done.
2243 * @returns false if xchg wasn't done.
2244 *
2245 * @param pi32 Pointer to the value to update.
2246 * @param i32New The new value to assign to *pi32.
2247 * @param i32Old The old value to compare *pi32 with.
2248 */
2249DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2250{
2251 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2252}
2253
2254
2255/**
2256 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2257 *
2258 * @returns true if xchg was done.
2259 * @returns false if xchg wasn't done.
2260 *
2261 * @param pu64 Pointer to the 64-bit variable to update.
2262 * @param u64New The 64-bit value to assign to *pu64.
2263 * @param u64Old The value to compare with.
2264 */
2265#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2266DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2267#else
2268DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2269{
2270# if RT_INLINE_ASM_USES_INTRIN
2271 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2272
2273# elif defined(RT_ARCH_AMD64)
2274# if RT_INLINE_ASM_GNU_STYLE
2275 uint8_t u8Ret;
2276 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2277 "setz %1\n\t"
2278 : "=m" (*pu64),
2279 "=qm" (u8Ret)
2280 : "r" (u64New),
2281 "a" (u64Old));
2282 return (bool)u8Ret;
2283# else
2284 bool fRet;
2285 __asm
2286 {
2287 mov rdx, [pu64]
2288 mov rax, [u64Old]
2289 mov rcx, [u64New]
2290 lock cmpxchg [rdx], rcx
2291 setz al
2292 mov [fRet], al
2293 }
2294 return fRet;
2295# endif
2296# else /* !RT_ARCH_AMD64 */
2297 uint32_t u32Ret;
2298# if RT_INLINE_ASM_GNU_STYLE
2299# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2300 uint32_t u32 = (uint32_t)u64New;
2301 uint32_t u32Spill;
2302 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2303 "lock; cmpxchg8b (%6)\n\t"
2304 "setz %%al\n\t"
2305 "xchgl %%ebx, %4\n\t"
2306 "movzbl %%al, %%eax\n\t"
2307 : "=a" (u32Ret),
2308 "=d" (u32Spill),
2309 "=m" (*pu64)
2310 : "A" (u64Old),
2311 "m" ( u32 ),
2312 "c" ( (uint32_t)(u64New >> 32) ),
2313 "S" (pu64) );
2314# else /* !PIC */
2315 uint32_t u32Spill;
2316 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2317 "setz %%al\n\t"
2318 "movzbl %%al, %%eax\n\t"
2319 : "=a" (u32Ret),
2320 "=d" (u32Spill),
2321 "=m" (*pu64)
2322 : "A" (u64Old),
2323 "b" ( (uint32_t)u64New ),
2324 "c" ( (uint32_t)(u64New >> 32) ));
2325# endif
2326 return (bool)u32Ret;
2327# else
2328 __asm
2329 {
2330 mov ebx, dword ptr [u64New]
2331 mov ecx, dword ptr [u64New + 4]
2332 mov edi, [pu64]
2333 mov eax, dword ptr [u64Old]
2334 mov edx, dword ptr [u64Old + 4]
2335 lock cmpxchg8b [edi]
2336 setz al
2337 movzx eax, al
2338 mov dword ptr [u32Ret], eax
2339 }
2340 return !!u32Ret;
2341# endif
2342# endif /* !RT_ARCH_AMD64 */
2343}
2344#endif
2345
2346
2347/**
2348 * Atomically Compare and exchange a signed 64-bit value, ordered.
2349 *
2350 * @returns true if xchg was done.
2351 * @returns false if xchg wasn't done.
2352 *
2353 * @param pi64 Pointer to the 64-bit variable to update.
2354 * @param i64 The 64-bit value to assign to *pi64.
2355 * @param i64Old The value to compare with.
2356 */
2357DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2358{
2359 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2360}
2361
2362
2363/** @def ASMAtomicCmpXchgSize
2364 * Atomically Compare and Exchange a value whose size might differ
2365 * between platforms or compilers, ordered.
2366 *
2367 * @param pu Pointer to the value to update.
2368 * @param uNew The new value to assign to *pu.
2369 * @param uOld The old value to compare *pu with.
2370 * @param fRc Where to store the result.
2371 */
2372#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2373 do { \
2374 switch (sizeof(*(pu))) { \
2375 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2376 break; \
2377 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2378 break; \
2379 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2380 (fRc) = false; \
2381 break; \
2382 } \
2383 } while (0)
2384
2385
2386/**
2387 * Atomically Compare and Exchange a pointer value, ordered.
2388 *
2389 * @returns true if xchg was done.
2390 * @returns false if xchg wasn't done.
2391 *
2392 * @param ppv Pointer to the value to update.
2393 * @param pvNew The new value to assign to *ppv.
2394 * @param pvOld The old value to compare *ppv with.
2395 */
2396DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2397{
2398#if ARCH_BITS == 32
2399 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2400#elif ARCH_BITS == 64
2401 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2402#else
2403# error "ARCH_BITS is bogus"
2404#endif
2405}
2406
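/*
 * Usage sketch (illustrative only): pushing a node onto a lock-free singly
 * linked list with ASMAtomicCmpXchgPtr. MYNODE and the function name are
 * hypothetical; a real implementation also has to deal with the pop side.
 *
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *
 *      static void myListPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 *      {
 *          void *pvHead;
 *          do
 *          {
 *              pvHead = ASMAtomicReadPtr((void * volatile *)ppHead);
 *              pNode->pNext = (MYNODE *)pvHead;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvHead));
 *      }
 */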
2407
2408/**
2409 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2410 * passes back old value, ordered.
2411 *
2412 * @returns true if xchg was done.
2413 * @returns false if xchg wasn't done.
2414 *
2415 * @param pu32 Pointer to the value to update.
2416 * @param u32New The new value to assign to *pu32.
2417 * @param u32Old The old value to compare *pu32 with.
2418 * @param pu32Old Where to store the old value.
2419 */
2420#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2421DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2422#else
2423DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2424{
2425# if RT_INLINE_ASM_GNU_STYLE
2426 uint8_t u8Ret;
2427 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2428 "setz %1\n\t"
2429 : "=m" (*pu32),
2430 "=qm" (u8Ret),
2431 "=a" (*pu32Old)
2432 : "r" (u32New),
2433 "a" (u32Old));
2434 return (bool)u8Ret;
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437    return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2438
2439# else
2440 uint32_t u32Ret;
2441 __asm
2442 {
2443# ifdef RT_ARCH_AMD64
2444 mov rdx, [pu32]
2445# else
2446 mov edx, [pu32]
2447# endif
2448 mov eax, [u32Old]
2449 mov ecx, [u32New]
2450# ifdef RT_ARCH_AMD64
2451 lock cmpxchg [rdx], ecx
2452 mov rdx, [pu32Old]
2453 mov [rdx], eax
2454# else
2455 lock cmpxchg [edx], ecx
2456 mov edx, [pu32Old]
2457 mov [edx], eax
2458# endif
2459 setz al
2460 movzx eax, al
2461 mov [u32Ret], eax
2462 }
2463 return !!u32Ret;
2464# endif
2465}
2466#endif
2467
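/*
 * Usage sketch (illustrative only): because the Ex variant hands back the
 * value it actually found, a retry loop does not need a separate re-read
 * after a failed compare. The function name and the cap parameter are made
 * up for the example.
 *
 *      static bool tryIncBelowCap(volatile uint32_t *pu32, uint32_t u32Cap)
 *      {
 *          uint32_t u32Cur = ASMAtomicUoReadU32(pu32);
 *          for (;;)
 *          {
 *              uint32_t u32Old;
 *              if (u32Cur >= u32Cap)
 *                  return false;                       // already at the cap
 *              if (ASMAtomicCmpXchgExU32(pu32, u32Cur + 1, u32Cur, &u32Old))
 *                  return true;                        // we incremented it
 *              u32Cur = u32Old;                        // raced; retry with what was really there
 *          }
 *      }
 */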
2468
2469/**
2470 * Atomically Compare and Exchange a signed 32-bit value, additionally
2471 * passes back old value, ordered.
2472 *
2473 * @returns true if xchg was done.
2474 * @returns false if xchg wasn't done.
2475 *
2476 * @param pi32 Pointer to the value to update.
2477 * @param i32New The new value to assign to *pi32.
2478 * @param i32Old The old value to compare *pi32 with.
2479 * @param pi32Old Where to store the old value.
2480 */
2481DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2482{
2483 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2484}
2485
2486
2487/**
2488 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2489 * passing back old value, ordered.
2490 *
2491 * @returns true if xchg was done.
2492 * @returns false if xchg wasn't done.
2493 *
2494 * @param pu64 Pointer to the 64-bit variable to update.
2495 * @param u64New The 64-bit value to assign to *pu64.
2496 * @param u64Old The value to compare with.
2497 * @param pu64Old Where to store the old value.
2498 */
2499#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2500DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2501#else
2502DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2503{
2504# if RT_INLINE_ASM_USES_INTRIN
2505    return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2506
2507# elif defined(RT_ARCH_AMD64)
2508# if RT_INLINE_ASM_GNU_STYLE
2509 uint8_t u8Ret;
2510 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2511 "setz %1\n\t"
2512 : "=m" (*pu64),
2513 "=qm" (u8Ret),
2514 "=a" (*pu64Old)
2515 : "r" (u64New),
2516 "a" (u64Old));
2517 return (bool)u8Ret;
2518# else
2519 bool fRet;
2520 __asm
2521 {
2522        mov     rdx, [pu64]
2523 mov rax, [u64Old]
2524 mov rcx, [u64New]
2525 lock cmpxchg [rdx], rcx
2526 mov rdx, [pu64Old]
2527 mov [rdx], rax
2528 setz al
2529 mov [fRet], al
2530 }
2531 return fRet;
2532# endif
2533# else /* !RT_ARCH_AMD64 */
2534# if RT_INLINE_ASM_GNU_STYLE
2535 uint64_t u64Ret;
2536# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2537 /* NB: this code uses a memory clobber description, because the clean
2538 * solution with an output value for *pu64 makes gcc run out of registers.
2539 * This will cause suboptimal code, and anyone with a better solution is
2540 * welcome to improve this. */
2541 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2542 "lock; cmpxchg8b %3\n\t"
2543 "xchgl %%ebx, %1\n\t"
2544 : "=A" (u64Ret)
2545 : "DS" ((uint32_t)u64New),
2546 "c" ((uint32_t)(u64New >> 32)),
2547 "m" (*pu64),
2548 "0" (u64Old)
2549 : "memory" );
2550# else /* !PIC */
2551 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2552 : "=A" (u64Ret),
2553 "=m" (*pu64)
2554 : "b" ((uint32_t)u64New),
2555 "c" ((uint32_t)(u64New >> 32)),
2556 "m" (*pu64),
2557 "0" (u64Old));
2558# endif
2559 *pu64Old = u64Ret;
2560 return u64Ret == u64Old;
2561# else
2562 uint32_t u32Ret;
2563 __asm
2564 {
2565 mov ebx, dword ptr [u64New]
2566 mov ecx, dword ptr [u64New + 4]
2567 mov edi, [pu64]
2568 mov eax, dword ptr [u64Old]
2569 mov edx, dword ptr [u64Old + 4]
2570 lock cmpxchg8b [edi]
2571 mov ebx, [pu64Old]
2572 mov [ebx], eax
2573 setz al
2574 movzx eax, al
2575 add ebx, 4
2576 mov [ebx], edx
2577 mov dword ptr [u32Ret], eax
2578 }
2579 return !!u32Ret;
2580# endif
2581# endif /* !RT_ARCH_AMD64 */
2582}
2583#endif
2584
2585
2586/**
2587 * Atomically Compare and exchange a signed 64-bit value, additionally
2588 * passing back old value, ordered.
2589 *
2590 * @returns true if xchg was done.
2591 * @returns false if xchg wasn't done.
2592 *
2593 * @param pi64 Pointer to the 64-bit variable to update.
2594 * @param i64 The 64-bit value to assign to *pi64.
2595 * @param i64Old The value to compare with.
2596 * @param pi64Old Where to store the old value.
2597 */
2598DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2599{
2600 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2601}
2602
2603
2604/** @def ASMAtomicCmpXchgExSize
2605 * Atomically Compare and Exchange a value whose size might differ
2606 * between platforms or compilers. Additionally passes back the old value.
2607 *
2608 * @param pu Pointer to the value to update.
2609 * @param uNew The new value to assign to *pu.
2610 * @param uOld The old value to compare *pu with.
2611 * @param fRc Where to store the result.
2612 * @param uOldVal Where to store the old value.
2613 */
2614#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2615 do { \
2616 switch (sizeof(*(pu))) { \
2617 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2618 break; \
2619 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2620 break; \
2621            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2622 (fRc) = false; \
2623 (uOldVal) = 0; \
2624 break; \
2625 } \
2626 } while (0)
2627
2628
2629/**
2630 * Atomically Compare and Exchange a pointer value, additionally
2631 * passing back old value, ordered.
2632 *
2633 * @returns true if xchg was done.
2634 * @returns false if xchg wasn't done.
2635 *
2636 * @param ppv Pointer to the value to update.
2637 * @param pvNew The new value to assign to *ppv.
2638 * @param pvOld The old value to compare *ppv with.
2639 * @param ppvOld Where to store the old value.
2640 */
2641DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2642{
2643#if ARCH_BITS == 32
2644 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2645#elif ARCH_BITS == 64
2646 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2647#else
2648# error "ARCH_BITS is bogus"
2649#endif
2650}
2651
2652
2653/**
2654 * Atomically increment a 32-bit value, ordered.
2655 *
2656 * @returns The new value.
2657 * @param pu32 Pointer to the value to increment.
2658 */
2659#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2660DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2661#else
2662DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2663{
2664 uint32_t u32;
2665# if RT_INLINE_ASM_USES_INTRIN
2666 u32 = _InterlockedIncrement((long *)pu32);
2667 return u32;
2668
2669# elif RT_INLINE_ASM_GNU_STYLE
2670 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2671 : "=r" (u32),
2672 "=m" (*pu32)
2673 : "0" (1)
2674 : "memory");
2675 return u32+1;
2676# else
2677 __asm
2678 {
2679 mov eax, 1
2680# ifdef RT_ARCH_AMD64
2681 mov rdx, [pu32]
2682 lock xadd [rdx], eax
2683# else
2684 mov edx, [pu32]
2685 lock xadd [edx], eax
2686# endif
2687 mov u32, eax
2688 }
2689 return u32+1;
2690# endif
2691}
2692#endif
2693
2694
2695/**
2696 * Atomically increment a signed 32-bit value, ordered.
2697 *
2698 * @returns The new value.
2699 * @param pi32 Pointer to the value to increment.
2700 */
2701DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2702{
2703 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2704}
2705
2706
2707/**
2708 * Atomically decrement an unsigned 32-bit value, ordered.
2709 *
2710 * @returns The new value.
2711 * @param pu32 Pointer to the value to decrement.
2712 */
2713#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2714DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2715#else
2716DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2717{
2718 uint32_t u32;
2719# if RT_INLINE_ASM_USES_INTRIN
2720 u32 = _InterlockedDecrement((long *)pu32);
2721 return u32;
2722
2723# elif RT_INLINE_ASM_GNU_STYLE
2724 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2725 : "=r" (u32),
2726 "=m" (*pu32)
2727 : "0" (-1)
2728 : "memory");
2729 return u32-1;
2730# else
2731 __asm
2732 {
2733 mov eax, -1
2734# ifdef RT_ARCH_AMD64
2735 mov rdx, [pu32]
2736 lock xadd [rdx], eax
2737# else
2738 mov edx, [pu32]
2739 lock xadd [edx], eax
2740# endif
2741 mov u32, eax
2742 }
2743 return u32-1;
2744# endif
2745}
2746#endif
2747
2748
2749/**
2750 * Atomically decrement a signed 32-bit value, ordered.
2751 *
2752 * @returns The new value.
2753 * @param pi32 Pointer to the value to decrement.
2754 */
2755DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2756{
2757 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2758}
2759
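/*
 * Usage sketch (illustrative only): a minimal reference counter built on
 * the atomic increment/decrement helpers. MYOBJ and the function names are
 * hypothetical.
 *
 *      typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *      static uint32_t myObjRetain(MYOBJ *pObj)
 *      {
 *          return ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      static uint32_t myObjRelease(MYOBJ *pObj)
 *      {
 *          uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs);
 *          // A return value of 0 means the caller held the last reference
 *          // and may now free pObj.
 *          return cRefs;
 *      }
 */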
2760
2761/**
2762 * Atomically Or an unsigned 32-bit value, ordered.
2763 *
2764 * @param pu32 Pointer to the variable to OR u32 with.
2765 * @param u32 The value to OR *pu32 with.
2766 */
2767#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2768DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2769#else
2770DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2771{
2772# if RT_INLINE_ASM_USES_INTRIN
2773 _InterlockedOr((long volatile *)pu32, (long)u32);
2774
2775# elif RT_INLINE_ASM_GNU_STYLE
2776 __asm__ __volatile__("lock; orl %1, %0\n\t"
2777 : "=m" (*pu32)
2778 : "ir" (u32));
2779# else
2780 __asm
2781 {
2782 mov eax, [u32]
2783# ifdef RT_ARCH_AMD64
2784 mov rdx, [pu32]
2785 lock or [rdx], eax
2786# else
2787 mov edx, [pu32]
2788 lock or [edx], eax
2789# endif
2790 }
2791# endif
2792}
2793#endif
2794
2795
2796/**
2797 * Atomically Or a signed 32-bit value, ordered.
2798 *
2799 * @param pi32 Pointer to the variable to OR i32 with.
2800 * @param i32 The value to OR *pi32 with.
2801 */
2802DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2803{
2804 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2805}
2806
2807
2808/**
2809 * Atomically And an unsigned 32-bit value, ordered.
2810 *
2811 * @param pu32 Pointer to the variable to AND u32 with.
2812 * @param u32 The value to AND *pu32 with.
2813 */
2814#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2815DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2816#else
2817DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2818{
2819# if RT_INLINE_ASM_USES_INTRIN
2820 _InterlockedAnd((long volatile *)pu32, u32);
2821
2822# elif RT_INLINE_ASM_GNU_STYLE
2823 __asm__ __volatile__("lock; andl %1, %0\n\t"
2824 : "=m" (*pu32)
2825 : "ir" (u32));
2826# else
2827 __asm
2828 {
2829 mov eax, [u32]
2830# ifdef RT_ARCH_AMD64
2831 mov rdx, [pu32]
2832 lock and [rdx], eax
2833# else
2834 mov edx, [pu32]
2835 lock and [edx], eax
2836# endif
2837 }
2838# endif
2839}
2840#endif
2841
2842
2843/**
2844 * Atomically And a signed 32-bit value, ordered.
2845 *
2846 * @param pi32 Pointer to the variable to AND i32 with.
2847 * @param i32 The value to AND *pi32 with.
2848 */
2849DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2850{
2851 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2852}
2853
2854
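/*
 * Usage sketch (illustrative only): setting and clearing flag bits in a
 * shared 32-bit status word with the atomic OR/AND helpers. The flag
 * constants and the function name are made up.
 *
 *      #define MY_STATUS_BUSY      UINT32_C(0x00000001)
 *      #define MY_STATUS_PENDING   UINT32_C(0x00000002)
 *
 *      static void mySetBusyClearPending(uint32_t volatile *pu32Status)
 *      {
 *          ASMAtomicOrU32(pu32Status, MY_STATUS_BUSY);         // set BUSY
 *          ASMAtomicAndU32(pu32Status, ~MY_STATUS_PENDING);    // clear PENDING
 *      }
 */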
2855/**
2856 * Memory fence, waits for any pending writes and reads to complete.
2857 */
2858DECLINLINE(void) ASMMemoryFence(void)
2859{
2860 /** @todo use mfence? check if all cpus we care for support it. */
2861 uint32_t volatile u32;
2862 ASMAtomicXchgU32(&u32, 0);
2863}
2864
2865
2866/**
2867 * Write fence, waits for any pending writes to complete.
2868 */
2869DECLINLINE(void) ASMWriteFence(void)
2870{
2871 /** @todo use sfence? check if all cpus we care for support it. */
2872 ASMMemoryFence();
2873}
2874
2875
2876/**
2877 * Read fence, waits for any pending reads to complete.
2878 */
2879DECLINLINE(void) ASMReadFence(void)
2880{
2881 /** @todo use lfence? check if all cpus we care for support it. */
2882 ASMMemoryFence();
2883}
2884
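/*
 * Usage sketch (illustrative only): a simple publish/consume handshake
 * where the fences keep the payload and the ready flag ordered relative to
 * each other. The structure and function names are hypothetical.
 *
 *      typedef struct MYMSG { uint32_t u32Payload; bool volatile fReady; } MYMSG;
 *
 *      static void myMsgPublish(MYMSG *pMsg, uint32_t u32Payload)
 *      {
 *          pMsg->u32Payload = u32Payload;
 *          ASMWriteFence();                            // payload becomes visible before the flag
 *          ASMAtomicWriteBool(&pMsg->fReady, true);
 *      }
 *
 *      static bool myMsgTryConsume(MYMSG *pMsg, uint32_t *pu32Payload)
 *      {
 *          if (!ASMAtomicReadBool(&pMsg->fReady))
 *              return false;
 *          ASMReadFence();                             // read the flag before the payload
 *          *pu32Payload = pMsg->u32Payload;
 *          return true;
 *      }
 */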
2885
2886/**
2887 * Atomically reads an unsigned 8-bit value, ordered.
2888 *
2889 * @returns Current *pu8 value
2890 * @param pu8 Pointer to the 8-bit variable to read.
2891 */
2892DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2893{
2894 ASMMemoryFence();
2895 return *pu8; /* byte reads are atomic on x86 */
2896}
2897
2898
2899/**
2900 * Atomically reads an unsigned 8-bit value, unordered.
2901 *
2902 * @returns Current *pu8 value
2903 * @param pu8 Pointer to the 8-bit variable to read.
2904 */
2905DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2906{
2907 return *pu8; /* byte reads are atomic on x86 */
2908}
2909
2910
2911/**
2912 * Atomically reads a signed 8-bit value, ordered.
2913 *
2914 * @returns Current *pi8 value
2915 * @param pi8 Pointer to the 8-bit variable to read.
2916 */
2917DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2918{
2919 ASMMemoryFence();
2920 return *pi8; /* byte reads are atomic on x86 */
2921}
2922
2923
2924/**
2925 * Atomically reads a signed 8-bit value, unordered.
2926 *
2927 * @returns Current *pi8 value
2928 * @param pi8 Pointer to the 8-bit variable to read.
2929 */
2930DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2931{
2932 return *pi8; /* byte reads are atomic on x86 */
2933}
2934
2935
2936/**
2937 * Atomically reads an unsigned 16-bit value, ordered.
2938 *
2939 * @returns Current *pu16 value
2940 * @param pu16 Pointer to the 16-bit variable to read.
2941 */
2942DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
2943{
2944 ASMMemoryFence();
2945 Assert(!((uintptr_t)pu16 & 1));
2946 return *pu16;
2947}
2948
2949
2950/**
2951 * Atomically reads an unsigned 16-bit value, unordered.
2952 *
2953 * @returns Current *pu16 value
2954 * @param pu16 Pointer to the 16-bit variable to read.
2955 */
2956DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
2957{
2958 Assert(!((uintptr_t)pu16 & 1));
2959 return *pu16;
2960}
2961
2962
2963/**
2964 * Atomically reads a signed 16-bit value, ordered.
2965 *
2966 * @returns Current *pi16 value
2967 * @param pi16 Pointer to the 16-bit variable to read.
2968 */
2969DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
2970{
2971 ASMMemoryFence();
2972 Assert(!((uintptr_t)pi16 & 1));
2973 return *pi16;
2974}
2975
2976
2977/**
2978 * Atomically reads a signed 16-bit value, unordered.
2979 *
2980 * @returns Current *pi16 value
2981 * @param pi16 Pointer to the 16-bit variable to read.
2982 */
2983DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
2984{
2985 Assert(!((uintptr_t)pi16 & 1));
2986 return *pi16;
2987}
2988
2989
2990/**
2991 * Atomically reads an unsigned 32-bit value, ordered.
2992 *
2993 * @returns Current *pu32 value
2994 * @param pu32 Pointer to the 32-bit variable to read.
2995 */
2996DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
2997{
2998 ASMMemoryFence();
2999 Assert(!((uintptr_t)pu32 & 3));
3000 return *pu32;
3001}
3002
3003
3004/**
3005 * Atomically reads an unsigned 32-bit value, unordered.
3006 *
3007 * @returns Current *pu32 value
3008 * @param pu32 Pointer to the 32-bit variable to read.
3009 */
3010DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3011{
3012 Assert(!((uintptr_t)pu32 & 3));
3013 return *pu32;
3014}
3015
3016
3017/**
3018 * Atomically reads a signed 32-bit value, ordered.
3019 *
3020 * @returns Current *pi32 value
3021 * @param pi32 Pointer to the 32-bit variable to read.
3022 */
3023DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3024{
3025 ASMMemoryFence();
3026 Assert(!((uintptr_t)pi32 & 3));
3027 return *pi32;
3028}
3029
3030
3031/**
3032 * Atomically reads a signed 32-bit value, unordered.
3033 *
3034 * @returns Current *pi32 value
3035 * @param pi32 Pointer to the 32-bit variable to read.
3036 */
3037DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3038{
3039 Assert(!((uintptr_t)pi32 & 3));
3040 return *pi32;
3041}
3042
3043
3044/**
3045 * Atomically reads an unsigned 64-bit value, ordered.
3046 *
3047 * @returns Current *pu64 value
3048 * @param pu64 Pointer to the 64-bit variable to read.
3049 * The memory pointed to must be writable.
3050 * @remark This will fault if the memory is read-only!
3051 */
3052#if RT_INLINE_ASM_EXTERNAL
3053DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3054#else
3055DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3056{
3057 uint64_t u64;
3058# ifdef RT_ARCH_AMD64
3059# if RT_INLINE_ASM_GNU_STYLE
3060 Assert(!((uintptr_t)pu64 & 7));
3061 __asm__ __volatile__( "mfence\n\t"
3062 "movq %1, %0\n\t"
3063 : "=r" (u64)
3064 : "m" (*pu64));
3065# else
3066 __asm
3067 {
3068 mfence
3069 mov rdx, [pu64]
3070 mov rax, [rdx]
3071 mov [u64], rax
3072 }
3073# endif
3074# else /* !RT_ARCH_AMD64 */
3075# if RT_INLINE_ASM_GNU_STYLE
3076# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3077 uint32_t u32EBX = 0;
3078 Assert(!((uintptr_t)pu64 & 7));
3079 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3080 "lock; cmpxchg8b (%5)\n\t"
3081 "xchgl %%ebx, %3\n\t"
3082 : "=A" (u64),
3083 "=m" (*pu64)
3084 : "0" (0),
3085 "m" (u32EBX),
3086 "c" (0),
3087 "S" (pu64));
3088# else /* !PIC */
3089 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3090 : "=A" (u64),
3091 "=m" (*pu64)
3092 : "0" (0),
3093 "b" (0),
3094 "c" (0));
3095# endif
3096# else
3097 Assert(!((uintptr_t)pu64 & 7));
3098 __asm
3099 {
3100 xor eax, eax
3101 xor edx, edx
3102 mov edi, pu64
3103 xor ecx, ecx
3104 xor ebx, ebx
3105 lock cmpxchg8b [edi]
3106 mov dword ptr [u64], eax
3107 mov dword ptr [u64 + 4], edx
3108 }
3109# endif
3110# endif /* !RT_ARCH_AMD64 */
3111 return u64;
3112}
3113#endif
3114
3115
3116/**
3117 * Atomically reads an unsigned 64-bit value, unordered.
3118 *
3119 * @returns Current *pu64 value
3120 * @param pu64 Pointer to the 64-bit variable to read.
3121 * The memory pointed to must be writable.
3122 * @remark This will fault if the memory is read-only!
3123 */
3124#if RT_INLINE_ASM_EXTERNAL
3125DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3126#else
3127DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3128{
3129 uint64_t u64;
3130# ifdef RT_ARCH_AMD64
3131# if RT_INLINE_ASM_GNU_STYLE
3132 Assert(!((uintptr_t)pu64 & 7));
3133 __asm__ __volatile__("movq %1, %0\n\t"
3134 : "=r" (u64)
3135 : "m" (*pu64));
3136# else
3137 __asm
3138 {
3139 mov rdx, [pu64]
3140 mov rax, [rdx]
3141 mov [u64], rax
3142 }
3143# endif
3144# else /* !RT_ARCH_AMD64 */
3145# if RT_INLINE_ASM_GNU_STYLE
3146# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3147 uint32_t u32EBX = 0;
3148 Assert(!((uintptr_t)pu64 & 7));
3149 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3150 "lock; cmpxchg8b (%5)\n\t"
3151 "xchgl %%ebx, %3\n\t"
3152 : "=A" (u64),
3153 "=m" (*pu64)
3154 : "0" (0),
3155 "m" (u32EBX),
3156 "c" (0),
3157 "S" (pu64));
3158# else /* !PIC */
3159 __asm__ __volatile__("cmpxchg8b %1\n\t"
3160 : "=A" (u64),
3161 "=m" (*pu64)
3162 : "0" (0),
3163 "b" (0),
3164 "c" (0));
3165# endif
3166# else
3167 Assert(!((uintptr_t)pu64 & 7));
3168 __asm
3169 {
3170 xor eax, eax
3171 xor edx, edx
3172 mov edi, pu64
3173 xor ecx, ecx
3174 xor ebx, ebx
3175 lock cmpxchg8b [edi]
3176 mov dword ptr [u64], eax
3177 mov dword ptr [u64 + 4], edx
3178 }
3179# endif
3180# endif /* !RT_ARCH_AMD64 */
3181 return u64;
3182}
3183#endif
3184
3185
3186/**
3187 * Atomically reads a signed 64-bit value, ordered.
3188 *
3189 * @returns Current *pi64 value
3190 * @param pi64 Pointer to the 64-bit variable to read.
3191 * The memory pointed to must be writable.
3192 * @remark This will fault if the memory is read-only!
3193 */
3194DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3195{
3196 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3197}
3198
3199
3200/**
3201 * Atomically reads a signed 64-bit value, unordered.
3202 *
3203 * @returns Current *pi64 value
3204 * @param pi64 Pointer to the 64-bit variable to read.
3205 * The memory pointed to must be writable.
3206 * @remark This will fault if the memory is read-only!
3207 */
3208DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3209{
3210 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3211}
3212
3213
3214/**
3215 * Atomically reads a pointer value, ordered.
3216 *
3217 * @returns Current *pv value
3218 * @param ppv Pointer to the pointer variable to read.
3219 */
3220DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3221{
3222#if ARCH_BITS == 32
3223 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3224#elif ARCH_BITS == 64
3225 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3226#else
3227# error "ARCH_BITS is bogus"
3228#endif
3229}
3230
3231
3232/**
3233 * Atomically reads a pointer value, unordered.
3234 *
3235 * @returns Current *pv value
3236 * @param ppv Pointer to the pointer variable to read.
3237 */
3238DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3239{
3240#if ARCH_BITS == 32
3241 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3242#elif ARCH_BITS == 64
3243 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3244#else
3245# error "ARCH_BITS is bogus"
3246#endif
3247}
3248
3249
3250/**
3251 * Atomically reads a boolean value, ordered.
3252 *
3253 * @returns Current *pf value
3254 * @param pf Pointer to the boolean variable to read.
3255 */
3256DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3257{
3258 ASMMemoryFence();
3259 return *pf; /* byte reads are atomic on x86 */
3260}
3261
3262
3263/**
3264 * Atomically reads a boolean value, unordered.
3265 *
3266 * @returns Current *pf value
3267 * @param pf Pointer to the boolean variable to read.
3268 */
3269DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3270{
3271 return *pf; /* byte reads are atomic on x86 */
3272}
3273
3274
3275/**
3276 * Atomically read a value whose size might differ
3277 * between platforms or compilers, ordered.
3278 *
3279 * @param pu Pointer to the variable to update.
3280 * @param puRes Where to store the result.
3281 */
3282#define ASMAtomicReadSize(pu, puRes) \
3283 do { \
3284 switch (sizeof(*(pu))) { \
3285 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3286 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3287 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3288 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3289 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3290 } \
3291 } while (0)
3292
3293
3294/**
3295 * Atomically read a value whose size might differ
3296 * between platforms or compilers, unordered.
3297 *
3298 * @param pu Pointer to the variable to update.
3299 * @param puRes Where to store the result.
3300 */
3301#define ASMAtomicUoReadSize(pu, puRes) \
3302 do { \
3303 switch (sizeof(*(pu))) { \
3304 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3305 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3306 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3307 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3308            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3309 } \
3310 } while (0)
3311
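/*
 * Usage sketch (illustrative only): reading a variable whose width differs
 * between targets, here a size_t counter, without spelling the size out at
 * the call site. The function and variable names are made up.
 *
 *      static size_t myGetUserCount(size_t volatile *pcUsers)
 *      {
 *          size_t cUsers;
 *          ASMAtomicReadSize(pcUsers, &cUsers);
 *          return cUsers;
 *      }
 */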
3312
3313/**
3314 * Atomically writes an unsigned 8-bit value, ordered.
3315 *
3316 * @param pu8 Pointer to the 8-bit variable.
3317 * @param u8 The 8-bit value to assign to *pu8.
3318 */
3319DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3320{
3321 ASMAtomicXchgU8(pu8, u8);
3322}
3323
3324
3325/**
3326 * Atomically writes an unsigned 8-bit value, unordered.
3327 *
3328 * @param pu8 Pointer to the 8-bit variable.
3329 * @param u8 The 8-bit value to assign to *pu8.
3330 */
3331DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3332{
3333 *pu8 = u8; /* byte writes are atomic on x86 */
3334}
3335
3336
3337/**
3338 * Atomically writes a signed 8-bit value, ordered.
3339 *
3340 * @param pi8 Pointer to the 8-bit variable.
3341 * @param i8 The 8-bit value to assign to *pi8.
3342 */
3343DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3344{
3345 ASMAtomicXchgS8(pi8, i8);
3346}
3347
3348
3349/**
3350 * Atomically writes a signed 8-bit value, unordered.
3351 *
3352 * @param pi8 Pointer to the 8-bit variable.
3353 * @param i8 The 8-bit value to assign to *pi8.
3354 */
3355DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3356{
3357 *pi8 = i8; /* byte writes are atomic on x86 */
3358}
3359
3360
3361/**
3362 * Atomically writes an unsigned 16-bit value, ordered.
3363 *
3364 * @param pu16 Pointer to the 16-bit variable.
3365 * @param u16 The 16-bit value to assign to *pu16.
3366 */
3367DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3368{
3369 ASMAtomicXchgU16(pu16, u16);
3370}
3371
3372
3373/**
3374 * Atomically writes an unsigned 16-bit value, unordered.
3375 *
3376 * @param pu16 Pointer to the 16-bit variable.
3377 * @param u16 The 16-bit value to assign to *pu16.
3378 */
3379DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3380{
3381 Assert(!((uintptr_t)pu16 & 1));
3382 *pu16 = u16;
3383}
3384
3385
3386/**
3387 * Atomically writes a signed 16-bit value, ordered.
3388 *
3389 * @param pi16 Pointer to the 16-bit variable.
3390 * @param i16 The 16-bit value to assign to *pi16.
3391 */
3392DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3393{
3394 ASMAtomicXchgS16(pi16, i16);
3395}
3396
3397
3398/**
3399 * Atomically writes a signed 16-bit value, unordered.
3400 *
3401 * @param pi16 Pointer to the 16-bit variable.
3402 * @param i16 The 16-bit value to assign to *pi16.
3403 */
3404DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3405{
3406 Assert(!((uintptr_t)pi16 & 1));
3407 *pi16 = i16;
3408}
3409
3410
3411/**
3412 * Atomically writes an unsigned 32-bit value, ordered.
3413 *
3414 * @param pu32 Pointer to the 32-bit variable.
3415 * @param u32 The 32-bit value to assign to *pu32.
3416 */
3417DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3418{
3419 ASMAtomicXchgU32(pu32, u32);
3420}
3421
3422
3423/**
3424 * Atomically writes an unsigned 32-bit value, unordered.
3425 *
3426 * @param pu32 Pointer to the 32-bit variable.
3427 * @param u32 The 32-bit value to assign to *pu32.
3428 */
3429DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3430{
3431 Assert(!((uintptr_t)pu32 & 3));
3432 *pu32 = u32;
3433}
3434
3435
3436/**
3437 * Atomically writes a signed 32-bit value, ordered.
3438 *
3439 * @param pi32 Pointer to the 32-bit variable.
3440 * @param i32 The 32-bit value to assign to *pi32.
3441 */
3442DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3443{
3444 ASMAtomicXchgS32(pi32, i32);
3445}
3446
3447
3448/**
3449 * Atomically writes a signed 32-bit value, unordered.
3450 *
3451 * @param pi32 Pointer to the 32-bit variable.
3452 * @param i32 The 32-bit value to assign to *pi32.
3453 */
3454DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3455{
3456 Assert(!((uintptr_t)pi32 & 3));
3457 *pi32 = i32;
3458}
3459
3460
3461/**
3462 * Atomically writes an unsigned 64-bit value, ordered.
3463 *
3464 * @param pu64 Pointer to the 64-bit variable.
3465 * @param u64 The 64-bit value to assign to *pu64.
3466 */
3467DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3468{
3469 ASMAtomicXchgU64(pu64, u64);
3470}
3471
3472
3473/**
3474 * Atomically writes an unsigned 64-bit value, unordered.
3475 *
3476 * @param pu64 Pointer to the 64-bit variable.
3477 * @param u64 The 64-bit value to assign to *pu64.
3478 */
3479DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3480{
3481 Assert(!((uintptr_t)pu64 & 7));
3482#if ARCH_BITS == 64
3483 *pu64 = u64;
3484#else
3485 ASMAtomicXchgU64(pu64, u64);
3486#endif
3487}
3488
3489
3490/**
3491 * Atomically writes a signed 64-bit value, ordered.
3492 *
3493 * @param pi64 Pointer to the 64-bit variable.
3494 * @param i64 The 64-bit value to assign to *pi64.
3495 */
3496DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3497{
3498 ASMAtomicXchgS64(pi64, i64);
3499}
3500
3501
3502/**
3503 * Atomically writes a signed 64-bit value, unordered.
3504 *
3505 * @param pi64 Pointer to the 64-bit variable.
3506 * @param i64 The 64-bit value to assign to *pi64.
3507 */
3508DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3509{
3510 Assert(!((uintptr_t)pi64 & 7));
3511#if ARCH_BITS == 64
3512 *pi64 = i64;
3513#else
3514 ASMAtomicXchgS64(pi64, i64);
3515#endif
3516}
3517
3518
3519/**
3520 * Atomically writes a boolean value, ordered.
3521 *
3522 * @param pf Pointer to the boolean variable.
3523 * @param f The boolean value to assign to *pf.
3524 */
3525DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3526{
3527 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3528}
3529
3530
3531/**
3532 * Atomically writes a boolean value, unordered.
3533 *
3534 * @param pf Pointer to the boolean variable.
3535 * @param f The boolean value to assign to *pf.
3536 */
3537DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3538{
3539 *pf = f; /* byte writes are atomic on x86 */
3540}
3541
3542
3543/**
3544 * Atomically writes a pointer value, ordered.
3545 *
3547 * @param ppv Pointer to the pointer variable.
3548 * @param pv The pointer value to assign to *ppv.
3549 */
3550DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3551{
3552#if ARCH_BITS == 32
3553 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3554#elif ARCH_BITS == 64
3555 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3556#else
3557# error "ARCH_BITS is bogus"
3558#endif
3559}
3560
3561
3562/**
3563 * Atomically writes a pointer value, unordered.
3564 *
3566 * @param ppv Pointer to the pointer variable.
3567 * @param pv The pointer value to assign to *ppv.
3568 */
3569DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3570{
3571#if ARCH_BITS == 32
3572 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3573#elif ARCH_BITS == 64
3574 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3575#else
3576# error "ARCH_BITS is bogus"
3577#endif
3578}
3579
3580
3581/**
3582 * Atomically write a value whose size might differ
3583 * between platforms or compilers, ordered.
3584 *
3585 * @param pu Pointer to the variable to update.
3586 * @param uNew The value to assign to *pu.
3587 */
3588#define ASMAtomicWriteSize(pu, uNew) \
3589 do { \
3590 switch (sizeof(*(pu))) { \
3591 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3592 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3593 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3594 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3595 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3596 } \
3597 } while (0)
3598
3599/**
3600 * Atomically write a value whose size might differ
3601 * between platforms or compilers, unordered.
3602 *
3603 * @param pu Pointer to the variable to update.
3604 * @param uNew The value to assign to *pu.
3605 */
3606#define ASMAtomicUoWriteSize(pu, uNew) \
3607 do { \
3608 switch (sizeof(*(pu))) { \
3609 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3610 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3611 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3612 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3613            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3614 } \
3615 } while (0)
3616
3617
3618
3619
3620/**
3621 * Invalidate page.
3622 *
3623 * @param pv Address of the page to invalidate.
3624 */
3625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3626DECLASM(void) ASMInvalidatePage(void *pv);
3627#else
3628DECLINLINE(void) ASMInvalidatePage(void *pv)
3629{
3630# if RT_INLINE_ASM_USES_INTRIN
3631 __invlpg(pv);
3632
3633# elif RT_INLINE_ASM_GNU_STYLE
3634 __asm__ __volatile__("invlpg %0\n\t"
3635 : : "m" (*(uint8_t *)pv));
3636# else
3637 __asm
3638 {
3639# ifdef RT_ARCH_AMD64
3640 mov rax, [pv]
3641 invlpg [rax]
3642# else
3643 mov eax, [pv]
3644 invlpg [eax]
3645# endif
3646 }
3647# endif
3648}
3649#endif
3650
3651
3652#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3653# if PAGE_SIZE != 0x1000
3654# error "PAGE_SIZE is not 0x1000!"
3655# endif
3656#endif
3657
3658/**
3659 * Zeros a 4K memory page.
3660 *
3661 * @param pv Pointer to the memory block. This must be page aligned.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3664DECLASM(void) ASMMemZeroPage(volatile void *pv);
3665# else
3666DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3667{
3668# if RT_INLINE_ASM_USES_INTRIN
3669# ifdef RT_ARCH_AMD64
3670 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3671# else
3672 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3673# endif
3674
3675# elif RT_INLINE_ASM_GNU_STYLE
3676 RTUINTREG uDummy;
3677# ifdef RT_ARCH_AMD64
3678 __asm__ __volatile__ ("rep stosq"
3679 : "=D" (pv),
3680 "=c" (uDummy)
3681 : "0" (pv),
3682 "c" (0x1000 >> 3),
3683 "a" (0)
3684 : "memory");
3685# else
3686 __asm__ __volatile__ ("rep stosl"
3687 : "=D" (pv),
3688 "=c" (uDummy)
3689 : "0" (pv),
3690 "c" (0x1000 >> 2),
3691 "a" (0)
3692 : "memory");
3693# endif
3694# else
3695 __asm
3696 {
3697# ifdef RT_ARCH_AMD64
3698 xor rax, rax
3699 mov ecx, 0200h
3700 mov rdi, [pv]
3701 rep stosq
3702# else
3703 xor eax, eax
3704 mov ecx, 0400h
3705 mov edi, [pv]
3706 rep stosd
3707# endif
3708 }
3709# endif
3710}
3711# endif
3712
3713
3714/**
3715 * Zeros a memory block with a 32-bit aligned size.
3716 *
3717 * @param pv Pointer to the memory block.
3718 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3719 */
3720#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3721DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3722#else
3723DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3724{
3725# if RT_INLINE_ASM_USES_INTRIN
3726 __stosd((unsigned long *)pv, 0, cb >> 2);
3727
3728# elif RT_INLINE_ASM_GNU_STYLE
3729 __asm__ __volatile__ ("rep stosl"
3730 : "=D" (pv),
3731 "=c" (cb)
3732 : "0" (pv),
3733 "1" (cb >> 2),
3734 "a" (0)
3735 : "memory");
3736# else
3737 __asm
3738 {
3739 xor eax, eax
3740# ifdef RT_ARCH_AMD64
3741 mov rcx, [cb]
3742 shr rcx, 2
3743 mov rdi, [pv]
3744# else
3745 mov ecx, [cb]
3746 shr ecx, 2
3747 mov edi, [pv]
3748# endif
3749 rep stosd
3750 }
3751# endif
3752}
3753#endif
3754
3755
3756/**
3757 * Fills a memory block with a 32-bit aligned size.
3758 *
3759 * @param pv Pointer to the memory block.
3760 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3761 * @param u32 The value to fill with.
3762 */
3763#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3764DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3765#else
3766DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3767{
3768# if RT_INLINE_ASM_USES_INTRIN
3769    __stosd((unsigned long *)pv, u32, cb >> 2);
3770
3771# elif RT_INLINE_ASM_GNU_STYLE
3772 __asm__ __volatile__ ("rep stosl"
3773 : "=D" (pv),
3774 "=c" (cb)
3775 : "0" (pv),
3776 "1" (cb >> 2),
3777 "a" (u32)
3778 : "memory");
3779# else
3780 __asm
3781 {
3782# ifdef RT_ARCH_AMD64
3783 mov rcx, [cb]
3784 shr rcx, 2
3785 mov rdi, [pv]
3786# else
3787 mov ecx, [cb]
3788 shr ecx, 2
3789 mov edi, [pv]
3790# endif
3791 mov eax, [u32]
3792 rep stosd
3793 }
3794# endif
3795}
3796#endif
3797
3798
3799/**
3800 * Checks if a memory block is filled with the specified byte.
3801 *
3802 * This is a sort of inverted memchr.
3803 *
3804 * @returns Pointer to the byte which doesn't equal u8.
3805 * @returns NULL if all equal to u8.
3806 *
3807 * @param pv Pointer to the memory block.
3808 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3809 * @param u8 The value it's supposed to be filled with.
3810 */
3811#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3812DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3813#else
3814DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3815{
3816/** @todo rewrite this in inline assembly. */
3817 uint8_t const *pb = (uint8_t const *)pv;
3818 for (; cb; cb--, pb++)
3819 if (RT_UNLIKELY(*pb != u8))
3820 return (void *)pb;
3821 return NULL;
3822}
3823#endif
3824
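/*
 * Usage sketch (illustrative only): clearing a buffer and asserting that it
 * really is zero filled afterwards. The buffer and function name are made up.
 *
 *      static void myInitScratch(void)
 *      {
 *          uint32_t au32Scratch[64];
 *          ASMMemZero32(au32Scratch, sizeof(au32Scratch));
 *          Assert(ASMMemIsAll8(au32Scratch, sizeof(au32Scratch), 0) == NULL);
 *      }
 */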
3825
3826
3827/**
3828 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3829 *
3830 * @returns u32F1 * u32F2.
3831 */
3832#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3833DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3834#else
3835DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3836{
3837# ifdef RT_ARCH_AMD64
3838 return (uint64_t)u32F1 * u32F2;
3839# else /* !RT_ARCH_AMD64 */
3840 uint64_t u64;
3841# if RT_INLINE_ASM_GNU_STYLE
3842 __asm__ __volatile__("mull %%edx"
3843 : "=A" (u64)
3844 : "a" (u32F2), "d" (u32F1));
3845# else
3846 __asm
3847 {
3848 mov edx, [u32F1]
3849 mov eax, [u32F2]
3850 mul edx
3851 mov dword ptr [u64], eax
3852 mov dword ptr [u64 + 4], edx
3853 }
3854# endif
3855 return u64;
3856# endif /* !RT_ARCH_AMD64 */
3857}
3858#endif
3859
3860
3861/**
3862 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3863 *
3864 * @returns i32F1 * i32F2.
3865 */
3866#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3867DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3868#else
3869DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3870{
3871# ifdef RT_ARCH_AMD64
3872 return (int64_t)i32F1 * i32F2;
3873# else /* !RT_ARCH_AMD64 */
3874 int64_t i64;
3875# if RT_INLINE_ASM_GNU_STYLE
3876 __asm__ __volatile__("imull %%edx"
3877 : "=A" (i64)
3878 : "a" (i32F2), "d" (i32F1));
3879# else
3880 __asm
3881 {
3882 mov edx, [i32F1]
3883 mov eax, [i32F2]
3884 imul edx
3885 mov dword ptr [i64], eax
3886 mov dword ptr [i64 + 4], edx
3887 }
3888# endif
3889 return i64;
3890# endif /* !RT_ARCH_AMD64 */
3891}
3892#endif
3893
3894
3895/**
3896 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3897 *
3898 * @returns u64 / u32.
3899 */
3900#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3901DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3902#else
3903DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3904{
3905# ifdef RT_ARCH_AMD64
3906 return (uint32_t)(u64 / u32);
3907# else /* !RT_ARCH_AMD64 */
3908# if RT_INLINE_ASM_GNU_STYLE
3909 RTUINTREG uDummy;
3910 __asm__ __volatile__("divl %3"
3911 : "=a" (u32), "=d"(uDummy)
3912 : "A" (u64), "r" (u32));
3913# else
3914 __asm
3915 {
3916 mov eax, dword ptr [u64]
3917 mov edx, dword ptr [u64 + 4]
3918 mov ecx, [u32]
3919 div ecx
3920 mov [u32], eax
3921 }
3922# endif
3923 return u32;
3924# endif /* !RT_ARCH_AMD64 */
3925}
3926#endif
3927
3928
3929/**
3930 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
3931 *
3932 * @returns i64 / i32.
3933 */
3934#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3935DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
3936#else
3937DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
3938{
3939# ifdef RT_ARCH_AMD64
3940 return (int32_t)(i64 / i32);
3941# else /* !RT_ARCH_AMD64 */
3942# if RT_INLINE_ASM_GNU_STYLE
3943 RTUINTREG iDummy;
3944 __asm__ __volatile__("idivl %3"
3945 : "=a" (i32), "=d"(iDummy)
3946 : "A" (i64), "r" (i32));
3947# else
3948 __asm
3949 {
3950 mov eax, dword ptr [i64]
3951 mov edx, dword ptr [i64 + 4]
3952 mov ecx, [i32]
3953 idiv ecx
3954 mov [i32], eax
3955 }
3956# endif
3957 return i32;
3958# endif /* !RT_ARCH_AMD64 */
3959}
3960#endif
3961
3962
3963/**
3964 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer,
3965 * using a 96-bit intermediate result.
3966 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
3967 * __udivdi3 and __umoddi3 even if this inline function is not used.
3968 *
3969 * @returns (u64A * u32B) / u32C.
3970 * @param u64A The 64-bit value.
3971 * @param u32B The 32-bit value to multiply A by.
3972 * @param u32C The 32-bit value to divide A*B by.
3973 */
3974#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
3975DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
3976#else
3977DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
3978{
3979# if RT_INLINE_ASM_GNU_STYLE
3980# ifdef RT_ARCH_AMD64
3981 uint64_t u64Result, u64Spill;
3982 __asm__ __volatile__("mulq %2\n\t"
3983 "divq %3\n\t"
3984 : "=a" (u64Result),
3985 "=d" (u64Spill)
3986 : "r" ((uint64_t)u32B),
3987 "r" ((uint64_t)u32C),
3988 "0" (u64A),
3989 "1" (0));
3990 return u64Result;
3991# else
3992 uint32_t u32Dummy;
3993 uint64_t u64Result;
3994 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
3995 edx = u64Lo.hi = (u64A.lo * u32B).hi */
3996 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
3997 eax = u64A.hi */
3998 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
3999 edx = u32C */
4000 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4001 edx = u32B */
4002 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4003 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4004 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4005 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4006 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4007 edx = u64Hi % u32C */
4008 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4009 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4010 "divl %%ecx \n\t" /* u64Result.lo */
4011 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4012 : "=A"(u64Result), "=c"(u32Dummy),
4013 "=S"(u32Dummy), "=D"(u32Dummy)
4014 : "a"((uint32_t)u64A),
4015 "S"((uint32_t)(u64A >> 32)),
4016 "c"(u32B),
4017 "D"(u32C));
4018 return u64Result;
4019# endif
4020# else
4021 RTUINT64U u;
4022 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4023 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4024 u64Hi += (u64Lo >> 32);
4025 u.s.Hi = (uint32_t)(u64Hi / u32C);
4026 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4027 return u.u;
4028# endif
4029}
4030#endif
4031
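/*
 * Usage sketch (illustrative only): the typical use is scaling a large tick
 * count without overflowing 64 bits in the intermediate product, e.g.
 * converting timestamp ticks to nanoseconds. The names are hypothetical and
 * the final result must still fit in 64 bits.
 *
 *      static uint64_t myTicksToNano(uint64_t cTicks, uint32_t u32TicksPerSec)
 *      {
 *          return ASMMultU64ByU32DivByU32(cTicks, 1000000000, u32TicksPerSec);
 *      }
 */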
4032
4033/**
4034 * Probes a byte pointer for read access.
4035 *
4036 * While the function will not fault if the byte is not read accessible,
4037 * the idea is to do this in a safe place like before acquiring locks
4038 * and such like.
4039 *
4040 * Also, this function guarantees that an eager compiler is not going
4041 * to optimize the probing away.
4042 *
4043 * @param pvByte Pointer to the byte.
4044 */
4045#if RT_INLINE_ASM_EXTERNAL
4046DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4047#else
4048DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4049{
4050 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4051 uint8_t u8;
4052# if RT_INLINE_ASM_GNU_STYLE
4053 __asm__ __volatile__("movb (%1), %0\n\t"
4054 : "=r" (u8)
4055 : "r" (pvByte));
4056# else
4057 __asm
4058 {
4059# ifdef RT_ARCH_AMD64
4060 mov rax, [pvByte]
4061 mov al, [rax]
4062# else
4063 mov eax, [pvByte]
4064 mov al, [eax]
4065# endif
4066 mov [u8], al
4067 }
4068# endif
4069 return u8;
4070}
4071#endif
4072
4073/**
4074 * Probes a buffer for read access page by page.
4075 *
4076 * While the function will fault if the buffer is not fully read
4077 * accessible, the idea is to do this in a safe place like before
4078 * acquiring locks and such like.
4079 *
4080 * Also, this function guarantees that an eager compiler is not going
4081 * to optimize the probing away.
4082 *
4083 * @param pvBuf Pointer to the buffer.
4084 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4085 */
4086DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4087{
4088 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4089 /* the first byte */
4090 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4091 ASMProbeReadByte(pu8);
4092
4093    /* the pages in between. */
4094 while (cbBuf > /*PAGE_SIZE*/0x1000)
4095 {
4096 ASMProbeReadByte(pu8);
4097 cbBuf -= /*PAGE_SIZE*/0x1000;
4098 pu8 += /*PAGE_SIZE*/0x1000;
4099 }
4100
4101 /* the last byte */
4102 ASMProbeReadByte(pu8 + cbBuf - 1);
4103}
4104
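/*
 * Usage sketch (illustrative only): probing a caller supplied buffer up
 * front so that a page fault cannot happen later in a context where it
 * would be fatal (e.g. while holding a spinlock). MYREQ and the function
 * name are made up.
 *
 *      typedef struct MYREQ { void *pvData; size_t cbData; } MYREQ;
 *
 *      static void myValidateRequest(MYREQ *pReq)
 *      {
 *          // Touch every page of the buffer now; later accesses must not fault.
 *          ASMProbeReadBuffer(pReq->pvData, pReq->cbData);
 *      }
 */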
4105
4106/** @def ASMBreakpoint
4107 * Debugger Breakpoint.
4108 * @remark In the gnu world we add a nop instruction after the int3 to
4109 * force gdb to remain at the int3 source line.
4110 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4111 * @internal
4112 */
4113#if RT_INLINE_ASM_GNU_STYLE
4114# ifndef __L4ENV__
4115# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4116# else
4117# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4118# endif
4119#else
4120# define ASMBreakpoint() __debugbreak()
4121#endif
4122
4123
4124
4125/** @defgroup grp_inline_bits Bit Operations
4126 * @{
4127 */
4128
4129
4130/**
4131 * Sets a bit in a bitmap.
4132 *
4133 * @param pvBitmap Pointer to the bitmap.
4134 * @param iBit The bit to set.
4135 */
4136#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4137DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4138#else
4139DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4140{
4141# if RT_INLINE_ASM_USES_INTRIN
4142 _bittestandset((long *)pvBitmap, iBit);
4143
4144# elif RT_INLINE_ASM_GNU_STYLE
4145 __asm__ __volatile__ ("btsl %1, %0"
4146 : "=m" (*(volatile long *)pvBitmap)
4147 : "Ir" (iBit)
4148 : "memory");
4149# else
4150 __asm
4151 {
4152# ifdef RT_ARCH_AMD64
4153 mov rax, [pvBitmap]
4154 mov edx, [iBit]
4155 bts [rax], edx
4156# else
4157 mov eax, [pvBitmap]
4158 mov edx, [iBit]
4159 bts [eax], edx
4160# endif
4161 }
4162# endif
4163}
4164#endif
4165
4166
4167/**
4168 * Atomically sets a bit in a bitmap, ordered.
4169 *
4170 * @param pvBitmap Pointer to the bitmap.
4171 * @param iBit The bit to set.
4172 */
4173#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4174DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4175#else
4176DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4177{
4178# if RT_INLINE_ASM_USES_INTRIN
4179 _interlockedbittestandset((long *)pvBitmap, iBit);
4180# elif RT_INLINE_ASM_GNU_STYLE
4181 __asm__ __volatile__ ("lock; btsl %1, %0"
4182 : "=m" (*(volatile long *)pvBitmap)
4183 : "Ir" (iBit)
4184 : "memory");
4185# else
4186 __asm
4187 {
4188# ifdef RT_ARCH_AMD64
4189 mov rax, [pvBitmap]
4190 mov edx, [iBit]
4191 lock bts [rax], edx
4192# else
4193 mov eax, [pvBitmap]
4194 mov edx, [iBit]
4195 lock bts [eax], edx
4196# endif
4197 }
4198# endif
4199}
4200#endif
4201
4202
4203/**
4204 * Clears a bit in a bitmap.
4205 *
4206 * @param pvBitmap Pointer to the bitmap.
4207 * @param iBit The bit to clear.
4208 */
4209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4210DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4211#else
4212DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4213{
4214# if RT_INLINE_ASM_USES_INTRIN
4215 _bittestandreset((long *)pvBitmap, iBit);
4216
4217# elif RT_INLINE_ASM_GNU_STYLE
4218 __asm__ __volatile__ ("btrl %1, %0"
4219 : "=m" (*(volatile long *)pvBitmap)
4220 : "Ir" (iBit)
4221 : "memory");
4222# else
4223 __asm
4224 {
4225# ifdef RT_ARCH_AMD64
4226 mov rax, [pvBitmap]
4227 mov edx, [iBit]
4228 btr [rax], edx
4229# else
4230 mov eax, [pvBitmap]
4231 mov edx, [iBit]
4232 btr [eax], edx
4233# endif
4234 }
4235# endif
4236}
4237#endif
4238
4239
4240/**
4241 * Atomically clears a bit in a bitmap, ordered.
4242 *
4243 * @param pvBitmap Pointer to the bitmap.
4244 * @param iBit The bit to clear.
4245 * @remark No memory barrier, take care on smp.
4246 */
4247#if RT_INLINE_ASM_EXTERNAL
4248DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4249#else
4250DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4251{
4252# if RT_INLINE_ASM_GNU_STYLE
4253 __asm__ __volatile__ ("lock; btrl %1, %0"
4254 : "=m" (*(volatile long *)pvBitmap)
4255 : "Ir" (iBit)
4256 : "memory");
4257# else
4258 __asm
4259 {
4260# ifdef RT_ARCH_AMD64
4261 mov rax, [pvBitmap]
4262 mov edx, [iBit]
4263 lock btr [rax], edx
4264# else
4265 mov eax, [pvBitmap]
4266 mov edx, [iBit]
4267 lock btr [eax], edx
4268# endif
4269 }
4270# endif
4271}
4272#endif
4273
4274
4275/**
4276 * Toggles a bit in a bitmap.
4277 *
4278 * @param pvBitmap Pointer to the bitmap.
4279 * @param iBit The bit to toggle.
4280 */
4281#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4282DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4283#else
4284DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4285{
4286# if RT_INLINE_ASM_USES_INTRIN
4287 _bittestandcomplement((long *)pvBitmap, iBit);
4288# elif RT_INLINE_ASM_GNU_STYLE
4289 __asm__ __volatile__ ("btcl %1, %0"
4290 : "=m" (*(volatile long *)pvBitmap)
4291 : "Ir" (iBit)
4292 : "memory");
4293# else
4294 __asm
4295 {
4296# ifdef RT_ARCH_AMD64
4297 mov rax, [pvBitmap]
4298 mov edx, [iBit]
4299 btc [rax], edx
4300# else
4301 mov eax, [pvBitmap]
4302 mov edx, [iBit]
4303 btc [eax], edx
4304# endif
4305 }
4306# endif
4307}
4308#endif
4309
4310
4311/**
4312 * Atomically toggles a bit in a bitmap, ordered.
4313 *
4314 * @param pvBitmap Pointer to the bitmap.
4315 * @param iBit The bit to toggle.
4316 */
4317#if RT_INLINE_ASM_EXTERNAL
4318DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4319#else
4320DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4321{
4322# if RT_INLINE_ASM_GNU_STYLE
4323 __asm__ __volatile__ ("lock; btcl %1, %0"
4324 : "=m" (*(volatile long *)pvBitmap)
4325 : "Ir" (iBit)
4326 : "memory");
4327# else
4328 __asm
4329 {
4330# ifdef RT_ARCH_AMD64
4331 mov rax, [pvBitmap]
4332 mov edx, [iBit]
4333 lock btc [rax], edx
4334# else
4335 mov eax, [pvBitmap]
4336 mov edx, [iBit]
4337 lock btc [eax], edx
4338# endif
4339 }
4340# endif
4341}
4342#endif
4343
4344
4345/**
4346 * Tests and sets a bit in a bitmap.
4347 *
4348 * @returns true if the bit was set.
4349 * @returns false if the bit was clear.
4350 * @param pvBitmap Pointer to the bitmap.
4351 * @param iBit The bit to test and set.
4352 */
4353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4354DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4355#else
4356DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4357{
4358 union { bool f; uint32_t u32; uint8_t u8; } rc;
4359# if RT_INLINE_ASM_USES_INTRIN
4360 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4361
4362# elif RT_INLINE_ASM_GNU_STYLE
4363 __asm__ __volatile__ ("btsl %2, %1\n\t"
4364 "setc %b0\n\t"
4365 "andl $1, %0\n\t"
4366 : "=q" (rc.u32),
4367 "=m" (*(volatile long *)pvBitmap)
4368 : "Ir" (iBit)
4369 : "memory");
4370# else
4371 __asm
4372 {
4373 mov edx, [iBit]
4374# ifdef RT_ARCH_AMD64
4375 mov rax, [pvBitmap]
4376 bts [rax], edx
4377# else
4378 mov eax, [pvBitmap]
4379 bts [eax], edx
4380# endif
4381 setc al
4382 and eax, 1
4383 mov [rc.u32], eax
4384 }
4385# endif
4386 return rc.f;
4387}
4388#endif
4389
4390
4391/**
4392 * Atomically tests and sets a bit in a bitmap, ordered.
4393 *
4394 * @returns true if the bit was set.
4395 * @returns false if the bit was clear.
4396 * @param pvBitmap Pointer to the bitmap.
4397 * @param iBit The bit to test and set.
4398 */
4399#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4400DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4401#else
4402DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4403{
4404 union { bool f; uint32_t u32; uint8_t u8; } rc;
4405# if RT_INLINE_ASM_USES_INTRIN
4406 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4407# elif RT_INLINE_ASM_GNU_STYLE
4408 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4409 "setc %b0\n\t"
4410 "andl $1, %0\n\t"
4411 : "=q" (rc.u32),
4412 "=m" (*(volatile long *)pvBitmap)
4413 : "Ir" (iBit)
4414 : "memory");
4415# else
4416 __asm
4417 {
4418 mov edx, [iBit]
4419# ifdef RT_ARCH_AMD64
4420 mov rax, [pvBitmap]
4421 lock bts [rax], edx
4422# else
4423 mov eax, [pvBitmap]
4424 lock bts [eax], edx
4425# endif
4426 setc al
4427 and eax, 1
4428 mov [rc.u32], eax
4429 }
4430# endif
4431 return rc.f;
4432}
4433#endif
4434
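/*
 * Usage sketch (illustrative only): claiming and releasing slots in a shared
 * allocation bitmap. The constants and function names are made up; a real
 * allocator would typically locate a candidate bit with a scan helper first
 * instead of testing every bit in turn.
 *
 *      #define MY_SLOT_COUNT   256
 *      static uint32_t volatile g_bmSlots[MY_SLOT_COUNT / 32];
 *
 *      static int32_t mySlotAlloc(void)
 *      {
 *          int32_t iBit;
 *          for (iBit = 0; iBit < MY_SLOT_COUNT; iBit++)
 *              if (!ASMAtomicBitTestAndSet(&g_bmSlots[0], iBit))
 *                  return iBit;                // the bit was clear and is now ours
 *          return -1;                          // everything is taken
 *      }
 *
 *      static void mySlotFree(int32_t iBit)
 *      {
 *          ASMAtomicBitClear(&g_bmSlots[0], iBit);
 *      }
 */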
4435
4436/**
4437 * Tests and clears a bit in a bitmap.
4438 *
4439 * @returns true if the bit was set.
4440 * @returns false if the bit was clear.
4441 * @param pvBitmap Pointer to the bitmap.
4442 * @param iBit The bit to test and clear.
4443 */
4444#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4445DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4446#else
4447DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4448{
4449 union { bool f; uint32_t u32; uint8_t u8; } rc;
4450# if RT_INLINE_ASM_USES_INTRIN
4451 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4452
4453# elif RT_INLINE_ASM_GNU_STYLE
4454 __asm__ __volatile__ ("btrl %2, %1\n\t"
4455 "setc %b0\n\t"
4456 "andl $1, %0\n\t"
4457 : "=q" (rc.u32),
4458 "=m" (*(volatile long *)pvBitmap)
4459 : "Ir" (iBit)
4460 : "memory");
4461# else
4462 __asm
4463 {
4464 mov edx, [iBit]
4465# ifdef RT_ARCH_AMD64
4466 mov rax, [pvBitmap]
4467 btr [rax], edx
4468# else
4469 mov eax, [pvBitmap]
4470 btr [eax], edx
4471# endif
4472 setc al
4473 and eax, 1
4474 mov [rc.u32], eax
4475 }
4476# endif
4477 return rc.f;
4478}
4479#endif
4480
4481
4482/**
4483 * Atomically tests and clears a bit in a bitmap, ordered.
4484 *
4485 * @returns true if the bit was set.
4486 * @returns false if the bit was clear.
4487 * @param pvBitmap Pointer to the bitmap.
4488 * @param iBit The bit to test and clear.
4489 * @remark No memory barrier, take care on smp.
4490 */
4491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4492DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4493#else
4494DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4495{
4496 union { bool f; uint32_t u32; uint8_t u8; } rc;
4497# if RT_INLINE_ASM_USES_INTRIN
4498 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4499
4500# elif RT_INLINE_ASM_GNU_STYLE
4501 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4502 "setc %b0\n\t"
4503 "andl $1, %0\n\t"
4504 : "=q" (rc.u32),
4505 "=m" (*(volatile long *)pvBitmap)
4506 : "Ir" (iBit)
4507 : "memory");
4508# else
4509 __asm
4510 {
4511 mov edx, [iBit]
4512# ifdef RT_ARCH_AMD64
4513 mov rax, [pvBitmap]
4514 lock btr [rax], edx
4515# else
4516 mov eax, [pvBitmap]
4517 lock btr [eax], edx
4518# endif
4519 setc al
4520 and eax, 1
4521 mov [rc.u32], eax
4522 }
4523# endif
4524 return rc.f;
4525}
4526#endif
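/*
 * Usage sketch, assuming a pending-event mask shared between threads (g_au32Pending
 * is a hypothetical name): only the thread that actually clears the bit gets to
 * process the event.
 *
 *      static uint32_t g_au32Pending[1];
 *      if (ASMAtomicBitTestAndClear(&g_au32Pending[0], 3))
 *      {
 *          // event 3 was pending and has been claimed by this thread.
 *      }
 */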
4527
4528
4529/**
4530 * Tests and toggles a bit in a bitmap.
4531 *
4532 * @returns true if the bit was set.
4533 * @returns false if the bit was clear.
4534 * @param pvBitmap Pointer to the bitmap.
4535 * @param iBit The bit to test and toggle.
4536 */
4537#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4538DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4539#else
4540DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4541{
4542 union { bool f; uint32_t u32; uint8_t u8; } rc;
4543# if RT_INLINE_ASM_USES_INTRIN
4544 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4545
4546# elif RT_INLINE_ASM_GNU_STYLE
4547 __asm__ __volatile__ ("btcl %2, %1\n\t"
4548 "setc %b0\n\t"
4549 "andl $1, %0\n\t"
4550 : "=q" (rc.u32),
4551 "=m" (*(volatile long *)pvBitmap)
4552 : "Ir" (iBit)
4553 : "memory");
4554# else
4555 __asm
4556 {
4557 mov edx, [iBit]
4558# ifdef RT_ARCH_AMD64
4559 mov rax, [pvBitmap]
4560 btc [rax], edx
4561# else
4562 mov eax, [pvBitmap]
4563 btc [eax], edx
4564# endif
4565 setc al
4566 and eax, 1
4567 mov [rc.u32], eax
4568 }
4569# endif
4570 return rc.f;
4571}
4572#endif
4573
4574
4575/**
4576 * Atomically tests and toggles a bit in a bitmap, ordered.
4577 *
4578 * @returns true if the bit was set.
4579 * @returns false if the bit was clear.
4580 * @param pvBitmap Pointer to the bitmap.
4581 * @param iBit The bit to test and toggle.
4582 */
4583#if RT_INLINE_ASM_EXTERNAL
4584DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4585#else
4586DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4587{
4588 union { bool f; uint32_t u32; uint8_t u8; } rc;
4589# if RT_INLINE_ASM_GNU_STYLE
4590 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4591 "setc %b0\n\t"
4592 "andl $1, %0\n\t"
4593 : "=q" (rc.u32),
4594 "=m" (*(volatile long *)pvBitmap)
4595 : "Ir" (iBit)
4596 : "memory");
4597# else
4598 __asm
4599 {
4600 mov edx, [iBit]
4601# ifdef RT_ARCH_AMD64
4602 mov rax, [pvBitmap]
4603 lock btc [rax], edx
4604# else
4605 mov eax, [pvBitmap]
4606 lock btc [eax], edx
4607# endif
4608 setc al
4609 and eax, 1
4610 mov [rc.u32], eax
4611 }
4612# endif
4613 return rc.f;
4614}
4615#endif
4616
4617
4618/**
4619 * Tests if a bit in a bitmap is set.
4620 *
4621 * @returns true if the bit is set.
4622 * @returns false if the bit is clear.
4623 * @param pvBitmap Pointer to the bitmap.
4624 * @param iBit The bit to test.
4625 */
4626#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4627DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4628#else
4629DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4630{
4631 union { bool f; uint32_t u32; uint8_t u8; } rc;
4632# if RT_INLINE_ASM_USES_INTRIN
4633 rc.u32 = _bittest((long *)pvBitmap, iBit);
4634# elif RT_INLINE_ASM_GNU_STYLE
4635
4636 __asm__ __volatile__ ("btl %2, %1\n\t"
4637 "setc %b0\n\t"
4638 "andl $1, %0\n\t"
4639 : "=q" (rc.u32),
4640 "=m" (*(volatile long *)pvBitmap)
4641 : "Ir" (iBit)
4642 : "memory");
4643# else
4644 __asm
4645 {
4646 mov edx, [iBit]
4647# ifdef RT_ARCH_AMD64
4648 mov rax, [pvBitmap]
4649 bt [rax], edx
4650# else
4651 mov eax, [pvBitmap]
4652 bt [eax], edx
4653# endif
4654 setc al
4655 and eax, 1
4656 mov [rc.u32], eax
4657 }
4658# endif
4659 return rc.f;
4660}
4661#endif
4662
4663
4664/**
4665 * Clears a bit range within a bitmap.
4666 *
4667 * @param pvBitmap Pointer to the bitmap.
4668 * @param iBitStart The first bit to clear.
4669 * @param iBitEnd The first bit not to clear.
4670 */
4671DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4672{
4673 if (iBitStart < iBitEnd)
4674 {
4675 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4676 int iStart = iBitStart & ~31;
4677 int iEnd = iBitEnd & ~31;
4678 if (iStart == iEnd)
4679 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4680 else
4681 {
4682 /* bits in first dword. */
4683 if (iBitStart & 31)
4684 {
4685 *pu32 &= (1 << (iBitStart & 31)) - 1;
4686 pu32++;
4687 iBitStart = iStart + 32;
4688 }
4689
4690 /* whole dword. */
4691 if (iBitStart != iEnd)
4692 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4693
4694 /* bits in last dword. */
4695 if (iBitEnd & 31)
4696 {
4697 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4698 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4699 }
4700 }
4701 }
4702}
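/*
 * A small sketch of the range semantics: the range is half open, i.e. bits
 * [iBitStart, iBitEnd) are cleared and iBitEnd itself is left untouched.
 *
 *      uint32_t au32Bitmap[2] = { 0xffffffff, 0xffffffff };
 *      ASMBitClearRange(&au32Bitmap[0], 4, 36);        // clears bits 4 thru 35 only.
 */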
4703
4704
4705/**
4706 * Finds the first clear bit in a bitmap.
4707 *
4708 * @returns Index of the first zero bit.
4709 * @returns -1 if no clear bit was found.
4710 * @param pvBitmap Pointer to the bitmap.
4711 * @param cBits The number of bits in the bitmap. Multiple of 32.
4712 */
4713#if RT_INLINE_ASM_EXTERNAL
4714DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4715#else
4716DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4717{
4718 if (cBits)
4719 {
4720 int32_t iBit;
4721# if RT_INLINE_ASM_GNU_STYLE
4722 RTCCUINTREG uEAX, uECX, uEDI;
4723 cBits = RT_ALIGN_32(cBits, 32);
4724 __asm__ __volatile__("repe; scasl\n\t"
4725 "je 1f\n\t"
4726# ifdef RT_ARCH_AMD64
4727 "lea -4(%%rdi), %%rdi\n\t"
4728 "xorl (%%rdi), %%eax\n\t"
4729 "subq %5, %%rdi\n\t"
4730# else
4731 "lea -4(%%edi), %%edi\n\t"
4732 "xorl (%%edi), %%eax\n\t"
4733 "subl %5, %%edi\n\t"
4734# endif
4735 "shll $3, %%edi\n\t"
4736 "bsfl %%eax, %%edx\n\t"
4737 "addl %%edi, %%edx\n\t"
4738 "1:\t\n"
4739 : "=d" (iBit),
4740 "=&c" (uECX),
4741 "=&D" (uEDI),
4742 "=&a" (uEAX)
4743 : "0" (0xffffffff),
4744 "mr" (pvBitmap),
4745 "1" (cBits >> 5),
4746 "2" (pvBitmap),
4747 "3" (0xffffffff));
4748# else
4749 cBits = RT_ALIGN_32(cBits, 32);
4750 __asm
4751 {
4752# ifdef RT_ARCH_AMD64
4753 mov rdi, [pvBitmap]
4754 mov rbx, rdi
4755# else
4756 mov edi, [pvBitmap]
4757 mov ebx, edi
4758# endif
4759 mov edx, 0ffffffffh
4760 mov eax, edx
4761 mov ecx, [cBits]
4762 shr ecx, 5
4763 repe scasd
4764 je done
4765
4766# ifdef RT_ARCH_AMD64
4767 lea rdi, [rdi - 4]
4768 xor eax, [rdi]
4769 sub rdi, rbx
4770# else
4771 lea edi, [edi - 4]
4772 xor eax, [edi]
4773 sub edi, ebx
4774# endif
4775 shl edi, 3
4776 bsf edx, eax
4777 add edx, edi
4778 done:
4779 mov [iBit], edx
4780 }
4781# endif
4782 return iBit;
4783 }
4784 return -1;
4785}
4786#endif
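/*
 * Usage sketch for a simple lock-protected allocator (the array name and size are
 * hypothetical): find the lowest free slot and mark it as used.
 *
 *      uint32_t au32Alloc[4] = { 0 };                  // 128 slots, 32-bit aligned.
 *      int iSlot = ASMBitFirstClear(&au32Alloc[0], 128);
 *      if (iSlot >= 0)
 *          ASMBitSet(&au32Alloc[0], iSlot);            // caller holds the protecting lock.
 */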
4787
4788
4789/**
4790 * Finds the next clear bit in a bitmap.
4791 *
4792 * @returns Index of the next clear bit.
4793 * @returns -1 if no clear bit was found.
4794 * @param pvBitmap Pointer to the bitmap.
4795 * @param cBits The number of bits in the bitmap. Multiple of 32.
4796 * @param iBitPrev The bit returned from the last search.
4797 * The search will start at iBitPrev + 1.
4798 */
4799#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4800DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4801#else
4802DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4803{
4804 int iBit = ++iBitPrev & 31;
4805 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4806 cBits -= iBitPrev & ~31;
4807 if (iBit)
4808 {
4809 /* inspect the first dword. */
4810 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4811# if RT_INLINE_ASM_USES_INTRIN
4812 unsigned long ulBit = 0;
4813 if (_BitScanForward(&ulBit, u32))
4814 return ulBit + iBitPrev;
4815 iBit = -1;
4816# else
4817# if RT_INLINE_ASM_GNU_STYLE
4818 __asm__ __volatile__("bsf %1, %0\n\t"
4819 "jnz 1f\n\t"
4820 "movl $-1, %0\n\t"
4821 "1:\n\t"
4822 : "=r" (iBit)
4823 : "r" (u32));
4824# else
4825 __asm
4826 {
4827 mov edx, [u32]
4828 bsf eax, edx
4829 jnz done
4830 mov eax, 0ffffffffh
4831 done:
4832 mov [iBit], eax
4833 }
4834# endif
4835 if (iBit >= 0)
4836 return iBit + iBitPrev;
4837# endif
4838 /* Search the rest of the bitmap, if there is anything. */
4839 if (cBits > 32)
4840 {
4841 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4842 if (iBit >= 0)
4843 return iBit + (iBitPrev & ~31) + 32;
4844 }
4845 }
4846 else
4847 {
4848 /* Search the rest of the bitmap. */
4849 iBit = ASMBitFirstClear(pvBitmap, cBits);
4850 if (iBit >= 0)
4851 return iBit + (iBitPrev & ~31);
4852 }
4853 return iBit;
4854}
4855#endif
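/*
 * Iteration sketch: each result is fed back in as iBitPrev so that every clear bit
 * is visited exactly once.  The bitmap contents are made up for illustration and
 * cBits must be a multiple of 32.
 *
 *      uint32_t au32Bitmap[2] = { 0xfffffff0, 0xffffffff };
 *      int iBit = ASMBitFirstClear(&au32Bitmap[0], 64);
 *      while (iBit >= 0)
 *      {
 *          // bits 0, 1, 2 and 3 are visited here.
 *          iBit = ASMBitNextClear(&au32Bitmap[0], 64, iBit);
 *      }
 */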
4856
4857
4858/**
4859 * Finds the first set bit in a bitmap.
4860 *
4861 * @returns Index of the first set bit.
4862 * @returns -1 if no set bit was found.
4863 * @param pvBitmap Pointer to the bitmap.
4864 * @param cBits The number of bits in the bitmap. Multiple of 32.
4865 */
4866#if RT_INLINE_ASM_EXTERNAL
4867DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4868#else
4869DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4870{
4871 if (cBits)
4872 {
4873 int32_t iBit;
4874# if RT_INLINE_ASM_GNU_STYLE
4875 RTCCUINTREG uEAX, uECX, uEDI;
4876 cBits = RT_ALIGN_32(cBits, 32);
4877 __asm__ __volatile__("repe; scasl\n\t"
4878 "je 1f\n\t"
4879# ifdef RT_ARCH_AMD64
4880 "lea -4(%%rdi), %%rdi\n\t"
4881 "movl (%%rdi), %%eax\n\t"
4882 "subq %5, %%rdi\n\t"
4883# else
4884 "lea -4(%%edi), %%edi\n\t"
4885 "movl (%%edi), %%eax\n\t"
4886 "subl %5, %%edi\n\t"
4887# endif
4888 "shll $3, %%edi\n\t"
4889 "bsfl %%eax, %%edx\n\t"
4890 "addl %%edi, %%edx\n\t"
4891 "1:\t\n"
4892 : "=d" (iBit),
4893 "=&c" (uECX),
4894 "=&D" (uEDI),
4895 "=&a" (uEAX)
4896 : "0" (0xffffffff),
4897 "mr" (pvBitmap),
4898 "1" (cBits >> 5),
4899 "2" (pvBitmap),
4900 "3" (0));
4901# else
4902 cBits = RT_ALIGN_32(cBits, 32);
4903 __asm
4904 {
4905# ifdef RT_ARCH_AMD64
4906 mov rdi, [pvBitmap]
4907 mov rbx, rdi
4908# else
4909 mov edi, [pvBitmap]
4910 mov ebx, edi
4911# endif
4912 mov edx, 0ffffffffh
4913 xor eax, eax
4914 mov ecx, [cBits]
4915 shr ecx, 5
4916 repe scasd
4917 je done
4918# ifdef RT_ARCH_AMD64
4919 lea rdi, [rdi - 4]
4920 mov eax, [rdi]
4921 sub rdi, rbx
4922# else
4923 lea edi, [edi - 4]
4924 mov eax, [edi]
4925 sub edi, ebx
4926# endif
4927 shl edi, 3
4928 bsf edx, eax
4929 add edx, edi
4930 done:
4931 mov [iBit], edx
4932 }
4933# endif
4934 return iBit;
4935 }
4936 return -1;
4937}
4938#endif
4939
4940
4941/**
4942 * Finds the next set bit in a bitmap.
4943 *
4944 * @returns Index of the next set bit.
4945 * @returns -1 if no set bit was found.
4946 * @param pvBitmap Pointer to the bitmap.
4947 * @param cBits The number of bits in the bitmap. Multiple of 32.
4948 * @param iBitPrev The bit returned from the last search.
4949 * The search will start at iBitPrev + 1.
4950 */
4951#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4952DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4953#else
4954DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4955{
4956 int iBit = ++iBitPrev & 31;
4957 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4958 cBits -= iBitPrev & ~31;
4959 if (iBit)
4960 {
4961 /* inspect the first dword. */
4962 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
4963# if RT_INLINE_ASM_USES_INTRIN
4964 unsigned long ulBit = 0;
4965 if (_BitScanForward(&ulBit, u32))
4966 return ulBit + iBitPrev;
4967 iBit = -1;
4968# else
4969# if RT_INLINE_ASM_GNU_STYLE
4970 __asm__ __volatile__("bsf %1, %0\n\t"
4971 "jnz 1f\n\t"
4972 "movl $-1, %0\n\t"
4973 "1:\n\t"
4974 : "=r" (iBit)
4975 : "r" (u32));
4976# else
4977 __asm
4978 {
4979 mov edx, u32
4980 bsf eax, edx
4981 jnz done
4982 mov eax, 0ffffffffh
4983 done:
4984 mov [iBit], eax
4985 }
4986# endif
4987 if (iBit >= 0)
4988 return iBit + iBitPrev;
4989# endif
4990 /* Search the rest of the bitmap, if there is anything. */
4991 if (cBits > 32)
4992 {
4993 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4994 if (iBit >= 0)
4995 return iBit + (iBitPrev & ~31) + 32;
4996 }
4997
4998 }
4999 else
5000 {
5001 /* Search the rest of the bitmap. */
5002 iBit = ASMBitFirstSet(pvBitmap, cBits);
5003 if (iBit >= 0)
5004 return iBit + (iBitPrev & ~31);
5005 }
5006 return iBit;
5007}
5008#endif
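/*
 * The set-bit pendant of the clear-bit loop above, visiting every set bit once
 * (the bitmap contents are illustrative only):
 *
 *      uint32_t au32Bitmap[2] = { 0x00000011, 0x80000000 };
 *      int iBit = ASMBitFirstSet(&au32Bitmap[0], 64);
 *      while (iBit >= 0)
 *      {
 *          // visits bits 0, 4 and 63.
 *          iBit = ASMBitNextSet(&au32Bitmap[0], 64, iBit);
 *      }
 */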
5009
5010
5011/**
5012 * Finds the first bit which is set in the given 32-bit integer.
5013 * Bits are numbered from 1 (least significant) to 32.
5014 *
5015 * @returns index [1..32] of the first set bit.
5016 * @returns 0 if all bits are cleared.
5017 * @param u32 Integer to search for set bits.
5018 * @remark Similar to ffs() in BSD.
5019 */
5020DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5021{
5022# if RT_INLINE_ASM_USES_INTRIN
5023 unsigned long iBit;
5024 if (_BitScanForward(&iBit, u32))
5025 iBit++;
5026 else
5027 iBit = 0;
5028# elif RT_INLINE_ASM_GNU_STYLE
5029 uint32_t iBit;
5030 __asm__ __volatile__("bsf %1, %0\n\t"
5031 "jnz 1f\n\t"
5032 "xorl %0, %0\n\t"
5033 "jmp 2f\n"
5034 "1:\n\t"
5035 "incl %0\n"
5036 "2:\n\t"
5037 : "=r" (iBit)
5038 : "rm" (u32));
5039# else
5040 uint32_t iBit;
5041 _asm
5042 {
5043 bsf eax, [u32]
5044 jnz found
5045 xor eax, eax
5046 jmp done
5047 found:
5048 inc eax
5049 done:
5050 mov [iBit], eax
5051 }
5052# endif
5053 return iBit;
5054}
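/*
 * Quick reference for the 1-based numbering, which matches ffs() rather than a
 * plain bit index:
 *
 *      ASMBitFirstSetU32(0);           // -> 0, no bit set.
 *      ASMBitFirstSetU32(1);           // -> 1, bit 0 is the first set bit.
 *      ASMBitFirstSetU32(0x80000000);  // -> 32, only the most significant bit is set.
 */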
5055
5056
5057/**
5058 * Finds the first bit which is set in the given 32-bit integer.
5059 * Bits are numbered from 1 (least significant) to 32.
5060 *
5061 * @returns index [1..32] of the first set bit.
5062 * @returns 0 if all bits are cleared.
5063 * @param i32 Integer to search for set bits.
5064 * @remark Similar to ffs() in BSD.
5065 */
5066DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5067{
5068 return ASMBitFirstSetU32((uint32_t)i32);
5069}
5070
5071
5072/**
5073 * Finds the last bit which is set in the given 32-bit integer.
5074 * Bits are numbered from 1 (least significant) to 32.
5075 *
5076 * @returns index [1..32] of the last set bit.
5077 * @returns 0 if all bits are cleared.
5078 * @param u32 Integer to search for set bits.
5079 * @remark Similar to fls() in BSD.
5080 */
5081DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5082{
5083# if RT_INLINE_ASM_USES_INTRIN
5084 unsigned long iBit;
5085 if (_BitScanReverse(&iBit, u32))
5086 iBit++;
5087 else
5088 iBit = 0;
5089# elif RT_INLINE_ASM_GNU_STYLE
5090 uint32_t iBit;
5091 __asm__ __volatile__("bsrl %1, %0\n\t"
5092 "jnz 1f\n\t"
5093 "xorl %0, %0\n\t"
5094 "jmp 2f\n"
5095 "1:\n\t"
5096 "incl %0\n"
5097 "2:\n\t"
5098 : "=r" (iBit)
5099 : "rm" (u32));
5100# else
5101 uint32_t iBit;
5102 _asm
5103 {
5104 bsr eax, [u32]
5105 jnz found
5106 xor eax, eax
5107 jmp done
5108 found:
5109 inc eax
5110 done:
5111 mov [iBit], eax
5112 }
5113# endif
5114 return iBit;
5115}
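/*
 * Because the result is 1-based, the highest set bit doubles as a floor(log2)
 * helper (a sketch, the variable names are made up):
 *
 *      uint32_t u32   = 0x00012345;
 *      unsigned iLast = ASMBitLastSetU32(u32);     // -> 17, bit 16 is the highest set bit.
 *      unsigned iLog2 = iLast ? iLast - 1 : 0;     // floor(log2(u32)) for u32 != 0.
 */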
5116
5117
5118/**
5119 * Finds the last bit which is set in the given 32-bit integer.
5120 * Bits are numbered from 1 (least significant) to 32.
5121 *
5122 * @returns index [1..32] of the last set bit.
5123 * @returns 0 if all bits are cleared.
5124 * @param i32 Integer to search for set bits.
5125 * @remark Similar to fls() in BSD.
5126 */
5127DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5128{
5129 return ASMBitLastSetU32((uint32_t)i32);
5130}
5131
5132
5133/**
5134 * Reverse the byte order of the given 32-bit integer.
5135 * @param u32 The 32-bit integer whose byte order is to be reversed (the swapped value is returned).
5136 */
5137DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5138{
5139#if RT_INLINE_ASM_USES_INTRIN
5140 u32 = _byteswap_ulong(u32);
5141#elif RT_INLINE_ASM_GNU_STYLE
5142 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5143#else
5144 _asm
5145 {
5146 mov eax, [u32]
5147 bswap eax
5148 mov [u32], eax
5149 }
5150#endif
5151 return u32;
5152}
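/*
 * A typical use is converting between big endian wire format and host order on a
 * little endian machine (the values are shown for illustration):
 *
 *      uint32_t u32Wire = 0x12345678;
 *      uint32_t u32Host = ASMByteSwapU32(u32Wire); // -> 0x78563412 on little endian hosts.
 */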
5153
5154/** @} */
5155
5156
5157/** @} */
5158#endif
5159