VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 628

Last change on this file since 628 was 434, checked in by vboxsync, 18 years ago

fixed imul/idiv for msc

1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using a compiler with _MSC_VER >= 1400.
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_interlockedbittestandset)
67# pragma intrinsic(_interlockedbittestandreset)
68# pragma intrinsic(_InterlockedAnd)
69# pragma intrinsic(_InterlockedOr)
70# pragma intrinsic(_InterlockedIncrement)
71# pragma intrinsic(_InterlockedDecrement)
72# pragma intrinsic(_InterlockedExchange)
73# pragma intrinsic(_InterlockedCompareExchange)
74# pragma intrinsic(_InterlockedCompareExchange64)
75# ifdef __AMD64__
76# pragma intrinsic(__stosq)
77# pragma intrinsic(__readcr8)
78# pragma intrinsic(__writecr8)
79# pragma intrinsic(_InterlockedExchange64)
80# endif
81# endif
82#endif
83#ifndef RT_INLINE_ASM_USES_INTRIN
84# define RT_INLINE_ASM_USES_INTRIN 0
85#endif
86
87
88
89/** @defgroup grp_asm ASM - Assembly Routines
90 * @ingroup grp_rt
91 * @{
92 */
93
94/** @def RT_INLINE_ASM_EXTERNAL
95 * Defined as 1 if the compiler does not support inline assembly.
96 * The ASM* functions will then be implemented in an external .asm file.
97 *
98 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
99 * inline assembly in their AMD64 compiler.
100 */
101#if defined(_MSC_VER) && defined(__AMD64__)
102# define RT_INLINE_ASM_EXTERNAL 1
103#else
104# define RT_INLINE_ASM_EXTERNAL 0
105#endif
106
107/** @def RT_INLINE_ASM_GNU_STYLE
108 * Defined as 1 if the compiler understands GNU style inline assembly.
109 */
110#if defined(_MSC_VER)
111# define RT_INLINE_ASM_GNU_STYLE 0
112#else
113# define RT_INLINE_ASM_GNU_STYLE 1
114#endif
115
116
117/** @todo find a more proper place for this structure? */
118#pragma pack(1)
119/** IDTR */
120typedef struct RTIDTR
121{
122 /** Size of the IDT. */
123 uint16_t cbIdt;
124 /** Address of the IDT. */
125 uintptr_t pIdt;
126} RTIDTR, *PRTIDTR;
127#pragma pack()
128
129#pragma pack(1)
130/** GDTR */
131typedef struct RTGDTR
132{
133 /** Size of the GDT. */
134 uint16_t cbGdt;
135 /** Address of the GDT. */
136 uintptr_t pGdt;
137} RTGDTR, *PRTGDTR;
138#pragma pack()
139
140
141/** @def ASMReturnAddress
142 * Gets the return address of the current (or calling if you like) function or method.
143 */
144#ifdef _MSC_VER
145# ifdef __cplusplus
146extern "C"
147# endif
148void * _ReturnAddress(void);
149# pragma intrinsic(_ReturnAddress)
150# define ASMReturnAddress() _ReturnAddress()
151#elif defined(__GNUC__) || defined(__DOXYGEN__)
152# define ASMReturnAddress() __builtin_return_address(0)
153#else
154# error "Unsupported compiler."
155#endif
156
157
158/**
159 * Gets the content of the IDTR CPU register.
160 * @param pIdtr Where to store the IDTR contents.
161 */
162#if RT_INLINE_ASM_EXTERNAL
163DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
164#else
165DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
166{
167# if RT_INLINE_ASM_GNU_STYLE
168 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
169# else
170 __asm
171 {
172# ifdef __AMD64__
173 mov rax, [pIdtr]
174 sidt [rax]
175# else
176 mov eax, [pIdtr]
177 sidt [eax]
178# endif
179 }
180# endif
181}
182#endif
183
184
185/**
186 * Sets the content of the IDTR CPU register.
187 * @param pIdtr Where to load the IDTR contents from
188 */
189#if RT_INLINE_ASM_EXTERNAL
190DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
191#else
192DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
193{
194# if RT_INLINE_ASM_GNU_STYLE
195 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
196# else
197 __asm
198 {
199# ifdef __AMD64__
200 mov rax, [pIdtr]
201 lidt [rax]
202# else
203 mov eax, [pIdtr]
204 lidt [eax]
205# endif
206 }
207# endif
208}
209#endif
210
211
212/**
213 * Gets the content of the GDTR CPU register.
214 * @param pGdtr Where to store the GDTR contents.
215 */
216#if RT_INLINE_ASM_EXTERNAL
217DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
218#else
219DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
220{
221# if RT_INLINE_ASM_GNU_STYLE
222 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
223# else
224 __asm
225 {
226# ifdef __AMD64__
227 mov rax, [pGdtr]
228 sgdt [rax]
229# else
230 mov eax, [pGdtr]
231 sgdt [eax]
232# endif
233 }
234# endif
235}
236#endif
237
238/**
239 * Get the cs register.
240 * @returns cs.
241 */
242#if RT_INLINE_ASM_EXTERNAL
243DECLASM(RTSEL) ASMGetCS(void);
244#else
245DECLINLINE(RTSEL) ASMGetCS(void)
246{
247 RTSEL SelCS;
248# if RT_INLINE_ASM_GNU_STYLE
249 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
250# else
251 __asm
252 {
253 mov ax, cs
254 mov [SelCS], ax
255 }
256# endif
257 return SelCS;
258}
259#endif
260
261
262/**
263 * Get the DS register.
264 * @returns DS.
265 */
266#if RT_INLINE_ASM_EXTERNAL
267DECLASM(RTSEL) ASMGetDS(void);
268#else
269DECLINLINE(RTSEL) ASMGetDS(void)
270{
271 RTSEL SelDS;
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
274# else
275 __asm
276 {
277 mov ax, ds
278 mov [SelDS], ax
279 }
280# endif
281 return SelDS;
282}
283#endif
284
285
286/**
287 * Get the ES register.
288 * @returns ES.
289 */
290#if RT_INLINE_ASM_EXTERNAL
291DECLASM(RTSEL) ASMGetES(void);
292#else
293DECLINLINE(RTSEL) ASMGetES(void)
294{
295 RTSEL SelES;
296# if RT_INLINE_ASM_GNU_STYLE
297 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
298# else
299 __asm
300 {
301 mov ax, es
302 mov [SelES], ax
303 }
304# endif
305 return SelES;
306}
307#endif
308
309
310/**
311 * Get the FS register.
312 * @returns FS.
313 */
314#if RT_INLINE_ASM_EXTERNAL
315DECLASM(RTSEL) ASMGetFS(void);
316#else
317DECLINLINE(RTSEL) ASMGetFS(void)
318{
319 RTSEL SelFS;
320# if RT_INLINE_ASM_GNU_STYLE
321 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
322# else
323 __asm
324 {
325 mov ax, fs
326 mov [SelFS], ax
327 }
328# endif
329 return SelFS;
330}
331#endif
332
333
334/**
335 * Get the GS register.
336 * @returns GS.
337 */
338#if RT_INLINE_ASM_EXTERNAL
339DECLASM(RTSEL) ASMGetGS(void);
340#else
341DECLINLINE(RTSEL) ASMGetGS(void)
342{
343 RTSEL SelGS;
344# if RT_INLINE_ASM_GNU_STYLE
345 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
346# else
347 __asm
348 {
349 mov ax, gs
350 mov [SelGS], ax
351 }
352# endif
353 return SelGS;
354}
355#endif
356
357
358/**
359 * Get the SS register.
360 * @returns SS.
361 */
362#if RT_INLINE_ASM_EXTERNAL
363DECLASM(RTSEL) ASMGetSS(void);
364#else
365DECLINLINE(RTSEL) ASMGetSS(void)
366{
367 RTSEL SelSS;
368# if RT_INLINE_ASM_GNU_STYLE
369 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
370# else
371 __asm
372 {
373 mov ax, ss
374 mov [SelSS], ax
375 }
376# endif
377 return SelSS;
378}
379#endif
380
381
382/**
383 * Get the TR register.
384 * @returns TR.
385 */
386#if RT_INLINE_ASM_EXTERNAL
387DECLASM(RTSEL) ASMGetTR(void);
388#else
389DECLINLINE(RTSEL) ASMGetTR(void)
390{
391 RTSEL SelTR;
392# if RT_INLINE_ASM_GNU_STYLE
393 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
394# else
395 __asm
396 {
397 str ax
398 mov [SelTR], ax
399 }
400# endif
401 return SelTR;
402}
403#endif
404
405
406/**
407 * Get the [RE]FLAGS register.
408 * @returns [RE]FLAGS.
409 */
410#if RT_INLINE_ASM_EXTERNAL
411DECLASM(RTCCUINTREG) ASMGetFlags(void);
412#else
413DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
414{
415 RTCCUINTREG uFlags;
416# if RT_INLINE_ASM_GNU_STYLE
417# ifdef __AMD64__
418 __asm__ __volatile__("pushfq\n\t"
419 "popq %0\n\t"
420 : "=m" (uFlags));
421# else
422 __asm__ __volatile__("pushfl\n\t"
423 "popl %0\n\t"
424 : "=m" (uFlags));
425# endif
426# else
427 __asm
428 {
429# ifdef __AMD64__
430 pushfq
431 pop [uFlags]
432# else
433 pushfd
434 pop [uFlags]
435# endif
436 }
437# endif
438 return uFlags;
439}
440#endif
441
442
443/**
444 * Set the [RE]FLAGS register.
445 * @param uFlags The new [RE]FLAGS value.
446 */
447#if RT_INLINE_ASM_EXTERNAL
448DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
449#else
450DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
451{
452# if RT_INLINE_ASM_GNU_STYLE
453# ifdef __AMD64__
454 __asm__ __volatile__("pushq %0\n\t"
455 "popfq\n\t"
456 : : "m" (uFlags));
457# else
458 __asm__ __volatile__("pushl %0\n\t"
459 "popfl\n\t"
460 : : "m" (uFlags));
461# endif
462# else
463 __asm
464 {
465# ifdef __AMD64__
466 push [uFlags]
467 popfq
468# else
469 push [uFlags]
470 popfd
471# endif
472 }
473# endif
474}
475#endif
476
477
478/**
479 * Gets the content of the CPU timestamp counter register.
480 *
481 * @returns TSC.
482 */
483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
484DECLASM(uint64_t) ASMReadTSC(void);
485#else
486DECLINLINE(uint64_t) ASMReadTSC(void)
487{
488 RTUINT64U u;
489# if RT_INLINE_ASM_GNU_STYLE
490 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
491# else
492# if RT_INLINE_ASM_USES_INTRIN
493 u.u = __rdtsc();
494# else
495 __asm
496 {
497 rdtsc
498 mov [u.s.Lo], eax
499 mov [u.s.Hi], edx
500 }
501# endif
502# endif
503 return u.u;
504}
505#endif
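
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): timing a code sequence with the TSC.
 * Serializing fences around RDTSC are omitted for brevity, so very short
 * measurements may be skewed by out-of-order execution.
 */
#if 0 /* example, not compiled */
static uint64_t ExampleMeasureCycles(void (*pfnWork)(void))
{
    uint64_t u64Start = ASMReadTSC();
    pfnWork();                          /* the code being measured */
    return ASMReadTSC() - u64Start;     /* elapsed time stamp counter ticks */
}
#endif
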
506
507
508/**
509 * Performs the cpuid instruction returning all registers.
510 *
511 * @param uOperator CPUID operation (eax).
512 * @param pvEAX Where to store eax.
513 * @param pvEBX Where to store ebx.
514 * @param pvECX Where to store ecx.
515 * @param pvEDX Where to store edx.
516 * @remark We're using void pointers to ease the use of special bitfield structures and such.
517 */
518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
519DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
520#else
521DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
522{
523# if RT_INLINE_ASM_GNU_STYLE
524 __asm__ ("xchgl %%ebx, %1\n\t"
525 "cpuid\n\t"
526 "xchgl %%ebx, %1\n\t"
527 : "=a" (*(uint32_t *)pvEAX),
528 "=r" (*(uint32_t *)pvEBX),
529 "=c" (*(uint32_t *)pvECX),
530 "=d" (*(uint32_t *)pvEDX)
531 : "0" (uOperator));
532
533# elif RT_INLINE_ASM_USES_INTRIN
534 int aInfo[4];
535 __cpuid(aInfo, uOperator);
536 *(uint32_t *)pvEAX = aInfo[0];
537 *(uint32_t *)pvEBX = aInfo[1];
538 *(uint32_t *)pvECX = aInfo[2];
539 *(uint32_t *)pvEDX = aInfo[3];
540
541# else
542 uint32_t uEAX;
543 uint32_t uEBX;
544 uint32_t uECX;
545 uint32_t uEDX;
546 __asm
547 {
548 push ebx
549 mov eax, [uOperator]
550 cpuid
551 mov [uEAX], eax
552 mov [uEBX], ebx
553 mov [uECX], ecx
554 mov [uEDX], edx
555 pop ebx
556 }
557 *(uint32_t *)pvEAX = uEAX;
558 *(uint32_t *)pvEBX = uEBX;
559 *(uint32_t *)pvECX = uECX;
560 *(uint32_t *)pvEDX = uEDX;
561# endif
562}
563#endif
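
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): fetching the 12-byte CPU vendor string
 * from CPUID leaf 0. The vendor bytes are laid out in EBX, EDX, ECX order.
 */
#if 0 /* example, not compiled */
static void ExampleGetCpuVendor(uint32_t au32Vendor[3])
{
    uint32_t uEAX;
    ASMCpuId(0, &uEAX, &au32Vendor[0], &au32Vendor[2], &au32Vendor[1]);
    /* au32Vendor now spells e.g. "GenuineIntel" or "AuthenticAMD" byte-wise. */
}
#endif
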
564
565
566/**
567 * Performs the cpuid instruction returning ecx and edx.
568 *
569 * @param uOperator CPUID operation (eax).
570 * @param pvECX Where to store ecx.
571 * @param pvEDX Where to store edx.
572 * @remark We're using void pointers to ease the use of special bitfield structures and such.
573 */
574#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
575DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
576#else
577DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
578{
579 uint32_t uEBX;
580 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
581}
582#endif
583
584
585/**
586 * Performs the cpuid instruction returning edx.
587 *
588 * @param uOperator CPUID operation (eax).
589 * @returns EDX after cpuid operation.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
593#else
594DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
595{
596 uint32_t u32EDX;
597# if RT_INLINE_ASM_GNU_STYLE
598# if (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
599 __asm__ ("push %%ebx\n\t"
600 "cpuid\n\t"
601 "pop %%ebx\n\t"
602 : "=a" (uOperator),
603 "=d" (u32EDX)
604 : "0" (uOperator)
605 : "ecx");
606# else
607 __asm__ ("cpuid"
608 : "=a" (uOperator),
609 "=d" (u32EDX)
610 : "0" (uOperator)
611 : "ebx", "ecx");
612# endif
613
614# elif RT_INLINE_ASM_USES_INTRIN
615 int aInfo[4];
616 __cpuid(aInfo, uOperator);
617 u32EDX = aInfo[3];
618
619# else
620 __asm
621 {
622 push ebx
623 mov eax, [uOperator]
624 cpuid
625 mov [u32EDX], edx
626 pop ebx
627 }
628# endif
629 return u32EDX;
630}
631#endif
632
633
634/**
635 * Performs the cpuid instruction returning ecx.
636 *
637 * @param uOperator CPUID operation (eax).
638 * @returns ECX after cpuid operation.
639 */
640#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
641DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
642#else
643DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
644{
645 uint32_t u32ECX;
646# if RT_INLINE_ASM_GNU_STYLE
647# if (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
648 __asm__ ("push %%ebx\n\t"
649 "cpuid\n\t"
650 "pop %%ebx\n\t"
651 : "=a" (uOperator),
652 "=c" (u32ECX)
653 : "0" (uOperator)
654 : "edx");
655# else
656 __asm__ ("cpuid"
657 : "=a" (uOperator),
658 "=c" (u32ECX)
659 : "0" (uOperator)
660 : "ebx", "edx");
661
662# endif
663
664# elif RT_INLINE_ASM_USES_INTRIN
665 int aInfo[4];
666 __cpuid(aInfo, uOperator);
667 u32ECX = aInfo[2];
668
669# else
670 __asm
671 {
672 push ebx
673 mov eax, [uOperator]
674 cpuid
675 mov [u32ECX], ecx
676 pop ebx
677 }
678# endif
679 return u32ECX;
680}
681#endif
682
683
684/**
685 * Checks if the current CPU supports CPUID.
686 *
687 * @returns true if CPUID is supported.
688 */
689DECLINLINE(bool) ASMHasCpuId(void)
690{
691#ifdef __AMD64__
692 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
693#else /* !__AMD64__ */
694 bool fRet = false;
695# if RT_INLINE_ASM_GNU_STYLE
696 uint32_t u1;
697 uint32_t u2;
698 __asm__ ("pushf\n\t"
699 "pop %1\n\t"
700 "mov %1, %2\n\t"
701 "xorl $0x200000, %1\n\t"
702 "push %1\n\t"
703 "popf\n\t"
704 "pushf\n\t"
705 "pop %1\n\t"
706 "cmpl %1, %2\n\t"
707 "setne %0\n\t"
708 "push %2\n\t"
709 "popf\n\t"
710 : "=m" (fRet), "=r" (u1), "=r" (u2));
711# else
712 __asm
713 {
714 pushfd
715 pop eax
716 mov ebx, eax
717 xor eax, 0200000h
718 push eax
719 popfd
720 pushfd
721 pop eax
722 cmp eax, ebx
723 setne fRet
724 push ebx
725 popfd
726 }
727# endif
728 return fRet;
729#endif /* !__AMD64__ */
730}
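
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): guarding feature detection with
 * ASMHasCpuId before querying the standard feature leaf. Bit 4 of EDX in
 * leaf 1 is the TSC feature flag.
 */
#if 0 /* example, not compiled */
static bool ExampleHasTsc(void)
{
    if (!ASMHasCpuId())
        return false;
    return (ASMCpuId_EDX(1) & (1 << 4)) != 0;   /* CPUID.1:EDX bit 4 = TSC */
}
#endif
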
731
732
733/**
734 * Get cr0.
735 * @returns cr0.
736 */
737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
738DECLASM(RTCCUINTREG) ASMGetCR0(void);
739#else
740DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
741{
742 RTCCUINTREG uCR0;
743# if RT_INLINE_ASM_USES_INTRIN
744 uCR0 = __readcr0();
745
746# elif RT_INLINE_ASM_GNU_STYLE
747# ifdef __AMD64__
748 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
749# else
750 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
751# endif
752# else
753 __asm
754 {
755# ifdef __AMD64__
756 mov rax, cr0
757 mov [uCR0], rax
758# else
759 mov eax, cr0
760 mov [uCR0], eax
761# endif
762 }
763# endif
764 return uCR0;
765}
766#endif
767
768
769/**
770 * Sets the CR0 register.
771 * @param uCR0 The new CR0 value.
772 */
773#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
774DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
775#else
776DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
777{
778# if RT_INLINE_ASM_USES_INTRIN
779 __writecr0(uCR0);
780
781# elif RT_INLINE_ASM_GNU_STYLE
782# ifdef __AMD64__
783 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
784# else
785 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
786# endif
787# else
788 __asm
789 {
790# ifdef __AMD64__
791 mov rax, [uCR0]
792 mov cr0, rax
793# else
794 mov eax, [uCR0]
795 mov cr0, eax
796# endif
797 }
798# endif
799}
800#endif
801
802
803/**
804 * Get cr2.
805 * @returns cr2.
806 */
807#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
808DECLASM(RTCCUINTREG) ASMGetCR2(void);
809#else
810DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
811{
812 RTCCUINTREG uCR2;
813# if RT_INLINE_ASM_USES_INTRIN
814 uCR2 = __readcr2();
815
816# elif RT_INLINE_ASM_GNU_STYLE
817# ifdef __AMD64__
818 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
819# else
820 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
821# endif
822# else
823 __asm
824 {
825# ifdef __AMD64__
826 mov rax, cr2
827 mov [uCR2], rax
828# else
829 mov eax, cr2
830 mov [uCR2], eax
831# endif
832 }
833# endif
834 return uCR2;
835}
836#endif
837
838
839/**
840 * Sets the CR2 register.
841 * @param uCR2 The new CR2 value.
842 */
843#if RT_INLINE_ASM_EXTERNAL
844DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
845#else
846DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
847{
848# if RT_INLINE_ASM_GNU_STYLE
849# ifdef __AMD64__
850 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
851# else
852 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
853# endif
854# else
855 __asm
856 {
857# ifdef __AMD64__
858 mov rax, [uCR2]
859 mov cr2, rax
860# else
861 mov eax, [uCR2]
862 mov cr2, eax
863# endif
864 }
865# endif
866}
867#endif
868
869
870/**
871 * Get cr3.
872 * @returns cr3.
873 */
874#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
875DECLASM(RTCCUINTREG) ASMGetCR3(void);
876#else
877DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
878{
879 RTCCUINTREG uCR3;
880# if RT_INLINE_ASM_USES_INTRIN
881 uCR3 = __readcr3();
882
883# elif RT_INLINE_ASM_GNU_STYLE
884# ifdef __AMD64__
885 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
886# else
887 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
888# endif
889# else
890 __asm
891 {
892# ifdef __AMD64__
893 mov rax, cr3
894 mov [uCR3], rax
895# else
896 mov eax, cr3
897 mov [uCR3], eax
898# endif
899 }
900# endif
901 return uCR3;
902}
903#endif
904
905
906/**
907 * Sets the CR3 register.
908 *
909 * @param uCR3 New CR3 value.
910 */
911#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
912DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
913#else
914DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
915{
916# if RT_INLINE_ASM_USES_INTRIN
917 __writecr3(uCR3);
918
919# elif RT_INLINE_ASM_GNU_STYLE
920# ifdef __AMD64__
921 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
922# else
923 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
924# endif
925# else
926 __asm
927 {
928# ifdef __AMD64__
929 mov rax, [uCR3]
930 mov cr3, rax
931# else
932 mov eax, [uCR3]
933 mov cr3, eax
934# endif
935 }
936# endif
937}
938#endif
939
940
941/**
942 * Reloads the CR3 register.
943 */
944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
945DECLASM(void) ASMReloadCR3(void);
946#else
947DECLINLINE(void) ASMReloadCR3(void)
948{
949# if RT_INLINE_ASM_USES_INTRIN
950 __writecr3(__readcr3());
951
952# elif RT_INLINE_ASM_GNU_STYLE
953 RTCCUINTREG u;
954# ifdef __AMD64__
955 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
956 "movq %0, %%cr3\n\t"
957 : "=r" (u));
958# else
959 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
960 "movl %0, %%cr3\n\t"
961 : "=r" (u));
962# endif
963# else
964 __asm
965 {
966# ifdef __AMD64__
967 mov rax, cr3
968 mov cr3, rax
969# else
970 mov eax, cr3
971 mov cr3, eax
972# endif
973 }
974# endif
975}
976#endif
977
978
979/**
980 * Get cr4.
981 * @returns cr4.
982 */
983#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
984DECLASM(RTCCUINTREG) ASMGetCR4(void);
985#else
986DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
987{
988 RTCCUINTREG uCR4;
989# if RT_INLINE_ASM_USES_INTRIN
990 uCR4 = __readcr4();
991
992# elif RT_INLINE_ASM_GNU_STYLE
993# ifdef __AMD64__
994 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
995# else
996 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
997# endif
998# else
999 __asm
1000 {
1001# ifdef __AMD64__
1002 mov rax, cr4
1003 mov [uCR4], rax
1004# else
1005 push eax /* just in case */
1006 /*mov eax, cr4*/
1007 _emit 0x0f
1008 _emit 0x20
1009 _emit 0xe0
1010 mov [uCR4], eax
1011 pop eax
1012# endif
1013 }
1014# endif
1015 return uCR4;
1016}
1017#endif
1018
1019
1020/**
1021 * Sets the CR4 register.
1022 *
1023 * @param uCR4 New CR4 value.
1024 */
1025#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1026DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1027#else
1028DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1029{
1030# if RT_INLINE_ASM_USES_INTRIN
1031 __writecr4(uCR4);
1032
1033# elif RT_INLINE_ASM_GNU_STYLE
1034# ifdef __AMD64__
1035 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1036# else
1037 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1038# endif
1039# else
1040 __asm
1041 {
1042# ifdef __AMD64__
1043 mov rax, [uCR4]
1044 mov cr4, rax
1045# else
1046 mov eax, [uCR4]
1047 _emit 0x0F
1048 _emit 0x22
1049 _emit 0xE0 /* mov cr4, eax */
1050# endif
1051 }
1052# endif
1053}
1054#endif
1055
1056
1057/**
1058 * Get cr8.
1059 * @returns cr8.
1060 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1061 */
1062#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1063DECLASM(RTCCUINTREG) ASMGetCR8(void);
1064#else
1065DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1066{
1067# ifdef __AMD64__
1068 RTCCUINTREG uCR8;
1069# if RT_INLINE_ASM_USES_INTRIN
1070 uCR8 = __readcr8();
1071
1072# elif RT_INLINE_ASM_GNU_STYLE
1073 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1074# else
1075 __asm
1076 {
1077 mov rax, cr8
1078 mov [uCR8], rax
1079 }
1080# endif
1081 return uCR8;
1082# else /* !__AMD64__ */
1083 return 0;
1084# endif /* !__AMD64__ */
1085}
1086#endif
1087
1088
1089/**
1090 * Enables interrupts (EFLAGS.IF).
1091 */
1092#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1093DECLASM(void) ASMIntEnable(void);
1094#else
1095DECLINLINE(void) ASMIntEnable(void)
1096{
1097# if RT_INLINE_ASM_GNU_STYLE
1098 __asm("sti\n");
1099# elif RT_INLINE_ASM_USES_INTRIN
1100 _enable();
1101# else
1102 __asm sti
1103# endif
1104}
1105#endif
1106
1107
1108/**
1109 * Disables interrupts (!EFLAGS.IF).
1110 */
1111#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1112DECLASM(void) ASMIntDisable(void);
1113#else
1114DECLINLINE(void) ASMIntDisable(void)
1115{
1116# if RT_INLINE_ASM_GNU_STYLE
1117 __asm("cli\n");
1118# elif RT_INLINE_ASM_USES_INTRIN
1119 _disable();
1120# else
1121 __asm cli
1122# endif
1123}
1124#endif
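
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): the usual save / disable / restore
 * pattern for short critical sections. ASMGetFlags captures the current
 * [RE]FLAGS (including IF), ASMIntDisable clears IF, and ASMSetFlags
 * restores whatever interrupt state the caller had.
 */
#if 0 /* example, not compiled */
static void ExampleWithInterruptsDisabled(void (*pfnCritical)(void))
{
    RTCCUINTREG fSavedFlags = ASMGetFlags();
    ASMIntDisable();
    pfnCritical();                  /* runs with interrupts masked */
    ASMSetFlags(fSavedFlags);       /* restores the previous IF state */
}
#endif
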
1125
1126
1127/**
1128 * Reads a machine specific register.
1129 *
1130 * @returns Register content.
1131 * @param uRegister Register to read.
1132 */
1133#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1134DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1135#else
1136DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1137{
1138 RTUINT64U u;
1139# if RT_INLINE_ASM_GNU_STYLE
1140 __asm__ ("rdmsr\n\t"
1141 : "=a" (u.s.Lo),
1142 "=d" (u.s.Hi)
1143 : "c" (uRegister));
1144
1145# elif RT_INLINE_ASM_USES_INTRIN
1146 u.u = __readmsr(uRegister);
1147
1148# else
1149 __asm
1150 {
1151 mov ecx, [uRegister]
1152 rdmsr
1153 mov [u.s.Lo], eax
1154 mov [u.s.Hi], edx
1155 }
1156# endif
1157
1158 return u.u;
1159}
1160#endif
1161
1162
1163/**
1164 * Writes a machine specific register.
1165 *
1167 * @param uRegister Register to write to.
1168 * @param u64Val Value to write.
1169 */
1170#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1171DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1172#else
1173DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1174{
1175 RTUINT64U u;
1176
1177 u.u = u64Val;
1178# if RT_INLINE_ASM_GNU_STYLE
1179 __asm__ __volatile__("wrmsr\n\t"
1180 ::"a" (u.s.Lo),
1181 "d" (u.s.Hi),
1182 "c" (uRegister));
1183
1184# elif RT_INLINE_ASM_USES_INTRIN
1185 __writemsr(uRegister, u.u);
1186
1187# else
1188 __asm
1189 {
1190 mov ecx, [uRegister]
1191 mov edx, [u.s.Hi]
1192 mov eax, [u.s.Lo]
1193 wrmsr
1194 }
1195# endif
1196}
1197#endif
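
/*
 * Usage sketch (illustrative only, not part of the original header):
 * read-modify-write of a machine specific register. EXAMPLE_MSR_INDEX and
 * the bit being set are placeholders, not definitions from this API.
 */
#if 0 /* example, not compiled */
#define EXAMPLE_MSR_INDEX   0x00000000      /* placeholder MSR number */
static void ExampleSetMsrBit(void)
{
    uint64_t u64 = ASMRdMsr(EXAMPLE_MSR_INDEX);
    u64 |= (uint64_t)1 << 11;               /* placeholder bit */
    ASMWrMsr(EXAMPLE_MSR_INDEX, u64);
}
#endif
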
1198
1199
1200/**
1201 * Reads the low part (bits 31:0) of a machine specific register.
1202 *
1203 * @returns Register content.
1204 * @param uRegister Register to read.
1205 */
1206#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1207DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1208#else
1209DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1210{
1211 uint32_t u32;
1212# if RT_INLINE_ASM_GNU_STYLE
1213 __asm__ ("rdmsr\n\t"
1214 : "=a" (u32)
1215 : "c" (uRegister)
1216 : "edx");
1217
1218# elif RT_INLINE_ASM_USES_INTRIN
1219 u32 = (uint32_t)__readmsr(uRegister);
1220
1221#else
1222 __asm
1223 {
1224 mov ecx, [uRegister]
1225 rdmsr
1226 mov [u32], eax
1227 }
1228# endif
1229
1230 return u32;
1231}
1232#endif
1233
1234
1235/**
1236 * Reads the high part (bits 63:32) of a machine specific register.
1237 *
1238 * @returns Register content.
1239 * @param uRegister Register to read.
1240 */
1241#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1242DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1243#else
1244DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1245{
1246 uint32_t u32;
1247# if RT_INLINE_ASM_GNU_STYLE
1248 __asm__ ("rdmsr\n\t"
1249 : "=d" (u32)
1250 : "c" (uRegister)
1251 : "eax");
1252
1253# elif RT_INLINE_ASM_USES_INTRIN
1254 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1255
1256# else
1257 __asm
1258 {
1259 mov ecx, [uRegister]
1260 rdmsr
1261 mov [u32], edx
1262 }
1263# endif
1264
1265 return u32;
1266}
1267#endif
1268
1269
1270/**
1271 * Gets dr7.
1272 *
1273 * @returns dr7.
1274 */
1275#if RT_INLINE_ASM_EXTERNAL
1276DECLASM(RTCCUINTREG) ASMGetDR7(void);
1277#else
1278DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1279{
1280 RTCCUINTREG uDR7;
1281# if RT_INLINE_ASM_GNU_STYLE
1282# ifdef __AMD64__
1283 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1284# else
1285 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1286# endif
1287# else
1288 __asm
1289 {
1290# ifdef __AMD64__
1291 mov rax, dr7
1292 mov [uDR7], rax
1293# else
1294 mov eax, dr7
1295 mov [uDR7], eax
1296# endif
1297 }
1298# endif
1299 return uDR7;
1300}
1301#endif
1302
1303
1304/**
1305 * Gets dr6.
1306 *
1307 * @returns dr6.
1308 */
1309#if RT_INLINE_ASM_EXTERNAL
1310DECLASM(RTCCUINTREG) ASMGetDR6(void);
1311#else
1312DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1313{
1314 RTCCUINTREG uDR6;
1315# if RT_INLINE_ASM_GNU_STYLE
1316# ifdef __AMD64__
1317 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1318# else
1319 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1320# endif
1321# else
1322 __asm
1323 {
1324# ifdef __AMD64__
1325 mov rax, dr6
1326 mov [uDR6], rax
1327# else
1328 mov eax, dr6
1329 mov [uDR6], eax
1330# endif
1331 }
1332# endif
1333 return uDR6;
1334}
1335#endif
1336
1337
1338/**
1339 * Reads and clears DR6.
1340 *
1341 * @returns DR6.
1342 */
1343#if RT_INLINE_ASM_EXTERNAL
1344DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1345#else
1346DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1347{
1348 RTCCUINTREG uDR6;
1349# if RT_INLINE_ASM_GNU_STYLE
1350 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, everything else is zero. */
1351# ifdef __AMD64__
1352 __asm__ ("movq %%dr6, %0\n\t"
1353 "movq %1, %%dr6\n\t"
1354 : "=r" (uDR6)
1355 : "r" (uNewValue));
1356# else
1357 __asm__ ("movl %%dr6, %0\n\t"
1358 "movl %1, %%dr6\n\t"
1359 : "=r" (uDR6)
1360 : "r" (uNewValue));
1361# endif
1362# else
1363 __asm
1364 {
1365# ifdef __AMD64__
1366 mov rax, dr6
1367 mov [uDR6], rax
1368 mov rcx, rax
1369 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, everything else is zero. */
1370 mov dr6, rcx
1371# else
1372 mov eax, dr6
1373 mov [uDR6], eax
1374 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, everything else is zero. */
1375 mov dr6, ecx
1376# endif
1377 }
1378# endif
1379 return uDR6;
1380}
1381#endif
1382
1383
1384/** @deprecated */
1385#define ASMOutB(p, b) ASMOutU8(p,b)
1386/** @deprecated */
1387#define ASMInB(p) ASMInU8(p)
1388
1389/**
1390 * Writes an 8-bit unsigned integer to an I/O port.
1391 *
1392 * @param Port I/O port to write to.
1393 * @param u8 8-bit integer to write.
1394 */
1395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1396DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1397#else
1398DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1399{
1400# if RT_INLINE_ASM_GNU_STYLE
1401 __asm__ __volatile__("outb %b1, %w0\n\t"
1402 :: "Nd" (Port),
1403 "a" (u8));
1404
1405# elif RT_INLINE_ASM_USES_INTRIN
1406 __outbyte(Port, u8);
1407
1408# else
1409 __asm
1410 {
1411 mov dx, [Port]
1412 mov al, [u8]
1413 out dx, al
1414 }
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Gets an 8-bit unsigned integer from an I/O port.
1422 *
1423 * @returns 8-bit integer.
1424 * @param Port I/O port to read from.
1425 */
1426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1427DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1428#else
1429DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1430{
1431 uint8_t u8;
1432# if RT_INLINE_ASM_GNU_STYLE
1433 __asm__ __volatile__("inb %w1, %b0\n\t"
1434 : "=a" (u8)
1435 : "Nd" (Port));
1436
1437# elif RT_INLINE_ASM_USES_INTRIN
1438 u8 = __inbyte(Port);
1439
1440# else
1441 __asm
1442 {
1443 mov dx, [Port]
1444 in al, dx
1445 mov [u8], al
1446 }
1447# endif
1448 return u8;
1449}
1450#endif
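
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): the classic CMOS/RTC access pattern -
 * write the register index to port 0x70, then read the value back from
 * port 0x71.
 */
#if 0 /* example, not compiled */
static uint8_t ExampleReadCmos(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);       /* select the CMOS register */
    return ASMInU8(0x71);       /* read its current value */
}
#endif
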
1451
1452
1453/**
1454 * Writes a 16-bit unsigned integer to an I/O port.
1455 *
1456 * @param Port I/O port to write to.
1457 * @param u16 16-bit integer to write.
1458 */
1459#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1460DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1461#else
1462DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1463{
1464# if RT_INLINE_ASM_GNU_STYLE
1465 __asm__ __volatile__("outw %w1, %w0\n\t"
1466 :: "Nd" (Port),
1467 "a" (u16));
1468
1469# elif RT_INLINE_ASM_USES_INTRIN
1470 __outword(Port, u16);
1471
1472# else
1473 __asm
1474 {
1475 mov dx, [Port]
1476 mov ax, [u16]
1477 out dx, ax
1478 }
1479# endif
1480}
1481#endif
1482
1483
1484/**
1485 * Gets a 16-bit unsigned integer from an I/O port.
1486 *
1487 * @returns 16-bit integer.
1488 * @param Port I/O port to read from.
1489 */
1490#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1491DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1492#else
1493DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1494{
1495 uint16_t u16;
1496# if RT_INLINE_ASM_GNU_STYLE
1497 __asm__ __volatile__("inw %w1, %w0\n\t"
1498 : "=a" (u16)
1499 : "Nd" (Port));
1500
1501# elif RT_INLINE_ASM_USES_INTRIN
1502 u16 = __inword(Port);
1503
1504# else
1505 __asm
1506 {
1507 mov dx, [Port]
1508 in ax, dx
1509 mov [u16], ax
1510 }
1511# endif
1512 return u16;
1513}
1514#endif
1515
1516
1517/**
1518 * Writes a 32-bit unsigned integer to an I/O port.
1519 *
1520 * @param Port I/O port to write to.
1521 * @param u32 32-bit integer to write.
1522 */
1523#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1524DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1525#else
1526DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1527{
1528# if RT_INLINE_ASM_GNU_STYLE
1529 __asm__ __volatile__("outl %1, %w0\n\t"
1530 :: "Nd" (Port),
1531 "a" (u32));
1532
1533# elif RT_INLINE_ASM_USES_INTRIN
1534 __outdword(Port, u32);
1535
1536# else
1537 __asm
1538 {
1539 mov dx, [Port]
1540 mov eax, [u32]
1541 out dx, eax
1542 }
1543# endif
1544}
1545#endif
1546
1547
1548/**
1549 * Gets a 32-bit unsigned integer from an I/O port.
1550 *
1551 * @returns 32-bit integer.
1552 * @param Port I/O port to read from.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1556#else
1557DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("inl %w1, %0\n\t"
1562 : "=a" (u32)
1563 : "Nd" (Port));
1564
1565# elif RT_INLINE_ASM_USES_INTRIN
1566 u32 = __indword(Port);
1567
1568# else
1569 __asm
1570 {
1571 mov dx, [Port]
1572 in eax, dx
1573 mov [u32], eax
1574 }
1575# endif
1576 return u32;
1577}
1578#endif
1579
1580
1581/**
1582 * Atomically Exchange an unsigned 8-bit value.
1583 *
1584 * @returns Current *pu8 value
1585 * @param pu8 Pointer to the 8-bit variable to update.
1586 * @param u8 The 8-bit value to assign to *pu8.
1587 */
1588#if RT_INLINE_ASM_EXTERNAL
1589DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1590#else
1591DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1592{
1593# if RT_INLINE_ASM_GNU_STYLE
1594 __asm__ __volatile__("xchgb %0, %1\n\t"
1595 : "=m" (*pu8),
1596 "=r" (u8)
1597 : "1" (u8));
1598# else
1599 __asm
1600 {
1601# ifdef __AMD64__
1602 mov rdx, [pu8]
1603 mov al, [u8]
1604 xchg [rdx], al
1605 mov [u8], al
1606# else
1607 mov edx, [pu8]
1608 mov al, [u8]
1609 xchg [edx], al
1610 mov [u8], al
1611# endif
1612 }
1613# endif
1614 return u8;
1615}
1616#endif
1617
1618
1619/**
1620 * Atomically Exchange a signed 8-bit value.
1621 *
1622 * @returns Current *pi8 value
1623 * @param pi8 Pointer to the 8-bit variable to update.
1624 * @param i8 The 8-bit value to assign to *pi8.
1625 */
1626DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1627{
1628 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1629}
1630
1631
1632/**
1633 * Atomically Exchange an unsigned 16-bit value.
1634 *
1635 * @returns Current *pu16 value
1636 * @param pu16 Pointer to the 16-bit variable to update.
1637 * @param u16 The 16-bit value to assign to *pu16.
1638 */
1639#if RT_INLINE_ASM_EXTERNAL
1640DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1641#else
1642DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1643{
1644# if RT_INLINE_ASM_GNU_STYLE
1645 __asm__ __volatile__("xchgw %0, %1\n\t"
1646 : "=m" (*pu16),
1647 "=r" (u16)
1648 : "1" (u16));
1649# else
1650 __asm
1651 {
1652# ifdef __AMD64__
1653 mov rdx, [pu16]
1654 mov ax, [u16]
1655 xchg [rdx], ax
1656 mov [u16], ax
1657# else
1658 mov edx, [pu16]
1659 mov ax, [u16]
1660 xchg [edx], ax
1661 mov [u16], ax
1662# endif
1663 }
1664# endif
1665 return u16;
1666}
1667#endif
1668
1669
1670/**
1671 * Atomically Exchange a signed 16-bit value.
1672 *
1673 * @returns Current *pi16 value
1674 * @param pi16 Pointer to the 16-bit variable to update.
1675 * @param i16 The 16-bit value to assign to *pi16.
1676 */
1677DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1678{
1679 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1680}
1681
1682
1683/**
1684 * Atomically Exchange an unsigned 32-bit value.
1685 *
1686 * @returns Current *pu32 value
1687 * @param pu32 Pointer to the 32-bit variable to update.
1688 * @param u32 The 32-bit value to assign to *pu32.
1689 */
1690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1691DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1692#else
1693DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1694{
1695# if RT_INLINE_ASM_GNU_STYLE
1696 __asm__ __volatile__("xchgl %0, %1\n\t"
1697 : "=m" (*pu32),
1698 "=r" (u32)
1699 : "1" (u32));
1700
1701# elif RT_INLINE_ASM_USES_INTRIN
1702 u32 = _InterlockedExchange((long *)pu32, u32);
1703
1704# else
1705 __asm
1706 {
1707# ifdef __AMD64__
1708 mov rdx, [pu32]
1709 mov eax, u32
1710 xchg [rdx], eax
1711 mov [u32], eax
1712# else
1713 mov edx, [pu32]
1714 mov eax, u32
1715 xchg [edx], eax
1716 mov [u32], eax
1717# endif
1718 }
1719# endif
1720 return u32;
1721}
1722#endif
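
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): a minimal test-and-set spinlock built
 * on ASMAtomicXchgU32. Exchanging in 1 returns the previous value; 0 means
 * the lock was free and now belongs to the caller.
 */
#if 0 /* example, not compiled */
static void ExampleSpinLock(uint32_t volatile *pu32Lock)
{
    while (ASMAtomicXchgU32(pu32Lock, 1) != 0)
        /* spin */;
}

static void ExampleSpinUnlock(uint32_t volatile *pu32Lock)
{
    ASMAtomicXchgU32(pu32Lock, 0);
}
#endif
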
1723
1724
1725/**
1726 * Atomically Exchange a signed 32-bit value.
1727 *
1728 * @returns Current *pi32 value
1729 * @param pi32 Pointer to the 32-bit variable to update.
1730 * @param i32 The 32-bit value to assign to *pi32.
1731 */
1732DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1733{
1734 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1735}
1736
1737
1738/**
1739 * Atomically Exchange an unsigned 64-bit value.
1740 *
1741 * @returns Current *pu64 value
1742 * @param pu64 Pointer to the 64-bit variable to update.
1743 * @param u64 The 64-bit value to assign to *pu64.
1744 */
1745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1746DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1747#else
1748DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1749{
1750# if defined(__AMD64__)
1751# if RT_INLINE_ASM_USES_INTRIN
1752 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1753
1754# elif RT_INLINE_ASM_GNU_STYLE
1755 __asm__ __volatile__("xchgq %0, %1\n\t"
1756 : "=m" (*pu64),
1757 "=r" (u64)
1758 : "1" (u64));
1759# else
1760 __asm
1761 {
1762 mov rdx, [pu64]
1763 mov rax, [u64]
1764 xchg [rdx], rax
1765 mov [u64], rax
1766 }
1767# endif
1768# else /* !__AMD64__ */
1769# if RT_INLINE_ASM_GNU_STYLE
1770# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1771 uint32_t u32 = (uint32_t)u64;
1772 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1773 "xchgl %%ebx, %3\n\t"
1774 "1:\n\t"
1775 "lock; cmpxchg8b (%5)\n\t"
1776 "jnz 1b\n\t"
1777 "xchgl %%ebx, %3\n\t"
1778 /*"xchgl %%esi, %5\n\t"*/
1779 : "=A" (u64),
1780 "=m" (*pu64)
1781 : "0" (*pu64),
1782 "m" ( u32 ),
1783 "c" ( (uint32_t)(u64 >> 32) ),
1784 "S" (pu64) );
1785# else /* !PIC */
1786 __asm__ __volatile__("1:\n\t"
1787 "lock; cmpxchg8b %1\n\t"
1788 "jnz 1b\n\t"
1789 : "=A" (u64),
1790 "=m" (*pu64)
1791 : "0" (*pu64),
1792 "b" ( (uint32_t)u64 ),
1793 "c" ( (uint32_t)(u64 >> 32) ));
1794# endif
1795# else
1796 __asm
1797 {
1798 mov ebx, dword ptr [u64]
1799 mov ecx, dword ptr [u64 + 4]
1800 mov edi, pu64
1801 mov eax, dword ptr [edi]
1802 mov edx, dword ptr [edi + 4]
1803 retry:
1804 lock cmpxchg8b [edi]
1805 jnz retry
1806 mov dword ptr [u64], eax
1807 mov dword ptr [u64 + 4], edx
1808 }
1809# endif
1810# endif /* !__AMD64__ */
1811 return u64;
1812}
1813#endif
1814
1815
1816/**
1817 * Atomically Exchange a signed 64-bit value.
1818 *
1819 * @returns Current *pi64 value
1820 * @param pi64 Pointer to the 64-bit variable to update.
1821 * @param i64 The 64-bit value to assign to *pi64.
1822 */
1823DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1824{
1825 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1826}
1827
1828
1829#ifdef __AMD64__
1830/**
1831 * Atomically Exchange an unsigned 128-bit value.
1832 *
1833 * @returns Current *pu128.
1834 * @param pu128 Pointer to the 128-bit variable to update.
1835 * @param u128 The 128-bit value to assign to *pu128.
1836 *
1837 * @remark We cannot really assume that any hardware supports this. Nor do I have
1838 * GAS support for it. So, for the time being we'll BREAK the atomic
1839 * bit of this function and use two 64-bit exchanges instead.
1840 */
1841# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1842DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1843# else
1844DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
1845{
1846 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
1847 {
1848 /** @todo this is clumsy code */
1849 RTUINT128U u128Ret;
1850 u128Ret.u = u128;
1851 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
1852 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
1853 return u128Ret.u;
1854 }
1855#if 0 /* later? */
1856 else
1857 {
1858# if RT_INLINE_ASM_GNU_STYLE
1859 __asm__ __volatile__("1:\n\t"
1860 "lock; cmpxchg8b %1\n\t"
1861 "jnz 1b\n\t"
1862 : "=A" (u128),
1863 "=m" (*pu128)
1864 : "0" (*pu128),
1865 "b" ( (uint64_t)u128 ),
1866 "c" ( (uint64_t)(u128 >> 64) ));
1867# else
1868 __asm
1869 {
1870 mov rbx, dword ptr [u128]
1871 mov rcx, dword ptr [u128 + 4]
1872 mov rdi, pu128
1873 mov rax, dword ptr [rdi]
1874 mov rdx, dword ptr [rdi + 4]
1875 retry:
1876 lock cmpxchg16b [rdi]
1877 jnz retry
1878 mov dword ptr [u128], rax
1879 mov dword ptr [u128 + 4], rdx
1880 }
1881# endif
1882 }
1883 return u128;
1884#endif
1885}
1886# endif
1887#endif /* __AMD64__ */
1888
1889
1890/**
1891 * Atomically Reads an unsigned 64-bit value.
1892 *
1893 * @returns Current *pu64 value
1894 * @param pu64 Pointer to the 64-bit variable to read.
1895 * The memory pointed to must be writable.
1896 * @remark This will fault if the memory is read-only!
1897 */
1898#if RT_INLINE_ASM_EXTERNAL
1899DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1900#else
1901DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1902{
1903 uint64_t u64;
1904# ifdef __AMD64__
1905# if RT_INLINE_ASM_GNU_STYLE
1906 __asm__ __volatile__("movq %1, %0\n\t"
1907 : "=r" (u64)
1908 : "m" (*pu64));
1909# else
1910 __asm
1911 {
1912 mov rdx, [pu64]
1913 mov rax, [rdx]
1914 mov [u64], rax
1915 }
1916# endif
1917# else /* !__AMD64__ */
1918# if RT_INLINE_ASM_GNU_STYLE
1919# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1920 uint32_t u32EBX = 0;
1921 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1922 "lock; cmpxchg8b (%5)\n\t"
1923 "xchgl %%ebx, %3\n\t"
1924 : "=A" (u64),
1925 "=m" (*pu64)
1926 : "0" (0),
1927 "m" (u32EBX),
1928 "c" (0),
1929 "S" (pu64));
1930# else /* !PIC */
1931 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1932 : "=A" (u64),
1933 "=m" (*pu64)
1934 : "0" (0),
1935 "b" (0),
1936 "c" (0));
1937# endif
1938# else
1939 __asm
1940 {
1941 xor eax, eax
1942 xor edx, edx
1943 mov edi, pu64
1944 xor ecx, ecx
1945 xor ebx, ebx
1946 lock cmpxchg8b [edi]
1947 mov dword ptr [u64], eax
1948 mov dword ptr [u64 + 4], edx
1949 }
1950# endif
1951# endif /* !__AMD64__ */
1952 return u64;
1953}
1954#endif
1955
1956
1957/**
1958 * Atomically Reads a signed 64-bit value.
1959 *
1960 * @returns Current *pi64 value
1961 * @param pi64 Pointer to the 64-bit variable to read.
1962 * The memory pointed to must be writable.
1963 * @remark This will fault if the memory is read-only!
1964 */
1965DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1966{
1967 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1968}
1969
1970
1971/**
1972 * Atomically Exchange a value whose size might differ
1973 * between platforms or compilers.
1974 *
1975 * @param pu Pointer to the variable to update.
1976 * @param uNew The value to assign to *pu.
1977 */
1978#define ASMAtomicXchgSize(pu, uNew) \
1979 do { \
1980 switch (sizeof(*(pu))) { \
1981 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
1982 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
1983 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
1984 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
1985 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1986 } \
1987 } while (0)
1988
1989
1990/**
1991 * Atomically Exchange a pointer value.
1992 *
1993 * @returns Current *ppv value
1994 * @param ppv Pointer to the pointer variable to update.
1995 * @param pv The pointer value to assign to *ppv.
1996 */
1997DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
1998{
1999#if ARCH_BITS == 32
2000 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2001#elif ARCH_BITS == 64
2002 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2003#else
2004# error "ARCH_BITS is bogus"
2005#endif
2006}
2007
2008
2009/**
2010 * Atomically Compare and Exchange an unsigned 32-bit value.
2011 *
2012 * @returns true if xchg was done.
2013 * @returns false if xchg wasn't done.
2014 *
2015 * @param pu32 Pointer to the value to update.
2016 * @param u32New The new value to assign to *pu32.
2017 * @param u32Old The old value to compare *pu32 with.
2018 */
2019#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2020DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2021#else
2022DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2023{
2024# if RT_INLINE_ASM_GNU_STYLE
2025 uint32_t u32Ret;
2026 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2027 "setz %%al\n\t"
2028 "movzx %%al, %%eax\n\t"
2029 : "=m" (*pu32),
2030 "=a" (u32Ret)
2031 : "r" (u32New),
2032 "1" (u32Old));
2033 return (bool)u32Ret;
2034
2035# elif RT_INLINE_ASM_USES_INTRIN
2036 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2037
2038# else
2039 uint32_t u32Ret;
2040 __asm
2041 {
2042# ifdef __AMD64__
2043 mov rdx, [pu32]
2044# else
2045 mov edx, [pu32]
2046# endif
2047 mov eax, [u32Old]
2048 mov ecx, [u32New]
2049# ifdef __AMD64__
2050 lock cmpxchg [rdx], ecx
2051# else
2052 lock cmpxchg [edx], ecx
2053# endif
2054 setz al
2055 movzx eax, al
2056 mov [u32Ret], eax
2057 }
2058 return !!u32Ret;
2059# endif
2060}
2061#endif
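
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): the typical compare-and-exchange retry
 * loop, here raising a 32-bit maximum atomically. The loop re-reads the
 * old value whenever another CPU modified it between the read and the
 * exchange attempt.
 */
#if 0 /* example, not compiled */
static void ExampleAtomicSetMaxU32(uint32_t volatile *pu32, uint32_t u32New)
{
    uint32_t u32Old;
    do
    {
        u32Old = *pu32;
        if (u32Old >= u32New)
            return;             /* already at least as large, nothing to do */
    } while (!ASMAtomicCmpXchgU32(pu32, u32New, u32Old));
}
#endif
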
2062
2063
2064/**
2065 * Atomically Compare and Exchange a signed 32-bit value.
2066 *
2067 * @returns true if xchg was done.
2068 * @returns false if xchg wasn't done.
2069 *
2070 * @param pi32 Pointer to the value to update.
2071 * @param i32New The new value to assign to *pi32.
2072 * @param i32Old The old value to compare *pi32 with.
2073 */
2074DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2075{
2076 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2077}
2078
2079
2080/**
2081 * Atomically Compare and exchange an unsigned 64-bit value.
2082 *
2083 * @returns true if xchg was done.
2084 * @returns false if xchg wasn't done.
2085 *
2086 * @param pu64 Pointer to the 64-bit variable to update.
2087 * @param u64New The 64-bit value to assign to *pu64.
2088 * @param u64Old The value to compare with.
2089 */
2090#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2091DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2092#else
2093DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2094{
2095# if RT_INLINE_ASM_USES_INTRIN
2096 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2097
2098# elif defined(__AMD64__)
2099# if RT_INLINE_ASM_GNU_STYLE
2100 uint64_t u64Ret;
2101 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2102 "setz %%al\n\t"
2103 "movzx %%al, %%eax\n\t"
2104 : "=m" (*pu64),
2105 "=a" (u64Ret)
2106 : "r" (u64New),
2107 "1" (u64Old));
2108 return (bool)u64Ret;
2109# else
2110 bool fRet;
2111 __asm
2112 {
2113 mov rdx, [pu64]
2114 mov rax, [u64Old]
2115 mov rcx, [u64New]
2116 lock cmpxchg [rdx], rcx
2117 setz al
2118 mov [fRet], al
2119 }
2120 return fRet;
2121# endif
2122# else /* !__AMD64__ */
2123 uint32_t u32Ret;
2124# if RT_INLINE_ASM_GNU_STYLE
2125# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2126 uint32_t u32 = (uint32_t)u64New;
2127 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2128 "lock; cmpxchg8b (%5)\n\t"
2129 "setz %%al\n\t"
2130 "xchgl %%ebx, %3\n\t"
2131 "movzx %%al, %%eax\n\t"
2132 : "=a" (u32Ret),
2133 "=m" (*pu64)
2134 : "A" (u64Old),
2135 "m" ( u32 ),
2136 "c" ( (uint32_t)(u64New >> 32) ),
2137 "S" (pu64) );
2138# else /* !PIC */
2139 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2140 "setz %%al\n\t"
2141 "movzx %%al, %%eax\n\t"
2142 : "=a" (u32Ret),
2143 "=m" (*pu64)
2144 : "A" (u64Old),
2145 "b" ( (uint32_t)u64New ),
2146 "c" ( (uint32_t)(u64New >> 32) ));
2147# endif
2148 return (bool)u32Ret;
2149# else
2150 __asm
2151 {
2152 mov ebx, dword ptr [u64New]
2153 mov ecx, dword ptr [u64New + 4]
2154 mov edi, [pu64]
2155 mov eax, dword ptr [u64Old]
2156 mov edx, dword ptr [u64Old + 4]
2157 lock cmpxchg8b [edi]
2158 setz al
2159 movzx eax, al
2160 mov dword ptr [u32Ret], eax
2161 }
2162 return !!u32Ret;
2163# endif
2164# endif /* !__AMD64__ */
2165}
2166#endif
2167
2168
2169/**
2170 * Atomically Compare and exchange a signed 64-bit value.
2171 *
2172 * @returns true if xchg was done.
2173 * @returns false if xchg wasn't done.
2174 *
2175 * @param pi64 Pointer to the 64-bit variable to update.
2176 * @param i64 The 64-bit value to assign to *pi64.
2177 * @param i64Old The value to compare with.
2178 */
2179DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2180{
2181 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2182}
2183
2184
2185
2186/** @def ASMAtomicCmpXchgSize
2187 * Atomically Compare and Exchange a value whose size might differ
2188 * between platforms or compilers.
2189 *
2190 * @param pu Pointer to the value to update.
2191 * @param uNew The new value to assign to *pu.
2192 * @param uOld The old value to compare *pu with.
2193 * @param fRc Where to store the result.
2194 */
2195#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2196 do { \
2197 switch (sizeof(*(pu))) { \
2198 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2199 break; \
2200 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2201 break; \
2202 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2203 (fRc) = false; \
2204 break; \
2205 } \
2206 } while (0)
2207
2208
2209/**
2210 * Atomically Compare and Exchange a pointer value.
2211 *
2212 * @returns true if xchg was done.
2213 * @returns false if xchg wasn't done.
2214 *
2215 * @param ppv Pointer to the value to update.
2216 * @param pvNew The new value to assign to *ppv.
2217 * @param pvOld The old value to compare *ppv with.
2218 */
2219DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2220{
2221#if ARCH_BITS == 32
2222 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2223#elif ARCH_BITS == 64
2224 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2225#else
2226# error "ARCH_BITS is bogus"
2227#endif
2228}
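
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): lock-free push onto a singly linked
 * list head using the pointer variant of compare-and-exchange.
 */
#if 0 /* example, not compiled */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    /* ... payload ... */
} EXAMPLENODE;

static void ExamplePushNode(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    EXAMPLENODE *pOldHead;
    do
    {
        pOldHead = *ppHead;
        pNode->pNext = pOldHead;
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOldHead));
}
#endif
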
2229
2230
2231/**
2232 * Atomically increment an unsigned 32-bit value.
2233 *
2234 * @returns The new value.
2235 * @param pu32 Pointer to the value to increment.
2236 */
2237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2238DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2239#else
2240DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2241{
2242 uint32_t u32;
2243# if RT_INLINE_ASM_USES_INTRIN
2244 u32 = _InterlockedIncrement((long *)pu32);
2245
2246# elif RT_INLINE_ASM_GNU_STYLE
2247 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2248 "incl %0\n\t"
2249 : "=r" (u32),
2250 "=m" (*pu32)
2251 : "0" (1)
2252 : "memory");
2253# else
2254 __asm
2255 {
2256 mov eax, 1
2257# ifdef __AMD64__
2258 mov rdx, [pu32]
2259 lock xadd [rdx], eax
2260# else
2261 mov edx, [pu32]
2262 lock xadd [edx], eax
2263# endif
2264 inc eax
2265 mov u32, eax
2266 }
2267# endif
2268 return u32;
2269}
2270#endif
2271
2272
2273/**
2274 * Atomically increment a signed 32-bit value.
2275 *
2276 * @returns The new value.
2277 * @param pi32 Pointer to the value to increment.
2278 */
2279DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2280{
2281 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2282}
2283
2284
2285/**
2286 * Atomically decrement an unsigned 32-bit value.
2287 *
2288 * @returns The new value.
2289 * @param pu32 Pointer to the value to decrement.
2290 */
2291#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2292DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2293#else
2294DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2295{
2296 uint32_t u32;
2297# if RT_INLINE_ASM_USES_INTRIN
2298 u32 = _InterlockedDecrement((long *)pu32);
2299
2300# elif RT_INLINE_ASM_GNU_STYLE
2301 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2302 "decl %0\n\t"
2303 : "=r" (u32),
2304 "=m" (*pu32)
2305 : "0" (-1)
2306 : "memory");
2307# else
2308 __asm
2309 {
2310 mov eax, -1
2311# ifdef __AMD64__
2312 mov rdx, [pu32]
2313 lock xadd [rdx], eax
2314# else
2315 mov edx, [pu32]
2316 lock xadd [edx], eax
2317# endif
2318 dec eax
2319 mov u32, eax
2320 }
2321# endif
2322 return u32;
2323}
2324#endif
2325
2326
2327/**
2328 * Atomically decrement a signed 32-bit value.
2329 *
2330 * @returns The new value.
2331 * @param pi32 Pointer to the value to decrement.
2332 */
2333DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2334{
2335 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2336}
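
/*
 * Usage sketch (illustrative only, not part of the original header; the
 * Example* names are placeholders): a simple reference count built on the
 * atomic increment/decrement pair. Both return the new value, so the
 * caller that drops the count to zero knows it must free the object.
 */
#if 0 /* example, not compiled */
typedef struct EXAMPLEOBJ
{
    uint32_t volatile cRefs;
    /* ... payload ... */
} EXAMPLEOBJ;

static void ExampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

static bool ExampleRelease(EXAMPLEOBJ *pObj)
{
    return ASMAtomicDecU32(&pObj->cRefs) == 0;  /* true: last reference gone */
}
#endif
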
2337
2338
2339/**
2340 * Atomically Or an unsigned 32-bit value.
2341 *
2342 * @param pu32 Pointer to the variable to OR u32 with.
2343 * @param u32 The value to OR *pu32 with.
2344 */
2345#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2346DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2347#else
2348DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2349{
2350# if RT_INLINE_ASM_USES_INTRIN
2351 _InterlockedOr((long volatile *)pu32, (long)u32);
2352
2353# elif RT_INLINE_ASM_GNU_STYLE
2354 __asm__ __volatile__("lock; orl %1, %0\n\t"
2355 : "=m" (*pu32)
2356 : "r" (u32));
2357# else
2358 __asm
2359 {
2360 mov eax, [u32]
2361# ifdef __AMD64__
2362 mov rdx, [pu32]
2363 lock or [rdx], eax
2364# else
2365 mov edx, [pu32]
2366 lock or [edx], eax
2367# endif
2368 }
2369# endif
2370}
2371#endif
2372
2373
2374/**
2375 * Atomically Or a signed 32-bit value.
2376 *
2377 * @param pi32 Pointer to the variable to OR i32 with.
2378 * @param i32 The value to OR *pi32 with.
2379 */
2380DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2381{
2382 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2383}
2384
2385
2386/**
2387 * Atomically And an unsigned 32-bit value.
2388 *
2389 * @param pu32 Pointer to the variable to AND u32 with.
2390 * @param u32 The value to AND *pu32 with.
2391 */
2392#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2393DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2394#else
2395DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2396{
2397# if RT_INLINE_ASM_USES_INTRIN
2398 _InterlockedAnd((long volatile *)pu32, u32);
2399
2400# elif RT_INLINE_ASM_GNU_STYLE
2401 __asm__ __volatile__("lock; andl %1, %0\n\t"
2402 : "=m" (*pu32)
2403 : "r" (u32));
2404# else
2405 __asm
2406 {
2407 mov eax, [u32]
2408# ifdef __AMD64__
2409 mov rdx, [pu32]
2410 lock and [rdx], eax
2411# else
2412 mov edx, [pu32]
2413 lock and [edx], eax
2414# endif
2415 }
2416# endif
2417}
2418#endif
2419
2420
2421/**
2422 * Atomically And a signed 32-bit value.
2423 *
2424 * @param pi32 Pointer to the variable to AND i32 with.
2425 * @param i32 The value to AND *pi32 with.
2426 */
2427DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2428{
2429 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2430}
2431
2432
2433/**
2434 * Invalidate page.
2435 *
2436 * @param pv Address of the page to invalidate.
2437 */
2438#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2439DECLASM(void) ASMInvalidatePage(void *pv);
2440#else
2441DECLINLINE(void) ASMInvalidatePage(void *pv)
2442{
2443# if RT_INLINE_ASM_USES_INTRIN
2444 __invlpg(pv);
2445
2446# elif RT_INLINE_ASM_GNU_STYLE
2447 __asm__ __volatile__("invlpg %0\n\t"
2448 : : "m" (*(uint8_t *)pv));
2449# else
2450 __asm
2451 {
2452# ifdef __AMD64__
2453 mov rax, [pv]
2454 invlpg [rax]
2455# else
2456 mov eax, [pv]
2457 invlpg [eax]
2458# endif
2459 }
2460# endif
2461}
2462#endif
2463
2464
2465#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2466# if PAGE_SIZE != 0x1000
2467# error "PAGE_SIZE is not 0x1000!"
2468# endif
2469#endif
2470
2471/**
2472 * Zeros a 4K memory page.
2473 *
2474 * @param pv Pointer to the memory block. This must be page aligned.
2475 */
2476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2477DECLASM(void) ASMMemZeroPage(volatile void *pv);
2478# else
2479DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2480{
2481# if RT_INLINE_ASM_USES_INTRIN
2482# ifdef __AMD64__
2483 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2484# else
2485 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2486# endif
2487
2488# elif RT_INLINE_ASM_GNU_STYLE
2489 RTUINTREG uDummy;
2490# ifdef __AMD64__
2491 __asm__ __volatile__ ("rep stosq"
2492 : "=D" (pv),
2493 "=c" (uDummy)
2494 : "0" (pv),
2495 "c" (0x1000 >> 3),
2496 "a" (0)
2497 : "memory");
2498# else
2499 __asm__ __volatile__ ("rep stosl"
2500 : "=D" (pv),
2501 "=c" (uDummy)
2502 : "0" (pv),
2503 "c" (0x1000 >> 2),
2504 "a" (0)
2505 : "memory");
2506# endif
2507# else
2508 __asm
2509 {
2510# ifdef __AMD64__
2511 xor rax, rax
2512 mov ecx, 0200h
2513 mov rdi, [pv]
2514 rep stosq
2515# else
2516 xor eax, eax
2517 mov ecx, 0400h
2518 mov edi, [pv]
2519 rep stosd
2520# endif
2521 }
2522# endif
2523}
2524# endif
2525
2526
2527/**
2528 * Zeros a memory block with a 32-bit aligned size.
2529 *
2530 * @param pv Pointer to the memory block.
2531 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2535#else
2536DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2537{
2538# if RT_INLINE_ASM_USES_INTRIN
2539 __stosd((unsigned long *)pv, 0, cb >> 2);
2540
2541# elif RT_INLINE_ASM_GNU_STYLE
2542 __asm__ __volatile__ ("rep stosl"
2543 : "=D" (pv),
2544 "=c" (cb)
2545 : "0" (pv),
2546 "1" (cb >> 2),
2547 "a" (0)
2548 : "memory");
2549# else
2550 __asm
2551 {
2552 xor eax, eax
2553# ifdef __AMD64__
2554 mov rcx, [cb]
2555 shr rcx, 2
2556 mov rdi, [pv]
2557# else
2558 mov ecx, [cb]
2559 shr ecx, 2
2560 mov edi, [pv]
2561# endif
2562 rep stosd
2563 }
2564# endif
2565}
2566#endif
2567
2568
2569/**
2570 * Fills a memory block with a 32-bit aligned size.
2571 *
2572 * @param pv Pointer to the memory block.
2573 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2574 * @param u32 The value to fill with.
2575 */
2576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2577DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2578#else
2579DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2580{
2581# if RT_INLINE_ASM_USES_INTRIN
2582 __stosd((unsigned long *)pv, u32, cb >> 2);
2583
2584# elif RT_INLINE_ASM_GNU_STYLE
2585 __asm__ __volatile__ ("rep stosl"
2586 : "=D" (pv),
2587 "=c" (cb)
2588 : "0" (pv),
2589 "1" (cb >> 2),
2590 "a" (u32)
2591 : "memory");
2592# else
2593 __asm
2594 {
2595# ifdef __AMD64__
2596 mov rcx, [cb]
2597 shr rcx, 2
2598 mov rdi, [pv]
2599# else
2600 mov ecx, [cb]
2601 shr ecx, 2
2602 mov edi, [pv]
2603# endif
2604 mov eax, [u32]
2605 rep stosd
2606 }
2607# endif
2608}
2609#endif
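
/* Example: ASMMemFill32 can stamp a table of 32-bit entries with a marker
 * value; the byte count must be a multiple of four, which holds by
 * construction here. Illustrative sketch only; the ASMExamplePoisonTable name
 * and the poison value are hypothetical. */
DECLINLINE(void) ASMExamplePoisonTable(uint32_t *pau32Table, size_t cEntries)
{
    ASMMemFill32(pau32Table, cEntries * sizeof(uint32_t), 0xdeadbeef);
}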
2610
2611
2612
2613/**
2614 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2615 *
2616 * @returns u32F1 * u32F2.
2617 */
2618#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2619DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2620#else
2621DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2622{
2623# ifdef __AMD64__
2624 return (uint64_t)u32F1 * u32F2;
2625# else /* !__AMD64__ */
2626 uint64_t u64;
2627# if RT_INLINE_ASM_GNU_STYLE
2628 __asm__ __volatile__("mull %%edx"
2629 : "=A" (u64)
2630 : "a" (u32F2), "d" (u32F1));
2631# else
2632 __asm
2633 {
2634 mov edx, [u32F1]
2635 mov eax, [u32F2]
2636 mul edx
2637 mov dword ptr [u64], eax
2638 mov dword ptr [u64 + 4], edx
2639 }
2640# endif
2641 return u64;
2642# endif /* !__AMD64__ */
2643}
2644#endif
2645
2646
2647/**
2648 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2649 *
2650 * @returns i32F1 * i32F2.
2651 */
2652#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2653DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2654#else
2655DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2656{
2657# ifdef __AMD64__
2658 return (int64_t)i32F1 * i32F2;
2659# else /* !__AMD64__ */
2660 int64_t i64;
2661# if RT_INLINE_ASM_GNU_STYLE
2662 __asm__ __volatile__("imull %%edx"
2663 : "=A" (i64)
2664 : "a" (i32F2), "d" (i32F1));
2665# else
2666 __asm
2667 {
2668 mov edx, [i32F1]
2669 mov eax, [i32F2]
2670 imul edx
2671 mov dword ptr [i64], eax
2672 mov dword ptr [i64 + 4], edx
2673 }
2674# endif
2675 return i64;
2676# endif /* !__AMD64__ */
2677}
2678#endif
2679
2680
2681/**
2682 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2683 *
2684 * @returns u64 / u32.
2685 */
2686#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2687DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2688#else
2689DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2690{
2691# ifdef __AMD64__
2692 return (uint32_t)(u64 / u32);
2693# else /* !__AMD64__ */
2694# if RT_INLINE_ASM_GNU_STYLE
2695 RTUINTREG uDummy;
2696 __asm__ __volatile__("divl %3"
2697 : "=a" (u32), "=d"(uDummy)
2698 : "A" (u64), "r" (u32));
2699# else
2700 __asm
2701 {
2702 mov eax, dword ptr [u64]
2703 mov edx, dword ptr [u64 + 4]
2704 mov ecx, [u32]
2705 div ecx
2706 mov [u32], eax
2707 }
2708# endif
2709 return u32;
2710# endif /* !__AMD64__ */
2711}
2712#endif
2713
2714
2715/**
2716 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2717 *
2718 * @returns i64 / i32.
2719 */
2720#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2721DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2722#else
2723DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2724{
2725# ifdef __AMD64__
2726 return (int32_t)(i64 / i32);
2727# else /* !__AMD64__ */
2728# if RT_INLINE_ASM_GNU_STYLE
2729 RTUINTREG iDummy;
2730 __asm__ __volatile__("idivl %3"
2731 : "=a" (i32), "=d"(iDummy)
2732 : "A" (i64), "r" (i32));
2733# else
2734 __asm
2735 {
2736 mov eax, dword ptr [i64]
2737 mov edx, dword ptr [i64 + 4]
2738 mov ecx, [i32]
2739 idiv ecx
2740 mov [i32], eax
2741 }
2742# endif
2743 return i32;
2744# endif /* !__AMD64__ */
2745}
2746#endif
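
/* Example: the multiply and divide helpers above are typically combined to
 * scale a 32-bit value by a ratio without overflowing the intermediate
 * product. Illustrative sketch only; the ASMExampleScaleU32 name is
 * hypothetical and the caller must ensure the final quotient fits in 32 bits
 * and that u32Den is non-zero. */
DECLINLINE(uint32_t) ASMExampleScaleU32(uint32_t u32Value, uint32_t u32Num, uint32_t u32Den)
{
    return ASMDivU64ByU32RetU32(ASMMult2xU32RetU64(u32Value, u32Num), u32Den);
}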
2747
2748
2749/**
2750 * Probes a byte pointer for read access.
2751 *
2752 * While the function will fault if the byte is not read accessible,
2753 * the idea is to do this in a safe place, e.g. before acquiring
2754 * locks and the like.
2755 *
2756 * Also, this function guarantees that an eager compiler is not going
2757 * to optimize the probing away.
2758 *
2759 * @param pvByte Pointer to the byte.
2760 */
2761#if RT_INLINE_ASM_EXTERNAL
2762DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2763#else
2764DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2765{
2766 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2767 uint8_t u8;
2768# if RT_INLINE_ASM_GNU_STYLE
2769 __asm__ __volatile__("movb (%1), %0\n\t"
2770 : "=r" (u8)
2771 : "r" (pvByte));
2772# else
2773 __asm
2774 {
2775# ifdef __AMD64__
2776 mov rax, [pvByte]
2777 mov al, [rax]
2778# else
2779 mov eax, [pvByte]
2780 mov al, [eax]
2781# endif
2782 mov [u8], al
2783 }
2784# endif
2785 return u8;
2786}
2787#endif
2788
2789/**
2790 * Probes a buffer for read access page by page.
2791 *
2792 * While the function will fault if the buffer is not fully read
2793 * accessible, the idea is to do this in a safe place, e.g. before
2794 * acquiring locks and the like.
2795 *
2796 * Also, this function guarantees that an eager compiler is not going
2797 * to optimize the probing away.
2798 *
2799 * @param pvBuf Pointer to the buffer.
2800 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2801 */
2802DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2803{
2804 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2805 /* the first byte */
2806 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2807 ASMProbeReadByte(pu8);
2808
2809 /* the pages in between. */
2810 while (cbBuf > /*PAGE_SIZE*/0x1000)
2811 {
2812 ASMProbeReadByte(pu8);
2813 cbBuf -= /*PAGE_SIZE*/0x1000;
2814 pu8 += /*PAGE_SIZE*/0x1000;
2815 }
2816
2817 /* the last byte */
2818 ASMProbeReadByte(pu8 + cbBuf - 1);
2819}
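
/* Example: probing is done up front so that any page fault is taken before the
 * caller enters a section where faulting is unsafe, e.g. while holding a
 * spinlock. Illustrative sketch only; the ASMExampleProbeUserBuffer name is
 * hypothetical. */
DECLINLINE(void) ASMExampleProbeUserBuffer(const void *pvUserBuf, size_t cbUserBuf)
{
    if (cbUserBuf)                              /* ASMProbeReadBuffer requires cbBuf >= 1. */
        ASMProbeReadBuffer(pvUserBuf, cbUserBuf);
}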
2820
2821
2822/** @def ASMBreakpoint
2823 * Debugger Breakpoint.
2824 * @remark In the gnu world we add a nop instruction after the int3 to
2825 * force gdb to remain at the int3 source line.
2826 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
2827 * @internal
2828 */
2829#if RT_INLINE_ASM_GNU_STYLE
2830# ifndef __L4ENV__
2831# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
2832# else
2833# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
2834# endif
2835#else
2836# define ASMBreakpoint() __debugbreak()
2837#endif
2838
2839
2840
2841/** @defgroup grp_inline_bits Bit Operations
2842 * @{
2843 */
2844
2845
2846/**
2847 * Sets a bit in a bitmap.
2848 *
2849 * @param pvBitmap Pointer to the bitmap.
2850 * @param iBit The bit to set.
2851 */
2852#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2853DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
2854#else
2855DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
2856{
2857# if RT_INLINE_ASM_USES_INTRIN
2858 _bittestandset((long *)pvBitmap, iBit);
2859
2860# elif RT_INLINE_ASM_GNU_STYLE
2861 __asm__ __volatile__ ("btsl %1, %0"
2862 : "=m" (*(volatile long *)pvBitmap)
2863 : "Ir" (iBit)
2864 : "memory");
2865# else
2866 __asm
2867 {
2868# ifdef __AMD64__
2869 mov rax, [pvBitmap]
2870 mov edx, [iBit]
2871 bts [rax], edx
2872# else
2873 mov eax, [pvBitmap]
2874 mov edx, [iBit]
2875 bts [eax], edx
2876# endif
2877 }
2878# endif
2879}
2880#endif
2881
2882
2883/**
2884 * Atomically sets a bit in a bitmap.
2885 *
2886 * @param pvBitmap Pointer to the bitmap.
2887 * @param iBit The bit to set.
2888 */
2889#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2890DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
2891#else
2892DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
2893{
2894# if RT_INLINE_ASM_USES_INTRIN
2895 _interlockedbittestandset((long *)pvBitmap, iBit);
2896# elif RT_INLINE_ASM_GNU_STYLE
2897 __asm__ __volatile__ ("lock; btsl %1, %0"
2898 : "=m" (*(volatile long *)pvBitmap)
2899 : "Ir" (iBit)
2900 : "memory");
2901# else
2902 __asm
2903 {
2904# ifdef __AMD64__
2905 mov rax, [pvBitmap]
2906 mov edx, [iBit]
2907 lock bts [rax], edx
2908# else
2909 mov eax, [pvBitmap]
2910 mov edx, [iBit]
2911 lock bts [eax], edx
2912# endif
2913 }
2914# endif
2915}
2916#endif
2917
2918
2919/**
2920 * Clears a bit in a bitmap.
2921 *
2922 * @param pvBitmap Pointer to the bitmap.
2923 * @param iBit The bit to clear.
2924 */
2925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2926DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
2927#else
2928DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
2929{
2930# if RT_INLINE_ASM_USES_INTRIN
2931 _bittestandreset((long *)pvBitmap, iBit);
2932
2933# elif RT_INLINE_ASM_GNU_STYLE
2934 __asm__ __volatile__ ("btrl %1, %0"
2935 : "=m" (*(volatile long *)pvBitmap)
2936 : "Ir" (iBit)
2937 : "memory");
2938# else
2939 __asm
2940 {
2941# ifdef __AMD64__
2942 mov rax, [pvBitmap]
2943 mov edx, [iBit]
2944 btr [rax], edx
2945# else
2946 mov eax, [pvBitmap]
2947 mov edx, [iBit]
2948 btr [eax], edx
2949# endif
2950 }
2951# endif
2952}
2953#endif
2954
2955
2956/**
2957 * Atomically clears a bit in a bitmap.
2958 *
2959 * @param pvBitmap Pointer to the bitmap.
2960 * @param iBit The bit to clear.
2961 * @remark No memory barrier, take care on smp.
2962 */
2963#if RT_INLINE_ASM_EXTERNAL
2964DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
2965#else
2966DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
2967{
2968# if RT_INLINE_ASM_GNU_STYLE
2969 __asm__ __volatile__ ("lock; btrl %1, %0"
2970 : "=m" (*(volatile long *)pvBitmap)
2971 : "Ir" (iBit)
2972 : "memory");
2973# else
2974 __asm
2975 {
2976# ifdef __AMD64__
2977 mov rax, [pvBitmap]
2978 mov edx, [iBit]
2979 lock btr [rax], edx
2980# else
2981 mov eax, [pvBitmap]
2982 mov edx, [iBit]
2983 lock btr [eax], edx
2984# endif
2985 }
2986# endif
2987}
2988#endif
2989
2990
2991/**
2992 * Toggles a bit in a bitmap.
2993 *
2994 * @param pvBitmap Pointer to the bitmap.
2995 * @param iBit The bit to toggle.
2996 */
2997#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2998DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
2999#else
3000DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3001{
3002# if RT_INLINE_ASM_USES_INTRIN
3003 _bittestandcomplement((long *)pvBitmap, iBit);
3004# elif RT_INLINE_ASM_GNU_STYLE
3005 __asm__ __volatile__ ("btcl %1, %0"
3006 : "=m" (*(volatile long *)pvBitmap)
3007 : "Ir" (iBit)
3008 : "memory");
3009# else
3010 __asm
3011 {
3012# ifdef __AMD64__
3013 mov rax, [pvBitmap]
3014 mov edx, [iBit]
3015 btc [rax], edx
3016# else
3017 mov eax, [pvBitmap]
3018 mov edx, [iBit]
3019 btc [eax], edx
3020# endif
3021 }
3022# endif
3023}
3024#endif
3025
3026
3027/**
3028 * Atomically toggles a bit in a bitmap.
3029 *
3030 * @param pvBitmap Pointer to the bitmap.
3031 * @param iBit The bit to toggle.
3032 */
3033#if RT_INLINE_ASM_EXTERNAL
3034DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3035#else
3036DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3037{
3038# if RT_INLINE_ASM_GNU_STYLE
3039 __asm__ __volatile__ ("lock; btcl %1, %0"
3040 : "=m" (*(volatile long *)pvBitmap)
3041 : "Ir" (iBit)
3042 : "memory");
3043# else
3044 __asm
3045 {
3046# ifdef __AMD64__
3047 mov rax, [pvBitmap]
3048 mov edx, [iBit]
3049 lock btc [rax], edx
3050# else
3051 mov eax, [pvBitmap]
3052 mov edx, [iBit]
3053 lock btc [eax], edx
3054# endif
3055 }
3056# endif
3057}
3058#endif
3059
3060
3061/**
3062 * Tests and sets a bit in a bitmap.
3063 *
3064 * @returns true if the bit was set.
3065 * @returns false if the bit was clear.
3066 * @param pvBitmap Pointer to the bitmap.
3067 * @param iBit The bit to test and set.
3068 */
3069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3070DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3071#else
3072DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3073{
3074 union { bool f; uint32_t u32; uint8_t u8; } rc;
3075# if RT_INLINE_ASM_USES_INTRIN
3076 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3077
3078# elif RT_INLINE_ASM_GNU_STYLE
3079 __asm__ __volatile__ ("btsl %2, %1\n\t"
3080 "setc %b0\n\t"
3081 "andl $1, %0\n\t"
3082 : "=q" (rc.u32),
3083 "=m" (*(volatile long *)pvBitmap)
3084 : "Ir" (iBit)
3085 : "memory");
3086# else
3087 __asm
3088 {
3089 mov edx, [iBit]
3090# ifdef __AMD64__
3091 mov rax, [pvBitmap]
3092 bts [rax], edx
3093# else
3094 mov eax, [pvBitmap]
3095 bts [eax], edx
3096# endif
3097 setc al
3098 and eax, 1
3099 mov [rc.u32], eax
3100 }
3101# endif
3102 return rc.f;
3103}
3104#endif
3105
3106
3107/**
3108 * Atomically tests and sets a bit in a bitmap.
3109 *
3110 * @returns true if the bit was set.
3111 * @returns false if the bit was clear.
3112 * @param pvBitmap Pointer to the bitmap.
3113 * @param iBit The bit to test and set.
3114 */
3115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3116DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3117#else
3118DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3119{
3120 union { bool f; uint32_t u32; uint8_t u8; } rc;
3121# if RT_INLINE_ASM_USES_INTRIN
3122 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3123# elif RT_INLINE_ASM_GNU_STYLE
3124 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3125 "setc %b0\n\t"
3126 "andl $1, %0\n\t"
3127 : "=q" (rc.u32),
3128 "=m" (*(volatile long *)pvBitmap)
3129 : "Ir" (iBit)
3130 : "memory");
3131# else
3132 __asm
3133 {
3134 mov edx, [iBit]
3135# ifdef __AMD64__
3136 mov rax, [pvBitmap]
3137 lock bts [rax], edx
3138# else
3139 mov eax, [pvBitmap]
3140 lock bts [eax], edx
3141# endif
3142 setc al
3143 and eax, 1
3144 mov [rc.u32], eax
3145 }
3146# endif
3147 return rc.f;
3148}
3149#endif
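
/* Example: ASMAtomicBitTestAndSet returns the previous bit value, so a false
 * result means the caller just set the bit and thus owns the corresponding
 * slot in an allocation bitmap. Illustrative sketch only; the
 * ASMExampleClaimSlot name is hypothetical. */
DECLINLINE(bool) ASMExampleClaimSlot(volatile void *pvAllocBitmap, int32_t iSlot)
{
    return !ASMAtomicBitTestAndSet(pvAllocBitmap, iSlot);
}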
3150
3151
3152/**
3153 * Tests and clears a bit in a bitmap.
3154 *
3155 * @returns true if the bit was set.
3156 * @returns false if the bit was clear.
3157 * @param pvBitmap Pointer to the bitmap.
3158 * @param iBit The bit to test and clear.
3159 */
3160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3161DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3162#else
3163DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3164{
3165 union { bool f; uint32_t u32; uint8_t u8; } rc;
3166# if RT_INLINE_ASM_USES_INTRIN
3167 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3168
3169# elif RT_INLINE_ASM_GNU_STYLE
3170 __asm__ __volatile__ ("btrl %2, %1\n\t"
3171 "setc %b0\n\t"
3172 "andl $1, %0\n\t"
3173 : "=q" (rc.u32),
3174 "=m" (*(volatile long *)pvBitmap)
3175 : "Ir" (iBit)
3176 : "memory");
3177# else
3178 __asm
3179 {
3180 mov edx, [iBit]
3181# ifdef __AMD64__
3182 mov rax, [pvBitmap]
3183 btr [rax], edx
3184# else
3185 mov eax, [pvBitmap]
3186 btr [eax], edx
3187# endif
3188 setc al
3189 and eax, 1
3190 mov [rc.u32], eax
3191 }
3192# endif
3193 return rc.f;
3194}
3195#endif
3196
3197
3198/**
3199 * Atomically tests and clears a bit in a bitmap.
3200 *
3201 * @returns true if the bit was set.
3202 * @returns false if the bit was clear.
3203 * @param pvBitmap Pointer to the bitmap.
3204 * @param iBit The bit to test and clear.
3205 * @remark No memory barrier, take care on smp.
3206 */
3207#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3208DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3209#else
3210DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3211{
3212 union { bool f; uint32_t u32; uint8_t u8; } rc;
3213# if RT_INLINE_ASM_USES_INTRIN
3214 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3215
3216# elif RT_INLINE_ASM_GNU_STYLE
3217 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3218 "setc %b0\n\t"
3219 "andl $1, %0\n\t"
3220 : "=q" (rc.u32),
3221 "=m" (*(volatile long *)pvBitmap)
3222 : "Ir" (iBit)
3223 : "memory");
3224# else
3225 __asm
3226 {
3227 mov edx, [iBit]
3228# ifdef __AMD64__
3229 mov rax, [pvBitmap]
3230 lock btr [rax], edx
3231# else
3232 mov eax, [pvBitmap]
3233 lock btr [eax], edx
3234# endif
3235 setc al
3236 and eax, 1
3237 mov [rc.u32], eax
3238 }
3239# endif
3240 return rc.f;
3241}
3242#endif
3243
3244
3245/**
3246 * Tests and toggles a bit in a bitmap.
3247 *
3248 * @returns true if the bit was set.
3249 * @returns false if the bit was clear.
3250 * @param pvBitmap Pointer to the bitmap.
3251 * @param iBit The bit to test and toggle.
3252 */
3253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3254DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3255#else
3256DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3257{
3258 union { bool f; uint32_t u32; uint8_t u8; } rc;
3259# if RT_INLINE_ASM_USES_INTRIN
3260 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3261
3262# elif RT_INLINE_ASM_GNU_STYLE
3263 __asm__ __volatile__ ("btcl %2, %1\n\t"
3264 "setc %b0\n\t"
3265 "andl $1, %0\n\t"
3266 : "=q" (rc.u32),
3267 "=m" (*(volatile long *)pvBitmap)
3268 : "Ir" (iBit)
3269 : "memory");
3270# else
3271 __asm
3272 {
3273 mov edx, [iBit]
3274# ifdef __AMD64__
3275 mov rax, [pvBitmap]
3276 btc [rax], edx
3277# else
3278 mov eax, [pvBitmap]
3279 btc [eax], edx
3280# endif
3281 setc al
3282 and eax, 1
3283 mov [rc.u32], eax
3284 }
3285# endif
3286 return rc.f;
3287}
3288#endif
3289
3290
3291/**
3292 * Atomically tests and toggles a bit in a bitmap.
3293 *
3294 * @returns true if the bit was set.
3295 * @returns false if the bit was clear.
3296 * @param pvBitmap Pointer to the bitmap.
3297 * @param iBit The bit to test and toggle.
3298 */
3299#if RT_INLINE_ASM_EXTERNAL
3300DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3301#else
3302DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3303{
3304 union { bool f; uint32_t u32; uint8_t u8; } rc;
3305# if RT_INLINE_ASM_GNU_STYLE
3306 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3307 "setc %b0\n\t"
3308 "andl $1, %0\n\t"
3309 : "=q" (rc.u32),
3310 "=m" (*(volatile long *)pvBitmap)
3311 : "Ir" (iBit)
3312 : "memory");
3313# else
3314 __asm
3315 {
3316 mov edx, [iBit]
3317# ifdef __AMD64__
3318 mov rax, [pvBitmap]
3319 lock btc [rax], edx
3320# else
3321 mov eax, [pvBitmap]
3322 lock btc [eax], edx
3323# endif
3324 setc al
3325 and eax, 1
3326 mov [rc.u32], eax
3327 }
3328# endif
3329 return rc.f;
3330}
3331#endif
3332
3333
3334/**
3335 * Tests if a bit in a bitmap is set.
3336 *
3337 * @returns true if the bit is set.
3338 * @returns false if the bit is clear.
3339 * @param pvBitmap Pointer to the bitmap.
3340 * @param iBit The bit to test.
3341 */
3342#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3343DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3344#else
3345DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3346{
3347 union { bool f; uint32_t u32; uint8_t u8; } rc;
3348# if RT_INLINE_ASM_USES_INTRIN
3349 rc.u32 = _bittest((long *)pvBitmap, iBit);
3350# elif RT_INLINE_ASM_GNU_STYLE
3351
3352 __asm__ __volatile__ ("btl %2, %1\n\t"
3353 "setc %b0\n\t"
3354 "andl $1, %0\n\t"
3355 : "=q" (rc.u32),
3356 "=m" (*(volatile long *)pvBitmap)
3357 : "Ir" (iBit)
3358 : "memory");
3359# else
3360 __asm
3361 {
3362 mov edx, [iBit]
3363# ifdef __AMD64__
3364 mov rax, [pvBitmap]
3365 bt [rax], edx
3366# else
3367 mov eax, [pvBitmap]
3368 bt [eax], edx
3369# endif
3370 setc al
3371 and eax, 1
3372 mov [rc.u32], eax
3373 }
3374# endif
3375 return rc.f;
3376}
3377#endif
3378
3379
3380/**
3381 * Clears a bit range within a bitmap.
3382 *
3383 * @param pvBitmap Pointer to the bitmap.
3384 * @param iBitStart The first bit to clear.
3385 * @param iBitEnd The first bit not to clear.
3386 */
3387DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3388{
3389 if (iBitStart < iBitEnd)
3390 {
3391 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3392 int iStart = iBitStart & ~31;
3393 int iEnd = iBitEnd & ~31;
3394 if (iStart == iEnd)
3395 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3396 else
3397 {
3398 /* bits in first dword. */
3399 if (iBitStart & 31)
3400 {
3401 *pu32 &= (1 << (iBitStart & 31)) - 1;
3402 pu32++;
3403 iBitStart = iStart + 32;
3404 }
3405
3406 /* whole dword. */
3407 if (iBitStart != iEnd)
3408 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3409
3410 /* bits in last dword. */
3411 if (iBitEnd & 31)
3412 {
3413 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3414 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3415 }
3416 }
3417 }
3418}
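
/* Example: releasing a run of slots claimed from an allocation bitmap maps
 * directly onto ASMBitClearRange, which clears [iFirstSlot, iFirstSlot + cSlots).
 * Illustrative sketch only; the ASMExampleFreeSlots name is hypothetical and,
 * unlike the ASMAtomicBit* operations, the clearing is not atomic. */
DECLINLINE(void) ASMExampleFreeSlots(volatile void *pvAllocBitmap, int32_t iFirstSlot, int32_t cSlots)
{
    ASMBitClearRange(pvAllocBitmap, iFirstSlot, iFirstSlot + cSlots);
}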
3419
3420
3421/**
3422 * Finds the first clear bit in a bitmap.
3423 *
3424 * @returns Index of the first zero bit.
3425 * @returns -1 if no clear bit was found.
3426 * @param pvBitmap Pointer to the bitmap.
3427 * @param cBits The number of bits in the bitmap. Multiple of 32.
3428 */
3429#if RT_INLINE_ASM_EXTERNAL
3430DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3431#else
3432DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3433{
3434 if (cBits)
3435 {
3436 int32_t iBit;
3437# if RT_INLINE_ASM_GNU_STYLE
3438 RTCCUINTREG uEAX, uECX, uEDI;
3439 cBits = RT_ALIGN_32(cBits, 32);
3440 __asm__ __volatile__("repe; scasl\n\t"
3441 "je 1f\n\t"
3442# ifdef __AMD64__
3443 "lea -4(%%rdi), %%rdi\n\t"
3444 "xorl (%%rdi), %%eax\n\t"
3445 "subq %5, %%rdi\n\t"
3446# else
3447 "lea -4(%%edi), %%edi\n\t"
3448 "xorl (%%edi), %%eax\n\t"
3449 "subl %5, %%edi\n\t"
3450# endif
3451 "shll $3, %%edi\n\t"
3452 "bsfl %%eax, %%edx\n\t"
3453 "addl %%edi, %%edx\n\t"
3454 "1:\t\n"
3455 : "=d" (iBit),
3456 "=&c" (uECX),
3457 "=&D" (uEDI),
3458 "=&a" (uEAX)
3459 : "0" (0xffffffff),
3460 "mr" (pvBitmap),
3461 "1" (cBits >> 5),
3462 "2" (pvBitmap),
3463 "3" (0xffffffff));
3464# else
3465 cBits = RT_ALIGN_32(cBits, 32);
3466 __asm
3467 {
3468# ifdef __AMD64__
3469 mov rdi, [pvBitmap]
3470 mov rbx, rdi
3471# else
3472 mov edi, [pvBitmap]
3473 mov ebx, edi
3474# endif
3475 mov edx, 0ffffffffh
3476 mov eax, edx
3477 mov ecx, [cBits]
3478 shr ecx, 5
3479 repe scasd
3480 je done
3481
3482# ifdef __AMD64__
3483 lea rdi, [rdi - 4]
3484 xor eax, [rdi]
3485 sub rdi, rbx
3486# else
3487 lea edi, [edi - 4]
3488 xor eax, [edi]
3489 sub edi, ebx
3490# endif
3491 shl edi, 3
3492 bsf edx, eax
3493 add edx, edi
3494 done:
3495 mov [iBit], edx
3496 }
3497# endif
3498 return iBit;
3499 }
3500 return -1;
3501}
3502#endif
3503
3504
3505/**
3506 * Finds the next clear bit in a bitmap.
3507 *
3508 * @returns Index of the next clear bit.
3509 * @returns -1 if no clear bit was found.
3510 * @param pvBitmap Pointer to the bitmap.
3511 * @param cBits The number of bits in the bitmap. Multiple of 32.
3512 * @param iBitPrev The bit returned from the last search.
3513 * The search will start at iBitPrev + 1.
3514 */
3515#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3516DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3517#else
3518DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3519{
3520 int iBit = ++iBitPrev & 31;
3521 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3522 cBits -= iBitPrev & ~31;
3523 if (iBit)
3524 {
3525 /* inspect the first dword. */
3526 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3527# if RT_INLINE_ASM_USES_INTRIN
3528 unsigned long ulBit = 0;
3529 if (_BitScanForward(&ulBit, u32))
3530 return ulBit + iBitPrev;
3531 iBit = -1;
3532# else
3533# if RT_INLINE_ASM_GNU_STYLE
3534 __asm__ __volatile__("bsf %1, %0\n\t"
3535 "jnz 1f\n\t"
3536 "movl $-1, %0\n\t"
3537 "1:\n\t"
3538 : "=r" (iBit)
3539 : "r" (u32));
3540# else
3541 __asm
3542 {
3543 mov edx, [u32]
3544 bsf eax, edx
3545 jnz done
3546 mov eax, 0ffffffffh
3547 done:
3548 mov [iBit], eax
3549 }
3550# endif
3551 if (iBit >= 0)
3552 return iBit + iBitPrev;
3553# endif
3554 /* Search the rest of the bitmap, if there is anything. */
3555 if (cBits > 32)
3556 {
3557 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3558 if (iBit >= 0)
3559 return iBit + (iBitPrev & ~31) + 32;
3560 }
3561 }
3562 else
3563 {
3564 /* Search the rest of the bitmap. */
3565 iBit = ASMBitFirstClear(pvBitmap, cBits);
3566 if (iBit >= 0)
3567 return iBit + (iBitPrev & ~31);
3568 }
3569 return iBit;
3570}
3571#endif
3572
3573
3574/**
3575 * Finds the first set bit in a bitmap.
3576 *
3577 * @returns Index of the first set bit.
3578 * @returns -1 if no set bit was found.
3579 * @param pvBitmap Pointer to the bitmap.
3580 * @param cBits The number of bits in the bitmap. Multiple of 32.
3581 */
3582#if RT_INLINE_ASM_EXTERNAL
3583DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3584#else
3585DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3586{
3587 if (cBits)
3588 {
3589 int32_t iBit;
3590# if RT_INLINE_ASM_GNU_STYLE
3591 RTCCUINTREG uEAX, uECX, uEDI;
3592 cBits = RT_ALIGN_32(cBits, 32);
3593 __asm__ __volatile__("repe; scasl\n\t"
3594 "je 1f\n\t"
3595# ifdef __AMD64__
3596 "lea -4(%%rdi), %%rdi\n\t"
3597 "movl (%%rdi), %%eax\n\t"
3598 "subq %5, %%rdi\n\t"
3599# else
3600 "lea -4(%%edi), %%edi\n\t"
3601 "movl (%%edi), %%eax\n\t"
3602 "subl %5, %%edi\n\t"
3603# endif
3604 "shll $3, %%edi\n\t"
3605 "bsfl %%eax, %%edx\n\t"
3606 "addl %%edi, %%edx\n\t"
3607 "1:\t\n"
3608 : "=d" (iBit),
3609 "=&c" (uECX),
3610 "=&D" (uEDI),
3611 "=&a" (uEAX)
3612 : "0" (0xffffffff),
3613 "mr" (pvBitmap),
3614 "1" (cBits >> 5),
3615 "2" (pvBitmap),
3616 "3" (0));
3617# else
3618 cBits = RT_ALIGN_32(cBits, 32);
3619 __asm
3620 {
3621# ifdef __AMD64__
3622 mov rdi, [pvBitmap]
3623 mov rbx, rdi
3624# else
3625 mov edi, [pvBitmap]
3626 mov ebx, edi
3627# endif
3628 mov edx, 0ffffffffh
3629 xor eax, eax
3630 mov ecx, [cBits]
3631 shr ecx, 5
3632 repe scasd
3633 je done
3634# ifdef __AMD64__
3635 lea rdi, [rdi - 4]
3636 mov eax, [rdi]
3637 sub rdi, rbx
3638# else
3639 lea edi, [edi - 4]
3640 mov eax, [edi]
3641 sub edi, ebx
3642# endif
3643 shl edi, 3
3644 bsf edx, eax
3645 add edx, edi
3646 done:
3647 mov [iBit], edx
3648 }
3649# endif
3650 return iBit;
3651 }
3652 return -1;
3653}
3654#endif
3655
3656
3657/**
3658 * Finds the next set bit in a bitmap.
3659 *
3660 * @returns Index of the next set bit.
3661 * @returns -1 if no set bit was found.
3662 * @param pvBitmap Pointer to the bitmap.
3663 * @param cBits The number of bits in the bitmap. Multiple of 32.
3664 * @param iBitPrev The bit returned from the last search.
3665 * The search will start at iBitPrev + 1.
3666 */
3667#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3668DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3669#else
3670DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3671{
3672 int iBit = ++iBitPrev & 31;
3673 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3674 cBits -= iBitPrev & ~31;
3675 if (iBit)
3676 {
3677 /* inspect the first dword. */
3678 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3679# if RT_INLINE_ASM_USES_INTRIN
3680 unsigned long ulBit = 0;
3681 if (_BitScanForward(&ulBit, u32))
3682 return ulBit + iBitPrev;
3683 iBit = -1;
3684# else
3685# if RT_INLINE_ASM_GNU_STYLE
3686 __asm__ __volatile__("bsf %1, %0\n\t"
3687 "jnz 1f\n\t"
3688 "movl $-1, %0\n\t"
3689 "1:\n\t"
3690 : "=r" (iBit)
3691 : "r" (u32));
3692# else
3693 __asm
3694 {
3695 mov edx, u32
3696 bsf eax, edx
3697 jnz done
3698 mov eax, 0ffffffffh
3699 done:
3700 mov [iBit], eax
3701 }
3702# endif
3703 if (iBit >= 0)
3704 return iBit + iBitPrev;
3705# endif
3706 /* Search the rest of the bitmap, if there is anything. */
3707 if (cBits > 32)
3708 {
3709 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3710 if (iBit >= 0)
3711 return iBit + (iBitPrev & ~31) + 32;
3712 }
3713
3714 }
3715 else
3716 {
3717 /* Search the rest of the bitmap. */
3718 iBit = ASMBitFirstSet(pvBitmap, cBits);
3719 if (iBit >= 0)
3720 return iBit + (iBitPrev & ~31);
3721 }
3722 return iBit;
3723}
3724#endif
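
/* Example: ASMBitFirstSet and ASMBitNextSet combine into the usual pattern for
 * visiting every set bit in a bitmap. Illustrative sketch only; the
 * ASMExampleForEachSetBit name and the callback parameter are hypothetical,
 * and cBits must be a multiple of 32 as documented above. */
DECLINLINE(void) ASMExampleForEachSetBit(volatile void *pvBitmap, uint32_t cBits, void (*pfnVisit)(int32_t iBit))
{
    int iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        pfnVisit((int32_t)iBit);
        iBit = ASMBitNextSet(pvBitmap, cBits, (uint32_t)iBit);
    }
}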
3725
3726
3727/**
3728 * Finds the first bit which is set in the given 32-bit integer.
3729 * Bits are numbered from 1 (least significant) to 32.
3730 *
3731 * @returns index [1..32] of the first set bit.
3732 * @returns 0 if all bits are cleared.
3733 * @param u32 Integer to search for set bits.
3734 * @remark Similar to ffs() in BSD.
3735 */
3736DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3737{
3738# if RT_INLINE_ASM_USES_INTRIN
3739 unsigned long iBit;
3740 if (_BitScanForward(&iBit, u32))
3741 iBit++;
3742 else
3743 iBit = 0;
3744# elif RT_INLINE_ASM_GNU_STYLE
3745 uint32_t iBit;
3746 __asm__ __volatile__("bsf %1, %0\n\t"
3747 "jnz 1f\n\t"
3748 "xorl %0, %0\n\t"
3749 "jmp 2f\n"
3750 "1:\n\t"
3751 "incl %0\n"
3752 "2:\n\t"
3753 : "=r" (iBit)
3754 : "rm" (u32));
3755# else
3756 uint32_t iBit;
3757 _asm
3758 {
3759 bsf eax, [u32]
3760 jnz found
3761 xor eax, eax
3762 jmp done
3763 found:
3764 inc eax
3765 done:
3766 mov [iBit], eax
3767 }
3768# endif
3769 return iBit;
3770}
3771
3772
3773/**
3774 * Finds the first bit which is set in the given 32-bit integer.
3775 * Bits are numbered from 1 (least significant) to 32.
3776 *
3777 * @returns index [1..32] of the first set bit.
3778 * @returns 0 if all bits are cleared.
3779 * @param i32 Integer to search for set bits.
3780 * @remark Similar to ffs() in BSD.
3781 */
3782DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3783{
3784 return ASMBitFirstSetU32((uint32_t)i32);
3785}
3786
3787
3788/**
3789 * Finds the last bit which is set in the given 32-bit integer.
3790 * Bits are numbered from 1 (least significant) to 32.
3791 *
3792 * @returns index [1..32] of the last set bit.
3793 * @returns 0 if all bits are cleared.
3794 * @param u32 Integer to search for set bits.
3795 * @remark Similar to fls() in BSD.
3796 */
3797DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3798{
3799# if RT_INLINE_ASM_USES_INTRIN
3800 unsigned long iBit;
3801 if (_BitScanReverse(&iBit, u32))
3802 iBit++;
3803 else
3804 iBit = 0;
3805# elif RT_INLINE_ASM_GNU_STYLE
3806 uint32_t iBit;
3807 __asm__ __volatile__("bsrl %1, %0\n\t"
3808 "jnz 1f\n\t"
3809 "xorl %0, %0\n\t"
3810 "jmp 2f\n"
3811 "1:\n\t"
3812 "incl %0\n"
3813 "2:\n\t"
3814 : "=r" (iBit)
3815 : "rm" (u32));
3816# else
3817 uint32_t iBit;
3818 _asm
3819 {
3820 bsr eax, [u32]
3821 jnz found
3822 xor eax, eax
3823 jmp done
3824 found:
3825 inc eax
3826 done:
3827 mov [iBit], eax
3828 }
3829# endif
3830 return iBit;
3831}
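
/* Example: because ASMBitLastSetU32 returns the 1-based index of the most
 * significant set bit, subtracting one yields the integer base-2 logarithm of
 * a non-zero value. Illustrative sketch only; the ASMExampleLog2U32 name is
 * hypothetical and the caller must pass u32 != 0. */
DECLINLINE(unsigned) ASMExampleLog2U32(uint32_t u32)
{
    return ASMBitLastSetU32(u32) - 1;
}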
3832
3833
3834/**
3835 * Finds the last bit which is set in the given 32-bit integer.
3836 * Bits are numbered from 1 (least significant) to 32.
3837 *
3838 * @returns index [1..32] of the last set bit.
3839 * @returns 0 if all bits are cleared.
3840 * @param i32 Integer to search for set bits.
3841 * @remark Similar to fls() in BSD.
3842 */
3843DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
3844{
3845 return ASMBitLastSetU32((uint32_t)i32);
3846}
3847
3848
3849/**
3850 * Reverse the byte order of the given 32-bit integer.
3851 * @param u32 Integer to reverse the byte order of.
3852 */
3853DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
3854{
3855#if RT_INLINE_ASM_GNU_STYLE
3856 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
3857#else
3858 _asm
3859 {
3860 mov eax, [u32]
3861 bswap eax
3862 mov [u32], eax
3863 }
3864#endif
3865 return u32;
3866}
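
/* Example: on the little endian x86 and AMD64 hosts targeted here, a single
 * ASMByteSwapU32 converts a 32-bit big endian (network order) value to host
 * order. Illustrative sketch only; the ASMExampleBE2HostU32 name is
 * hypothetical. */
DECLINLINE(uint32_t) ASMExampleBE2HostU32(uint32_t u32BigEndian)
{
    return ASMByteSwapU32(u32BigEndian);
}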
3867
3868/** @} */
3869
3870
3871/** @} */
3872#endif
3873