VirtualBox

source: vbox/trunk/include/iprt/asm.h@1890

Last change on this file since 1890 was 1888, checked in by vboxsync, 18 years ago

ASMMultU64ByU32DivByU32 (for TSC calculation).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 96.4 KB
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using the _MSC_VER >= 1400 intrinsics.
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_byteswap_ushort)
67# pragma intrinsic(_byteswap_ulong)
68# pragma intrinsic(_interlockedbittestandset)
69# pragma intrinsic(_interlockedbittestandreset)
70# pragma intrinsic(_InterlockedAnd)
71# pragma intrinsic(_InterlockedOr)
72# pragma intrinsic(_InterlockedIncrement)
73# pragma intrinsic(_InterlockedDecrement)
74# pragma intrinsic(_InterlockedExchange)
75# pragma intrinsic(_InterlockedCompareExchange)
76# pragma intrinsic(_InterlockedCompareExchange64)
77# ifdef __AMD64__
78# pragma intrinsic(__stosq)
79# pragma intrinsic(__readcr8)
80# pragma intrinsic(__writecr8)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# endif
84# endif
85#endif
86#ifndef RT_INLINE_ASM_USES_INTRIN
87# define RT_INLINE_ASM_USES_INTRIN 0
88#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
97/** @def RT_INLINE_ASM_EXTERNAL
98 * Defined as 1 if the compiler does not support inline assembly.
99 * The ASM* functions will then be implemented in an external .asm file.
100 *
101 * @remark At the present time it's unconfirmed whether or not Microsoft dropped
102 * inline assembly support in their AMD64 compiler.
103 */
104#if defined(_MSC_VER) && defined(__AMD64__)
105# define RT_INLINE_ASM_EXTERNAL 1
106#else
107# define RT_INLINE_ASM_EXTERNAL 0
108#endif
109
110/** @def RT_INLINE_ASM_GNU_STYLE
111 * Defined as 1 if the compiler understands GNU-style inline assembly.
112 */
113#if defined(_MSC_VER)
114# define RT_INLINE_ASM_GNU_STYLE 0
115#else
116# define RT_INLINE_ASM_GNU_STYLE 1
117#endif
118
119
120/** @todo find a better place for these structures? */
121#pragma pack(1)
122/** IDTR */
123typedef struct RTIDTR
124{
125 /** Size of the IDT. */
126 uint16_t cbIdt;
127 /** Address of the IDT. */
128 uintptr_t pIdt;
129} RTIDTR, *PRTIDTR;
130#pragma pack()
131
132#pragma pack(1)
133/** GDTR */
134typedef struct RTGDTR
135{
136 /** Size of the GDT. */
137 uint16_t cbGdt;
138 /** Address of the GDT. */
139 uintptr_t pGdt;
140} RTGDTR, *PRTGDTR;
141#pragma pack()
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(__DOXYGEN__)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241/**
242 * Get the CS register.
243 * @returns CS.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334#endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
509
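/**
 * Usage sketch (illustrative only, not an IPRT API): measuring how many TSC
 * ticks a piece of work takes. The helper name and callback signature are
 * assumptions for the example; no serializing instruction or conversion to
 * wall-clock time is attempted, so treat the result as a rough cycle count.
 */
DECLINLINE(uint64_t) ExampleTscDelta(void (*pfnWork)(void *pvUser), void *pvUser)
{
    const uint64_t u64Start = ASMReadTSC();
    pfnWork(pvUser);
    return ASMReadTSC() - u64Start;     /* raw cycles; counter wrap is ignored */
}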
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
762
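/**
 * Usage sketch (illustrative only, not an IPRT API): fetching the 12-character
 * CPU vendor string with CPUID leaf 0, which returns it in EBX, EDX, ECX order.
 * The helper name and the 13-byte caller supplied buffer are assumptions made
 * for this example.
 */
DECLINLINE(bool) ExampleQueryCpuVendor(char *pszVendor /* 13 bytes */)
{
    uint32_t auVendor[3];   /* filled in EBX, EDX, ECX order */
    uint32_t uEAX;
    unsigned i;
    if (!ASMHasCpuId())
        return false;
    ASMCpuId(0, &uEAX, &auVendor[0], &auVendor[2], &auVendor[1]);
    for (i = 0; i < 12; i++)
        pszVendor[i] = ((char *)&auVendor[0])[i];
    pszVendor[12] = '\0';
    return true;
}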
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR2 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
1248
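/**
 * Usage sketch (illustrative only, not an IPRT API): the usual save, disable,
 * work, restore pattern for a short critical section. The helper name and the
 * callback are assumptions for the example.
 */
DECLINLINE(void) ExampleWithInterruptsDisabled(void (*pfnWork)(void *pvUser), void *pvUser)
{
    const RTCCUINTREG fSavedFlags = ASMIntDisableFlags();  /* old [RE]FLAGS, IF now clear */
    pfnWork(pvUser);
    ASMSetFlags(fSavedFlags);                              /* restore the previous IF state */
}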
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
1321
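/**
 * Usage sketch (illustrative only, not an IPRT API): reading an architectural
 * MSR. IA32_APIC_BASE is MSR 0x1b and bit 11 is the APIC global enable bit per
 * the processor manuals; the helper name is an assumption for the example.
 */
DECLINLINE(bool) ExampleIsLocalApicEnabled(void)
{
    const uint64_t u64ApicBase = ASMRdMsr(0x0000001b /* IA32_APIC_BASE */);
    return (u64ApicBase & 0x800) != 0;                     /* bit 11 = APIC global enable */
}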
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344# else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
1507/** @deprecated */
1508#define ASMOutB(p, b) ASMOutU8(p,b)
1509/** @deprecated */
1510#define ASMInB(p) ASMInU8(p)
1511
1512/**
1513 * Writes an 8-bit unsigned integer to an I/O port.
1514 *
1515 * @param Port I/O port to write to.
1516 * @param u8 8-bit integer to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1520#else
1521DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1522{
1523# if RT_INLINE_ASM_GNU_STYLE
1524 __asm__ __volatile__("outb %b1, %w0\n\t"
1525 :: "Nd" (Port),
1526 "a" (u8));
1527
1528# elif RT_INLINE_ASM_USES_INTRIN
1529 __outbyte(Port, u8);
1530
1531# else
1532 __asm
1533 {
1534 mov dx, [Port]
1535 mov al, [u8]
1536 out dx, al
1537 }
1538# endif
1539}
1540#endif
1541
1542
1543/**
1544 * Gets an 8-bit unsigned integer from an I/O port.
1545 *
1546 * @returns 8-bit integer.
1547 * @param Port I/O port to read from.
1548 */
1549#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1550DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1551#else
1552DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1553{
1554 uint8_t u8;
1555# if RT_INLINE_ASM_GNU_STYLE
1556 __asm__ __volatile__("inb %w1, %b0\n\t"
1557 : "=a" (u8)
1558 : "Nd" (Port));
1559
1560# elif RT_INLINE_ASM_USES_INTRIN
1561 u8 = __inbyte(Port);
1562
1563# else
1564 __asm
1565 {
1566 mov dx, [Port]
1567 in al, dx
1568 mov [u8], al
1569 }
1570# endif
1571 return u8;
1572}
1573#endif
1574
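/**
 * Usage sketch (illustrative only, not an IPRT API): reading a CMOS/RTC
 * register through the classic 0x70 (index) / 0x71 (data) port pair; register
 * 0 is the RTC seconds counter. Bit 7 of the index port doubles as the NMI
 * mask on PC chipsets and is passed through as part of bReg here. The helper
 * name is an assumption for the example.
 */
DECLINLINE(uint8_t) ExampleReadCmosReg(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);       /* select the CMOS register */
    return ASMInU8(0x71);       /* read its current value */
}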
1575
1576/**
1577 * Writes a 16-bit unsigned integer to an I/O port.
1578 *
1579 * @param Port I/O port to write to.
1580 * @param u16 16-bit integer to write.
1581 */
1582#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1583DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1584#else
1585DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1586{
1587# if RT_INLINE_ASM_GNU_STYLE
1588 __asm__ __volatile__("outw %w1, %w0\n\t"
1589 :: "Nd" (Port),
1590 "a" (u16));
1591
1592# elif RT_INLINE_ASM_USES_INTRIN
1593 __outword(Port, u16);
1594
1595# else
1596 __asm
1597 {
1598 mov dx, [Port]
1599 mov ax, [u16]
1600 out dx, ax
1601 }
1602# endif
1603}
1604#endif
1605
1606
1607/**
1608 * Gets a 16-bit unsigned integer from an I/O port.
1609 *
1610 * @returns 16-bit integer.
1611 * @param Port I/O port to read from.
1612 */
1613#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1614DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1615#else
1616DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1617{
1618 uint16_t u16;
1619# if RT_INLINE_ASM_GNU_STYLE
1620 __asm__ __volatile__("inw %w1, %w0\n\t"
1621 : "=a" (u16)
1622 : "Nd" (Port));
1623
1624# elif RT_INLINE_ASM_USES_INTRIN
1625 u16 = __inword(Port);
1626
1627# else
1628 __asm
1629 {
1630 mov dx, [Port]
1631 in ax, dx
1632 mov [u16], ax
1633 }
1634# endif
1635 return u16;
1636}
1637#endif
1638
1639
1640/**
1641 * Writes a 32-bit unsigned integer to an I/O port.
1642 *
1643 * @param Port I/O port to write to.
1644 * @param u32 32-bit integer to write.
1645 */
1646#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1647DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1648#else
1649DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1650{
1651# if RT_INLINE_ASM_GNU_STYLE
1652 __asm__ __volatile__("outl %1, %w0\n\t"
1653 :: "Nd" (Port),
1654 "a" (u32));
1655
1656# elif RT_INLINE_ASM_USES_INTRIN
1657 __outdword(Port, u32);
1658
1659# else
1660 __asm
1661 {
1662 mov dx, [Port]
1663 mov eax, [u32]
1664 out dx, eax
1665 }
1666# endif
1667}
1668#endif
1669
1670
1671/**
1672 * Gets a 32-bit unsigned integer from an I/O port.
1673 *
1674 * @returns 32-bit integer.
1675 * @param Port I/O port to read from.
1676 */
1677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1678DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1679#else
1680DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1681{
1682 uint32_t u32;
1683# if RT_INLINE_ASM_GNU_STYLE
1684 __asm__ __volatile__("inl %w1, %0\n\t"
1685 : "=a" (u32)
1686 : "Nd" (Port));
1687
1688# elif RT_INLINE_ASM_USES_INTRIN
1689 u32 = __indword(Port);
1690
1691# else
1692 __asm
1693 {
1694 mov dx, [Port]
1695 in eax, dx
1696 mov [u32], eax
1697 }
1698# endif
1699 return u32;
1700}
1701#endif
1702
1703
1704/**
1705 * Atomically Exchange an unsigned 8-bit value.
1706 *
1707 * @returns Current *pu8 value
1708 * @param pu8 Pointer to the 8-bit variable to update.
1709 * @param u8 The 8-bit value to assign to *pu8.
1710 */
1711#if RT_INLINE_ASM_EXTERNAL
1712DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1713#else
1714DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1715{
1716# if RT_INLINE_ASM_GNU_STYLE
1717 __asm__ __volatile__("xchgb %0, %1\n\t"
1718 : "=m" (*pu8),
1719 "=r" (u8)
1720 : "1" (u8));
1721# else
1722 __asm
1723 {
1724# ifdef __AMD64__
1725 mov rdx, [pu8]
1726 mov al, [u8]
1727 xchg [rdx], al
1728 mov [u8], al
1729# else
1730 mov edx, [pu8]
1731 mov al, [u8]
1732 xchg [edx], al
1733 mov [u8], al
1734# endif
1735 }
1736# endif
1737 return u8;
1738}
1739#endif
1740
1741
1742/**
1743 * Atomically Exchange a signed 8-bit value.
1744 *
1745 * @returns Current *pi8 value
1746 * @param pi8 Pointer to the 8-bit variable to update.
1747 * @param i8 The 8-bit value to assign to *pi8.
1748 */
1749DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1750{
1751 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1752}
1753
1754
1755/**
1756 * Atomically Exchange an unsigned 16-bit value.
1757 *
1758 * @returns Current *pu16 value
1759 * @param pu16 Pointer to the 16-bit variable to update.
1760 * @param u16 The 16-bit value to assign to *pu16.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL
1763DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1764#else
1765DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("xchgw %0, %1\n\t"
1769 : "=m" (*pu16),
1770 "=r" (u16)
1771 : "1" (u16));
1772# else
1773 __asm
1774 {
1775# ifdef __AMD64__
1776 mov rdx, [pu16]
1777 mov ax, [u16]
1778 xchg [rdx], ax
1779 mov [u16], ax
1780# else
1781 mov edx, [pu16]
1782 mov ax, [u16]
1783 xchg [edx], ax
1784 mov [u16], ax
1785# endif
1786 }
1787# endif
1788 return u16;
1789}
1790#endif
1791
1792
1793/**
1794 * Atomically Exchange a signed 16-bit value.
1795 *
1796 * @returns Current *pi16 value
1797 * @param pi16 Pointer to the 16-bit variable to update.
1798 * @param i16 The 16-bit value to assign to *pi16.
1799 */
1800DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1801{
1802 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1803}
1804
1805
1806/**
1807 * Atomically Exchange an unsigned 32-bit value.
1808 *
1809 * @returns Current *pu32 value
1810 * @param pu32 Pointer to the 32-bit variable to update.
1811 * @param u32 The 32-bit value to assign to *pu32.
1812 */
1813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1814DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1815#else
1816DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1817{
1818# if RT_INLINE_ASM_GNU_STYLE
1819 __asm__ __volatile__("xchgl %0, %1\n\t"
1820 : "=m" (*pu32),
1821 "=r" (u32)
1822 : "1" (u32));
1823
1824# elif RT_INLINE_ASM_USES_INTRIN
1825 u32 = _InterlockedExchange((long *)pu32, u32);
1826
1827# else
1828 __asm
1829 {
1830# ifdef __AMD64__
1831 mov rdx, [pu32]
1832 mov eax, u32
1833 xchg [rdx], eax
1834 mov [u32], eax
1835# else
1836 mov edx, [pu32]
1837 mov eax, u32
1838 xchg [edx], eax
1839 mov [u32], eax
1840# endif
1841 }
1842# endif
1843 return u32;
1844}
1845#endif
1846
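/**
 * Usage sketch (illustrative only, not an IPRT API): a primitive test-and-set
 * spinlock built on the atomic exchange above. The 0 = free / 1 = taken
 * convention and the helper names are assumptions for the example; there is no
 * pause hint or back-off, so do not use this as-is for contended locks.
 */
DECLINLINE(void) ExampleSpinAcquire(uint32_t volatile *pu32Lock)
{
    while (ASMAtomicXchgU32(pu32Lock, 1) != 0)
        /* busy wait until the current owner stores 0 */;
}

DECLINLINE(void) ExampleSpinRelease(uint32_t volatile *pu32Lock)
{
    ASMAtomicXchgU32(pu32Lock, 0);      /* atomic release store */
}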
1847
1848/**
1849 * Atomically Exchange a signed 32-bit value.
1850 *
1851 * @returns Current *pi32 value
1852 * @param pi32 Pointer to the 32-bit variable to update.
1853 * @param i32 The 32-bit value to assign to *pi32.
1854 */
1855DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1856{
1857 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1858}
1859
1860
1861/**
1862 * Atomically Exchange an unsigned 64-bit value.
1863 *
1864 * @returns Current *pu64 value
1865 * @param pu64 Pointer to the 64-bit variable to update.
1866 * @param u64 The 64-bit value to assign to *pu64.
1867 */
1868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1869DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1870#else
1871DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1872{
1873# if defined(__AMD64__)
1874# if RT_INLINE_ASM_USES_INTRIN
1875 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1876
1877# elif RT_INLINE_ASM_GNU_STYLE
1878 __asm__ __volatile__("xchgq %0, %1\n\t"
1879 : "=m" (*pu64),
1880 "=r" (u64)
1881 : "1" (u64));
1882# else
1883 __asm
1884 {
1885 mov rdx, [pu64]
1886 mov rax, [u64]
1887 xchg [rdx], rax
1888 mov [u64], rax
1889 }
1890# endif
1891# else /* !__AMD64__ */
1892# if RT_INLINE_ASM_GNU_STYLE
1893# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1894 uint32_t u32 = (uint32_t)u64;
1895 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1896 "xchgl %%ebx, %3\n\t"
1897 "1:\n\t"
1898 "lock; cmpxchg8b (%5)\n\t"
1899 "jnz 1b\n\t"
1900 "xchgl %%ebx, %3\n\t"
1901 /*"xchgl %%esi, %5\n\t"*/
1902 : "=A" (u64),
1903 "=m" (*pu64)
1904 : "0" (*pu64),
1905 "m" ( u32 ),
1906 "c" ( (uint32_t)(u64 >> 32) ),
1907 "S" (pu64) );
1908# else /* !PIC */
1909 __asm__ __volatile__("1:\n\t"
1910 "lock; cmpxchg8b %1\n\t"
1911 "jnz 1b\n\t"
1912 : "=A" (u64),
1913 "=m" (*pu64)
1914 : "0" (*pu64),
1915 "b" ( (uint32_t)u64 ),
1916 "c" ( (uint32_t)(u64 >> 32) ));
1917# endif
1918# else
1919 __asm
1920 {
1921 mov ebx, dword ptr [u64]
1922 mov ecx, dword ptr [u64 + 4]
1923 mov edi, pu64
1924 mov eax, dword ptr [edi]
1925 mov edx, dword ptr [edi + 4]
1926 retry:
1927 lock cmpxchg8b [edi]
1928 jnz retry
1929 mov dword ptr [u64], eax
1930 mov dword ptr [u64 + 4], edx
1931 }
1932# endif
1933# endif /* !__AMD64__ */
1934 return u64;
1935}
1936#endif
1937
1938
1939/**
1940 * Atomically Exchange a signed 64-bit value.
1941 *
1942 * @returns Current *pi64 value
1943 * @param pi64 Pointer to the 64-bit variable to update.
1944 * @param i64 The 64-bit value to assign to *pi64.
1945 */
1946DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1947{
1948 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1949}
1950
1951
1952#ifdef __AMD64__
1953/**
1954 * Atomically Exchange an unsigned 128-bit value.
1955 *
1956 * @returns Current *pu128.
1957 * @param pu128 Pointer to the 128-bit variable to update.
1958 * @param u128 The 128-bit value to assign to *pu128.
1959 *
1960 * @remark We cannot really assume that any hardware supports this. Nor do I have
1961 * GAS support for it. So, for the time being we'll BREAK the atomic
1962 * bit of this function and use two 64-bit exchanges instead.
1963 */
1964# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1965DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1966# else
1967DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
1968{
1969 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
1970 {
1971 /** @todo this is clumsy code */
1972 RTUINT128U u128Ret;
1973 u128Ret.u = u128;
1974 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
1975 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
1976 return u128Ret.u;
1977 }
1978#if 0 /* later? */
1979 else
1980 {
1981# if RT_INLINE_ASM_GNU_STYLE
1982 __asm__ __volatile__("1:\n\t"
1983 "lock; cmpxchg8b %1\n\t"
1984 "jnz 1b\n\t"
1985 : "=A" (u128),
1986 "=m" (*pu128)
1987 : "0" (*pu128),
1988 "b" ( (uint64_t)u128 ),
1989 "c" ( (uint64_t)(u128 >> 64) ));
1990# else
1991 __asm
1992 {
1993 mov rbx, dword ptr [u128]
1994 mov rcx, dword ptr [u128 + 4]
1995 mov rdi, pu128
1996 mov rax, dword ptr [rdi]
1997 mov rdx, dword ptr [rdi + 4]
1998 retry:
1999 lock cmpxchg16b [rdi]
2000 jnz retry
2001 mov dword ptr [u128], rax
2002 mov dword ptr [u128 + 4], rdx
2003 }
2004# endif
2005 }
2006 return u128;
2007#endif
2008}
2009# endif
2010#endif /* __AMD64__ */
2011
2012
2013/**
2014 * Atomically Reads an unsigned 64-bit value.
2015 *
2016 * @returns Current *pu64 value
2017 * @param pu64 Pointer to the 64-bit variable to read.
2018 * The memory pointed to must be writable.
2019 * @remark This will fault if the memory is read-only!
2020 */
2021#if RT_INLINE_ASM_EXTERNAL
2022DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2023#else
2024DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2025{
2026 uint64_t u64;
2027# ifdef __AMD64__
2028# if RT_INLINE_ASM_GNU_STYLE
2029 __asm__ __volatile__("movq %1, %0\n\t"
2030 : "=r" (u64)
2031 : "m" (*pu64));
2032# else
2033 __asm
2034 {
2035 mov rdx, [pu64]
2036 mov rax, [rdx]
2037 mov [u64], rax
2038 }
2039# endif
2040# else /* !__AMD64__ */
2041# if RT_INLINE_ASM_GNU_STYLE
2042# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2043 uint32_t u32EBX = 0;
2044 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2045 "lock; cmpxchg8b (%5)\n\t"
2046 "xchgl %%ebx, %3\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (0),
2050 "m" (u32EBX),
2051 "c" (0),
2052 "S" (pu64));
2053# else /* !PIC */
2054 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2055 : "=A" (u64),
2056 "=m" (*pu64)
2057 : "0" (0),
2058 "b" (0),
2059 "c" (0));
2060# endif
2061# else
2062 __asm
2063 {
2064 xor eax, eax
2065 xor edx, edx
2066 mov edi, pu64
2067 xor ecx, ecx
2068 xor ebx, ebx
2069 lock cmpxchg8b [edi]
2070 mov dword ptr [u64], eax
2071 mov dword ptr [u64 + 4], edx
2072 }
2073# endif
2074# endif /* !__AMD64__ */
2075 return u64;
2076}
2077#endif
2078
2079
2080/**
2081 * Atomically Reads a signed 64-bit value.
2082 *
2083 * @returns Current *pi64 value
2084 * @param pi64 Pointer to the 64-bit variable to read.
2085 * The memory pointed to must be writable.
2086 * @remark This will fault if the memory is read-only!
2087 */
2088DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2089{
2090 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2091}
2092
2093
2094/**
2095 * Atomically Exchange a value whose size might differ
2096 * between platforms or compilers.
2097 *
2098 * @param pu Pointer to the variable to update.
2099 * @param uNew The value to assign to *pu.
2100 */
2101#define ASMAtomicXchgSize(pu, uNew) \
2102 do { \
2103 switch (sizeof(*(pu))) { \
2104 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2105 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2106 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2107 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2108 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2109 } \
2110 } while (0)
2111
2112
2113/**
2114 * Atomically Exchange a pointer value.
2115 *
2116 * @returns Current *ppv value
2117 * @param ppv Pointer to the pointer variable to update.
2118 * @param pv The pointer value to assign to *ppv.
2119 */
2120DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2121{
2122#if ARCH_BITS == 32
2123 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2124#elif ARCH_BITS == 64
2125 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2126#else
2127# error "ARCH_BITS is bogus"
2128#endif
2129}
2130
2131
2132/**
2133 * Atomically Compare and Exchange an unsigned 32-bit value.
2134 *
2135 * @returns true if xchg was done.
2136 * @returns false if xchg wasn't done.
2137 *
2138 * @param pu32 Pointer to the value to update.
2139 * @param u32New The new value to assign to *pu32.
2140 * @param u32Old The old value to compare *pu32 with.
2141 */
2142#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2143DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2144#else
2145DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2146{
2147# if RT_INLINE_ASM_GNU_STYLE
2148 uint32_t u32Ret;
2149 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2150 "setz %%al\n\t"
2151 "movzx %%al, %%eax\n\t"
2152 : "=m" (*pu32),
2153 "=a" (u32Ret)
2154 : "r" (u32New),
2155 "1" (u32Old));
2156 return (bool)u32Ret;
2157
2158# elif RT_INLINE_ASM_USES_INTRIN
2159 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2160
2161# else
2162 uint32_t u32Ret;
2163 __asm
2164 {
2165# ifdef __AMD64__
2166 mov rdx, [pu32]
2167# else
2168 mov edx, [pu32]
2169# endif
2170 mov eax, [u32Old]
2171 mov ecx, [u32New]
2172# ifdef __AMD64__
2173 lock cmpxchg [rdx], ecx
2174# else
2175 lock cmpxchg [edx], ecx
2176# endif
2177 setz al
2178 movzx eax, al
2179 mov [u32Ret], eax
2180 }
2181 return !!u32Ret;
2182# endif
2183}
2184#endif
2185
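/**
 * Usage sketch (illustrative only, not an IPRT API): the typical compare and
 * exchange retry loop, here raising a shared 32-bit maximum. The helper name
 * is an assumption; the loop simply re-samples and retries whenever another
 * CPU updated the value first.
 */
DECLINLINE(void) ExampleAtomicUpdateMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
{
    uint32_t u32Old;
    do
    {
        u32Old = *pu32Max;              /* sample the current maximum */
        if (u32New <= u32Old)
            return;                     /* nothing to update */
    } while (!ASMAtomicCmpXchgU32(pu32Max, u32New, u32Old));
}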
2186
2187/**
2188 * Atomically Compare and Exchange a signed 32-bit value.
2189 *
2190 * @returns true if xchg was done.
2191 * @returns false if xchg wasn't done.
2192 *
2193 * @param pi32 Pointer to the value to update.
2194 * @param i32New The new value to assign to *pi32.
2195 * @param i32Old The old value to compare *pi32 with.
2196 */
2197DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2198{
2199 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2200}
2201
2202
2203/**
2204 * Atomically Compare and exchange an unsigned 64-bit value.
2205 *
2206 * @returns true if xchg was done.
2207 * @returns false if xchg wasn't done.
2208 *
2209 * @param pu64 Pointer to the 64-bit variable to update.
2210 * @param u64New The 64-bit value to assign to *pu64.
2211 * @param u64Old The value to compare with.
2212 */
2213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2214DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2215#else
2216DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2217{
2218# if RT_INLINE_ASM_USES_INTRIN
2219 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2220
2221# elif defined(__AMD64__)
2222# if RT_INLINE_ASM_GNU_STYLE
2223 uint64_t u64Ret;
2224 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2225 "setz %%al\n\t"
2226 "movzx %%al, %%eax\n\t"
2227 : "=m" (*pu64),
2228 "=a" (u64Ret)
2229 : "r" (u64New),
2230 "1" (u64Old));
2231 return (bool)u64Ret;
2232# else
2233 bool fRet;
2234 __asm
2235 {
2236 mov rdx, [pu64]
2237 mov rax, [u64Old]
2238 mov rcx, [u64New]
2239 lock cmpxchg [rdx], rcx
2240 setz al
2241 mov [fRet], al
2242 }
2243 return fRet;
2244# endif
2245# else /* !__AMD64__ */
2246 uint32_t u32Ret;
2247# if RT_INLINE_ASM_GNU_STYLE
2248# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2249 uint32_t u32 = (uint32_t)u64New;
2250 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2251 "lock; cmpxchg8b (%5)\n\t"
2252 "setz %%al\n\t"
2253 "xchgl %%ebx, %3\n\t"
2254 "movzx %%al, %%eax\n\t"
2255 : "=a" (u32Ret),
2256 "=m" (*pu64)
2257 : "A" (u64Old),
2258 "m" ( u32 ),
2259 "c" ( (uint32_t)(u64New >> 32) ),
2260 "S" (pu64) );
2261# else /* !PIC */
2262 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2263 "setz %%al\n\t"
2264 "movzx %%al, %%eax\n\t"
2265 : "=a" (u32Ret),
2266 "=m" (*pu64)
2267 : "A" (u64Old),
2268 "b" ( (uint32_t)u64New ),
2269 "c" ( (uint32_t)(u64New >> 32) ));
2270# endif
2271 return (bool)u32Ret;
2272# else
2273 __asm
2274 {
2275 mov ebx, dword ptr [u64New]
2276 mov ecx, dword ptr [u64New + 4]
2277 mov edi, [pu64]
2278 mov eax, dword ptr [u64Old]
2279 mov edx, dword ptr [u64Old + 4]
2280 lock cmpxchg8b [edi]
2281 setz al
2282 movzx eax, al
2283 mov dword ptr [u32Ret], eax
2284 }
2285 return !!u32Ret;
2286# endif
2287# endif /* !__AMD64__ */
2288}
2289#endif
2290
2291
2292/**
2293 * Atomically Compare and exchange a signed 64-bit value.
2294 *
2295 * @returns true if xchg was done.
2296 * @returns false if xchg wasn't done.
2297 *
2298 * @param pi64 Pointer to the 64-bit variable to update.
2299 * @param i64 The 64-bit value to assign to *pi64.
2300 * @param i64Old The value to compare with.
2301 */
2302DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2303{
2304 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2305}
2306
2307
2308
2309/** @def ASMAtomicCmpXchgSize
2310 * Atomically Compare and Exchange a value whose size might differ
2311 * between platforms or compilers.
2312 *
2313 * @param pu Pointer to the value to update.
2314 * @param uNew The new value to assign to *pu.
2315 * @param uOld The old value to compare *pu with.
2316 * @param fRc Where to store the result.
2317 */
2318#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2319 do { \
2320 switch (sizeof(*(pu))) { \
2321 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2322 break; \
2323 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2324 break; \
2325 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2326 (fRc) = false; \
2327 break; \
2328 } \
2329 } while (0)
2330
2331
2332/**
2333 * Atomically Compare and Exchange a pointer value.
2334 *
2335 * @returns true if xchg was done.
2336 * @returns false if xchg wasn't done.
2337 *
2338 * @param ppv Pointer to the value to update.
2339 * @param pvNew The new value to assign to *ppv.
2340 * @param pvOld The old value to compare *ppv with.
2341 */
2342DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2343{
2344#if ARCH_BITS == 32
2345 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2346#elif ARCH_BITS == 64
2347 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2348#else
2349# error "ARCH_BITS is bogus"
2350#endif
2351}
2352
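/**
 * Usage sketch (illustrative only, not an IPRT API): pushing onto a lock-free
 * LIFO list with the pointer compare-and-exchange above. EXAMPLENODE and the
 * helper name are hypothetical; a matching pop needs ABA protection and is
 * deliberately not shown.
 */
typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;

DECLINLINE(void) ExampleLifoPush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    EXAMPLENODE *pOldHead;
    do
    {
        pOldHead = *ppHead;             /* sample the current head */
        pNode->pNext = pOldHead;        /* link the new node in front of it */
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOldHead));
}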
2353
2354/**
2355 * Atomically increment a 32-bit value.
2356 *
2357 * @returns The new value.
2358 * @param pu32 Pointer to the value to increment.
2359 */
2360#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2361DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2362#else
2363DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2364{
2365 uint32_t u32;
2366# if RT_INLINE_ASM_USES_INTRIN
2367 u32 = _InterlockedIncrement((long *)pu32);
2368
2369# elif RT_INLINE_ASM_GNU_STYLE
2370 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2371 "incl %0\n\t"
2372 : "=r" (u32),
2373 "=m" (*pu32)
2374 : "0" (1)
2375 : "memory");
2376# else
2377 __asm
2378 {
2379 mov eax, 1
2380# ifdef __AMD64__
2381 mov rdx, [pu32]
2382 lock xadd [rdx], eax
2383# else
2384 mov edx, [pu32]
2385 lock xadd [edx], eax
2386# endif
2387 inc eax
2388 mov u32, eax
2389 }
2390# endif
2391 return u32;
2392}
2393#endif
2394
2395
2396/**
2397 * Atomically increment a signed 32-bit value.
2398 *
2399 * @returns The new value.
2400 * @param pi32 Pointer to the value to increment.
2401 */
2402DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2403{
2404 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2405}
2406
2407
2408/**
2409 * Atomically decrement an unsigned 32-bit value.
2410 *
2411 * @returns The new value.
2412 * @param pu32 Pointer to the value to decrement.
2413 */
2414#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2415DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2416#else
2417DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2418{
2419 uint32_t u32;
2420# if RT_INLINE_ASM_USES_INTRIN
2421 u32 = _InterlockedDecrement((long *)pu32);
2422
2423# elif RT_INLINE_ASM_GNU_STYLE
2424 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2425 "decl %0\n\t"
2426 : "=r" (u32),
2427 "=m" (*pu32)
2428 : "0" (-1)
2429 : "memory");
2430# else
2431 __asm
2432 {
2433 mov eax, -1
2434# ifdef __AMD64__
2435 mov rdx, [pu32]
2436 lock xadd [rdx], eax
2437# else
2438 mov edx, [pu32]
2439 lock xadd [edx], eax
2440# endif
2441 dec eax
2442 mov u32, eax
2443 }
2444# endif
2445 return u32;
2446}
2447#endif
2448
2449
2450/**
2451 * Atomically decrement a signed 32-bit value.
2452 *
2453 * @returns The new value.
2454 * @param pi32 Pointer to the value to decrement.
2455 */
2456DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2457{
2458 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2459}
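
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * reference counting an object with the atomic increment/decrement helpers.
 * ASMAtomicDecU32 returns the new count, so the thread that drops it to zero
 * is the one that frees the object. myObjDestroy() is an assumed helper.
 *
 *     typedef struct MYOBJ
 *     {
 *         uint32_t volatile cRefs;
 *         // ... payload ...
 *     } MYOBJ;
 *
 *     void myObjRetain(MYOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     void myObjRelease(MYOBJ *pObj)
 *     {
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *             myObjDestroy(pObj);
 *     }
 */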
2460
2461
2462/**
2463 * Atomically Or an unsigned 32-bit value.
2464 *
2465 * @param pu32 Pointer to the variable to OR u32 with.
2466 * @param u32 The value to OR *pu32 with.
2467 */
2468#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2469DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2470#else
2471DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2472{
2473# if RT_INLINE_ASM_USES_INTRIN
2474 _InterlockedOr((long volatile *)pu32, (long)u32);
2475
2476# elif RT_INLINE_ASM_GNU_STYLE
2477 __asm__ __volatile__("lock; orl %1, %0\n\t"
2478 : "=m" (*pu32)
2479 : "r" (u32));
2480# else
2481 __asm
2482 {
2483 mov eax, [u32]
2484# ifdef __AMD64__
2485 mov rdx, [pu32]
2486 lock or [rdx], eax
2487# else
2488 mov edx, [pu32]
2489 lock or [edx], eax
2490# endif
2491 }
2492# endif
2493}
2494#endif
2495
2496
2497/**
2498 * Atomically Or a signed 32-bit value.
2499 *
2500 * @param pi32 Pointer to the variable to OR i32 with.
2501 * @param i32 The value to OR *pi32 with.
2502 */
2503DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2504{
2505 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2506}
2507
2508
2509/**
2510 * Atomically And an unsigned 32-bit value.
2511 *
2512 * @param pu32 Pointer to the variable to AND u32 with.
2513 * @param u32 The value to AND *pu32 with.
2514 */
2515#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2516DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2517#else
2518DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2519{
2520# if RT_INLINE_ASM_USES_INTRIN
2521 _InterlockedAnd((long volatile *)pu32, u32);
2522
2523# elif RT_INLINE_ASM_GNU_STYLE
2524 __asm__ __volatile__("lock; andl %1, %0\n\t"
2525 : "=m" (*pu32)
2526 : "r" (u32));
2527# else
2528 __asm
2529 {
2530 mov eax, [u32]
2531# ifdef __AMD64__
2532 mov rdx, [pu32]
2533 lock and [rdx], eax
2534# else
2535 mov edx, [pu32]
2536 lock and [edx], eax
2537# endif
2538 }
2539# endif
2540}
2541#endif
2542
2543
2544/**
2545 * Atomically And a signed 32-bit value.
2546 *
2547 * @param pi32 Pointer to the variable to AND i32 with.
2548 * @param i32 The value to AND *pi32 with.
2549 */
2550DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2551{
2552 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2553}
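
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * maintaining a shared flag word. ASMAtomicOrU32 sets bits and ASMAtomicAndU32
 * clears them without a read-modify-write race; neither returns the previous
 * value, so use the atomic bit-test helpers further down when the old state
 * matters. RT_BIT() is assumed from iprt/cdefs.h.
 *
 *     #define MYFLAG_READY      RT_BIT(0)
 *     #define MYFLAG_SHUTDOWN   RT_BIT(1)
 *
 *     static uint32_t volatile s_fFlags = 0;
 *
 *     ASMAtomicOrU32(&s_fFlags, MYFLAG_READY);                   // set a bit
 *     ASMAtomicAndU32(&s_fFlags, ~(uint32_t)MYFLAG_SHUTDOWN);    // clear a bit
 */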
2554
2555
2556/**
2557 * Invalidate page.
2558 *
2559 * @param pv Address of the page to invalidate.
2560 */
2561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2562DECLASM(void) ASMInvalidatePage(void *pv);
2563#else
2564DECLINLINE(void) ASMInvalidatePage(void *pv)
2565{
2566# if RT_INLINE_ASM_USES_INTRIN
2567 __invlpg(pv);
2568
2569# elif RT_INLINE_ASM_GNU_STYLE
2570 __asm__ __volatile__("invlpg %0\n\t"
2571 : : "m" (*(uint8_t *)pv));
2572# else
2573 __asm
2574 {
2575# ifdef __AMD64__
2576 mov rax, [pv]
2577 invlpg [rax]
2578# else
2579 mov eax, [pv]
2580 invlpg [eax]
2581# endif
2582 }
2583# endif
2584}
2585#endif
2586
2587
2588#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2589# if PAGE_SIZE != 0x1000
2590# error "PAGE_SIZE is not 0x1000!"
2591# endif
2592#endif
2593
2594/**
2595 * Zeros a 4K memory page.
2596 *
2597 * @param pv Pointer to the memory block. This must be page aligned.
2598 */
2599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2600DECLASM(void) ASMMemZeroPage(volatile void *pv);
2601# else
2602DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2603{
2604# if RT_INLINE_ASM_USES_INTRIN
2605# ifdef __AMD64__
2606 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2607# else
2608 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2609# endif
2610
2611# elif RT_INLINE_ASM_GNU_STYLE
2612 RTUINTREG uDummy;
2613# ifdef __AMD64__
2614 __asm__ __volatile__ ("rep stosq"
2615 : "=D" (pv),
2616 "=c" (uDummy)
2617 : "0" (pv),
2618 "c" (0x1000 >> 3),
2619 "a" (0)
2620 : "memory");
2621# else
2622 __asm__ __volatile__ ("rep stosl"
2623 : "=D" (pv),
2624 "=c" (uDummy)
2625 : "0" (pv),
2626 "c" (0x1000 >> 2),
2627 "a" (0)
2628 : "memory");
2629# endif
2630# else
2631 __asm
2632 {
2633# ifdef __AMD64__
2634 xor rax, rax
2635 mov ecx, 0200h
2636 mov rdi, [pv]
2637 rep stosq
2638# else
2639 xor eax, eax
2640 mov ecx, 0400h
2641 mov edi, [pv]
2642 rep stosd
2643# endif
2644 }
2645# endif
2646}
2647# endif
2648
2649
2650/**
2651 * Zeros a memory block with a 32-bit aligned size.
2652 *
2653 * @param pv Pointer to the memory block.
2654 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
2655 */
2656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2657DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2658#else
2659DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2660{
2661# if RT_INLINE_ASM_USES_INTRIN
2662 __stosd((unsigned long *)pv, 0, cb >> 2);
2663
2664# elif RT_INLINE_ASM_GNU_STYLE
2665 __asm__ __volatile__ ("rep stosl"
2666 : "=D" (pv),
2667 "=c" (cb)
2668 : "0" (pv),
2669 "1" (cb >> 2),
2670 "a" (0)
2671 : "memory");
2672# else
2673 __asm
2674 {
2675 xor eax, eax
2676# ifdef __AMD64__
2677 mov rcx, [cb]
2678 shr rcx, 2
2679 mov rdi, [pv]
2680# else
2681 mov ecx, [cb]
2682 shr ecx, 2
2683 mov edi, [pv]
2684# endif
2685 rep stosd
2686 }
2687# endif
2688}
2689#endif
2690
2691
2692/**
2693 * Fills a memory block with a 32-bit aligned size.
2694 *
2695 * @param pv Pointer to the memory block.
2696 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
2697 * @param u32 The value to fill with.
2698 */
2699#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2700DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2701#else
2702DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2703{
2704# if RT_INLINE_ASM_USES_INTRIN
2705 __stosd((unsigned long *)pv, u32, cb >> 2);
2706
2707# elif RT_INLINE_ASM_GNU_STYLE
2708 __asm__ __volatile__ ("rep stosl"
2709 : "=D" (pv),
2710 "=c" (cb)
2711 : "0" (pv),
2712 "1" (cb >> 2),
2713 "a" (u32)
2714 : "memory");
2715# else
2716 __asm
2717 {
2718# ifdef __AMD64__
2719 mov rcx, [cb]
2720 shr rcx, 2
2721 mov rdi, [pv]
2722# else
2723 mov ecx, [cb]
2724 shr ecx, 2
2725 mov edi, [pv]
2726# endif
2727 mov eax, [u32]
2728 rep stosd
2729 }
2730# endif
2731}
2732#endif
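
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * both helpers require the byte count to be a multiple of 4, so round sizes
 * up before use when necessary.
 *
 *     uint32_t au32Table[256];
 *     ASMMemZero32(au32Table, sizeof(au32Table));              // all zero
 *     ASMMemFill32(au32Table, sizeof(au32Table), 0xffffffff);  // all ones
 */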
2733
2734
2735
2736/**
2737 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2738 *
2739 * @returns u32F1 * u32F2.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2742DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2743#else
2744DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2745{
2746# ifdef __AMD64__
2747 return (uint64_t)u32F1 * u32F2;
2748# else /* !__AMD64__ */
2749 uint64_t u64;
2750# if RT_INLINE_ASM_GNU_STYLE
2751 __asm__ __volatile__("mull %%edx"
2752 : "=A" (u64)
2753 : "a" (u32F2), "d" (u32F1));
2754# else
2755 __asm
2756 {
2757 mov edx, [u32F1]
2758 mov eax, [u32F2]
2759 mul edx
2760 mov dword ptr [u64], eax
2761 mov dword ptr [u64 + 4], edx
2762 }
2763# endif
2764 return u64;
2765# endif /* !__AMD64__ */
2766}
2767#endif
2768
2769
2770/**
2771 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2772 *
2773 * @returns i32F1 * i32F2.
2774 */
2775#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2776DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2777#else
2778DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2779{
2780# ifdef __AMD64__
2781 return (int64_t)i32F1 * i32F2;
2782# else /* !__AMD64__ */
2783 int64_t i64;
2784# if RT_INLINE_ASM_GNU_STYLE
2785 __asm__ __volatile__("imull %%edx"
2786 : "=A" (i64)
2787 : "a" (i32F2), "d" (i32F1));
2788# else
2789 __asm
2790 {
2791 mov edx, [i32F1]
2792 mov eax, [i32F2]
2793 imul edx
2794 mov dword ptr [i64], eax
2795 mov dword ptr [i64 + 4], edx
2796 }
2797# endif
2798 return i64;
2799# endif /* !__AMD64__ */
2800}
2801#endif
2802
2803
2804/**
2805 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2806 *
2807 * @returns u64 / u32.
2808 */
2809#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2810DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2811#else
2812DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2813{
2814# ifdef __AMD64__
2815 return (uint32_t)(u64 / u32);
2816# else /* !__AMD64__ */
2817# if RT_INLINE_ASM_GNU_STYLE
2818 RTUINTREG uDummy;
2819 __asm__ __volatile__("divl %3"
2820 : "=a" (u32), "=d"(uDummy)
2821 : "A" (u64), "r" (u32));
2822# else
2823 __asm
2824 {
2825 mov eax, dword ptr [u64]
2826 mov edx, dword ptr [u64 + 4]
2827 mov ecx, [u32]
2828 div ecx
2829 mov [u32], eax
2830 }
2831# endif
2832 return u32;
2833# endif /* !__AMD64__ */
2834}
2835#endif
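
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * converting a byte count to whole kilobytes. The quotient must fit in
 * 32 bits or the division will trap on x86, so this is only safe when the
 * inputs are bounded (here the file size stays below 4 TiB).
 *
 *     uint64_t cbFile = UINT64_C(3221225472);                // 3 GiB
 *     uint32_t cKB    = ASMDivU64ByU32RetU32(cbFile, 1024);  // 3145728
 */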
2836
2837
2838/**
2839 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2840 *
2841 * @returns i64 / i32.
2842 */
2843#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2844DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2845#else
2846DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2847{
2848# ifdef __AMD64__
2849 return (int32_t)(i64 / i32);
2850# else /* !__AMD64__ */
2851# if RT_INLINE_ASM_GNU_STYLE
2852 RTUINTREG iDummy;
2853 __asm__ __volatile__("idivl %3"
2854 : "=a" (i32), "=d"(iDummy)
2855 : "A" (i64), "r" (i32));
2856# else
2857 __asm
2858 {
2859 mov eax, dword ptr [i64]
2860 mov edx, dword ptr [i64 + 4]
2861 mov ecx, [i32]
2862 idiv ecx
2863 mov [i32], eax
2864 }
2865# endif
2866 return i32;
2867# endif /* !__AMD64__ */
2868}
2869#endif
2870
2871
2872/**
2873 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer,
2874 * using a 96-bit intermediate result.
2875 *
2876 * @returns (u64A * u32B) / u32C.
2877 * @param u64A The 64-bit value.
2878 * @param u32B The 32-bit value to multiply A by.
2879 * @param u32C The 32-bit value to divide A*B by.
2880 */
2881#if RT_INLINE_ASM_EXTERNAL
2882DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2883#else
2884DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2885{
2886# if RT_INLINE_ASM_GNU_STYLE && defined(__AMD64__)
2887 uint64_t u64Result, u64Spill;
2888 __asm__ __volatile__("mulq %2\n\t"
2889 "divq %3\n\t"
2890 : "=a" (u64Result),
2891 "=d" (u64Spill)
2892 : "r" ((uint64_t)u32B),
2893 "r" ((uint64_t)u32C),
2894 "0" (u64A),
2895 "1" (0));
2896 return u64Result;
2897# else
2898 RTUINT64U u;
2899 uint64_t u64Low = (uint64_t)(u64A & 0xffffffff) * u32B;
2900 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2901 u64Hi += (u64Low >> 32);
2902 u.s.Hi = (uint32_t)(u64Hi / u32C);
2903 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Low & 0xffffffff)) / u32C);
2904 return u.u;
2905# endif
2906}
2907#endif
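
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * scaling a TSC delta to nanoseconds. Computing cTicks * 1000000000 directly
 * can overflow 64 bits for large deltas; the 96-bit intermediate keeps the
 * result exact as long as the final quotient fits in 64 bits. uTscHz is
 * assumed to be a measured TSC frequency that fits in 32 bits.
 *
 *     uint64_t tscDeltaToNs(uint64_t cTicks, uint32_t uTscHz)
 *     {
 *         return ASMMultU64ByU32DivByU32(cTicks, 1000000000, uTscHz);
 *     }
 */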
2908
2909
2910/**
2911 * Probes a byte pointer for read access.
2912 *
2913 * While the function will fault if the byte is not read accessible,
2914 * the idea is to do this in a safe place like before acquiring locks
2915 * and such like.
2916 *
2917 * Also, this function guarantees that an eager compiler is not going
2918 * to optimize the probing away.
2919 *
2920 * @param pvByte Pointer to the byte.
2921 */
2922#if RT_INLINE_ASM_EXTERNAL
2923DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2924#else
2925DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2926{
2927 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2928 uint8_t u8;
2929# if RT_INLINE_ASM_GNU_STYLE
2930 __asm__ __volatile__("movb (%1), %0\n\t"
2931 : "=r" (u8)
2932 : "r" (pvByte));
2933# else
2934 __asm
2935 {
2936# ifdef __AMD64__
2937 mov rax, [pvByte]
2938 mov al, [rax]
2939# else
2940 mov eax, [pvByte]
2941 mov al, [eax]
2942# endif
2943 mov [u8], al
2944 }
2945# endif
2946 return u8;
2947}
2948#endif
2949
2950/**
2951 * Probes a buffer for read access page by page.
2952 *
2953 * While the function will fault if the buffer is not fully read
2954 * accessible, the idea is to do this in a safe place like before
2955 * acquiring locks and such like.
2956 *
2957 * Also, this function guarantees that an eager compiler is not going
2958 * to optimize the probing away.
2959 *
2960 * @param pvBuf Pointer to the buffer.
2961 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2962 */
2963DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2964{
2965 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2966 /* the first byte */
2967 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2968 ASMProbeReadByte(pu8);
2969
2970 /* the pages in between. */
2971 while (cbBuf > /*PAGE_SIZE*/0x1000)
2972 {
2973 ASMProbeReadByte(pu8);
2974 cbBuf -= /*PAGE_SIZE*/0x1000;
2975 pu8 += /*PAGE_SIZE*/0x1000;
2976 }
2977
2978 /* the last byte */
2979 ASMProbeReadByte(pu8 + cbBuf - 1);
2980}
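
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * touching a caller-supplied buffer before taking a spinlock, so that any
 * page fault happens while it is still safe to handle one. MYDEV and the
 * mySpinlock*/myDevQueue helpers are assumed.
 *
 *     int myDevWrite(MYDEV *pDev, const void *pvBuf, size_t cbBuf)
 *     {
 *         ASMProbeReadBuffer(pvBuf, cbBuf);   // may fault here, not inside the lock
 *         mySpinlockAcquire(&pDev->Lock);
 *         myDevQueue(pDev, pvBuf, cbBuf);
 *         mySpinlockRelease(&pDev->Lock);
 *         return 0;
 *     }
 */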
2981
2982
2983/** @def ASMBreakpoint
2984 * Debugger Breakpoint.
2985 * @remark In the gnu world we add a nop instruction after the int3 to
2986 * force gdb to remain at the int3 source line.
2987 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
2988 * @internal
2989 */
2990#if RT_INLINE_ASM_GNU_STYLE
2991# ifndef __L4ENV__
2992# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
2993# else
2994# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
2995# endif
2996#else
2997# define ASMBreakpoint() __debugbreak()
2998#endif
2999
3000
3001
3002/** @defgroup grp_inline_bits Bit Operations
3003 * @{
3004 */
3005
3006
3007/**
3008 * Sets a bit in a bitmap.
3009 *
3010 * @param pvBitmap Pointer to the bitmap.
3011 * @param iBit The bit to set.
3012 */
3013#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3014DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3015#else
3016DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3017{
3018# if RT_INLINE_ASM_USES_INTRIN
3019 _bittestandset((long *)pvBitmap, iBit);
3020
3021# elif RT_INLINE_ASM_GNU_STYLE
3022 __asm__ __volatile__ ("btsl %1, %0"
3023 : "=m" (*(volatile long *)pvBitmap)
3024 : "Ir" (iBit)
3025 : "memory");
3026# else
3027 __asm
3028 {
3029# ifdef __AMD64__
3030 mov rax, [pvBitmap]
3031 mov edx, [iBit]
3032 bts [rax], edx
3033# else
3034 mov eax, [pvBitmap]
3035 mov edx, [iBit]
3036 bts [eax], edx
3037# endif
3038 }
3039# endif
3040}
3041#endif
3042
3043
3044/**
3045 * Atomically sets a bit in a bitmap.
3046 *
3047 * @param pvBitmap Pointer to the bitmap.
3048 * @param iBit The bit to set.
3049 */
3050#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3051DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3052#else
3053DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3054{
3055# if RT_INLINE_ASM_USES_INTRIN
3056 _interlockedbittestandset((long *)pvBitmap, iBit);
3057# elif RT_INLINE_ASM_GNU_STYLE
3058 __asm__ __volatile__ ("lock; btsl %1, %0"
3059 : "=m" (*(volatile long *)pvBitmap)
3060 : "Ir" (iBit)
3061 : "memory");
3062# else
3063 __asm
3064 {
3065# ifdef __AMD64__
3066 mov rax, [pvBitmap]
3067 mov edx, [iBit]
3068 lock bts [rax], edx
3069# else
3070 mov eax, [pvBitmap]
3071 mov edx, [iBit]
3072 lock bts [eax], edx
3073# endif
3074 }
3075# endif
3076}
3077#endif
3078
3079
3080/**
3081 * Clears a bit in a bitmap.
3082 *
3083 * @param pvBitmap Pointer to the bitmap.
3084 * @param iBit The bit to clear.
3085 */
3086#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3087DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3088#else
3089DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3090{
3091# if RT_INLINE_ASM_USES_INTRIN
3092 _bittestandreset((long *)pvBitmap, iBit);
3093
3094# elif RT_INLINE_ASM_GNU_STYLE
3095 __asm__ __volatile__ ("btrl %1, %0"
3096 : "=m" (*(volatile long *)pvBitmap)
3097 : "Ir" (iBit)
3098 : "memory");
3099# else
3100 __asm
3101 {
3102# ifdef __AMD64__
3103 mov rax, [pvBitmap]
3104 mov edx, [iBit]
3105 btr [rax], edx
3106# else
3107 mov eax, [pvBitmap]
3108 mov edx, [iBit]
3109 btr [eax], edx
3110# endif
3111 }
3112# endif
3113}
3114#endif
3115
3116
3117/**
3118 * Atomically clears a bit in a bitmap.
3119 *
3120 * @param pvBitmap Pointer to the bitmap.
3121 * @param iBit The bit to clear.
3122 * @remark No memory barrier, take care on SMP.
3123 */
3124#if RT_INLINE_ASM_EXTERNAL
3125DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3126#else
3127DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3128{
3129# if RT_INLINE_ASM_GNU_STYLE
3130 __asm__ __volatile__ ("lock; btrl %1, %0"
3131 : "=m" (*(volatile long *)pvBitmap)
3132 : "Ir" (iBit)
3133 : "memory");
3134# else
3135 __asm
3136 {
3137# ifdef __AMD64__
3138 mov rax, [pvBitmap]
3139 mov edx, [iBit]
3140 lock btr [rax], edx
3141# else
3142 mov eax, [pvBitmap]
3143 mov edx, [iBit]
3144 lock btr [eax], edx
3145# endif
3146 }
3147# endif
3148}
3149#endif
3150
3151
3152/**
3153 * Toggles a bit in a bitmap.
3154 *
3155 * @param pvBitmap Pointer to the bitmap.
3156 * @param iBit The bit to toggle.
3157 */
3158#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3159DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3160#else
3161DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3162{
3163# if RT_INLINE_ASM_USES_INTRIN
3164 _bittestandcomplement((long *)pvBitmap, iBit);
3165# elif RT_INLINE_ASM_GNU_STYLE
3166 __asm__ __volatile__ ("btcl %1, %0"
3167 : "=m" (*(volatile long *)pvBitmap)
3168 : "Ir" (iBit)
3169 : "memory");
3170# else
3171 __asm
3172 {
3173# ifdef __AMD64__
3174 mov rax, [pvBitmap]
3175 mov edx, [iBit]
3176 btc [rax], edx
3177# else
3178 mov eax, [pvBitmap]
3179 mov edx, [iBit]
3180 btc [eax], edx
3181# endif
3182 }
3183# endif
3184}
3185#endif
3186
3187
3188/**
3189 * Atomically toggles a bit in a bitmap.
3190 *
3191 * @param pvBitmap Pointer to the bitmap.
3192 * @param iBit The bit to toggle.
3193 */
3194#if RT_INLINE_ASM_EXTERNAL
3195DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3196#else
3197DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3198{
3199# if RT_INLINE_ASM_GNU_STYLE
3200 __asm__ __volatile__ ("lock; btcl %1, %0"
3201 : "=m" (*(volatile long *)pvBitmap)
3202 : "Ir" (iBit)
3203 : "memory");
3204# else
3205 __asm
3206 {
3207# ifdef __AMD64__
3208 mov rax, [pvBitmap]
3209 mov edx, [iBit]
3210 lock btc [rax], edx
3211# else
3212 mov eax, [pvBitmap]
3213 mov edx, [iBit]
3214 lock btc [eax], edx
3215# endif
3216 }
3217# endif
3218}
3219#endif
3220
3221
3222/**
3223 * Tests and sets a bit in a bitmap.
3224 *
3225 * @returns true if the bit was set.
3226 * @returns false if the bit was clear.
3227 * @param pvBitmap Pointer to the bitmap.
3228 * @param iBit The bit to test and set.
3229 */
3230#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3231DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3232#else
3233DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3234{
3235 union { bool f; uint32_t u32; uint8_t u8; } rc;
3236# if RT_INLINE_ASM_USES_INTRIN
3237 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3238
3239# elif RT_INLINE_ASM_GNU_STYLE
3240 __asm__ __volatile__ ("btsl %2, %1\n\t"
3241 "setc %b0\n\t"
3242 "andl $1, %0\n\t"
3243 : "=q" (rc.u32),
3244 "=m" (*(volatile long *)pvBitmap)
3245 : "Ir" (iBit)
3246 : "memory");
3247# else
3248 __asm
3249 {
3250 mov edx, [iBit]
3251# ifdef __AMD64__
3252 mov rax, [pvBitmap]
3253 bts [rax], edx
3254# else
3255 mov eax, [pvBitmap]
3256 bts [eax], edx
3257# endif
3258 setc al
3259 and eax, 1
3260 mov [rc.u32], eax
3261 }
3262# endif
3263 return rc.f;
3264}
3265#endif
3266
3267
3268/**
3269 * Atomically tests and sets a bit in a bitmap.
3270 *
3271 * @returns true if the bit was set.
3272 * @returns false if the bit was clear.
3273 * @param pvBitmap Pointer to the bitmap.
3274 * @param iBit The bit to test and set.
3275 */
3276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3277DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3278#else
3279DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3280{
3281 union { bool f; uint32_t u32; uint8_t u8; } rc;
3282# if RT_INLINE_ASM_USES_INTRIN
3283 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3284# elif RT_INLINE_ASM_GNU_STYLE
3285 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3286 "setc %b0\n\t"
3287 "andl $1, %0\n\t"
3288 : "=q" (rc.u32),
3289 "=m" (*(volatile long *)pvBitmap)
3290 : "Ir" (iBit)
3291 : "memory");
3292# else
3293 __asm
3294 {
3295 mov edx, [iBit]
3296# ifdef __AMD64__
3297 mov rax, [pvBitmap]
3298 lock bts [rax], edx
3299# else
3300 mov eax, [pvBitmap]
3301 lock bts [eax], edx
3302# endif
3303 setc al
3304 and eax, 1
3305 mov [rc.u32], eax
3306 }
3307# endif
3308 return rc.f;
3309}
3310#endif
3311
3312
3313/**
3314 * Tests and clears a bit in a bitmap.
3315 *
3316 * @returns true if the bit was set.
3317 * @returns false if the bit was clear.
3318 * @param pvBitmap Pointer to the bitmap.
3319 * @param iBit The bit to test and clear.
3320 */
3321#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3322DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3323#else
3324DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3325{
3326 union { bool f; uint32_t u32; uint8_t u8; } rc;
3327# if RT_INLINE_ASM_USES_INTRIN
3328 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3329
3330# elif RT_INLINE_ASM_GNU_STYLE
3331 __asm__ __volatile__ ("btrl %2, %1\n\t"
3332 "setc %b0\n\t"
3333 "andl $1, %0\n\t"
3334 : "=q" (rc.u32),
3335 "=m" (*(volatile long *)pvBitmap)
3336 : "Ir" (iBit)
3337 : "memory");
3338# else
3339 __asm
3340 {
3341 mov edx, [iBit]
3342# ifdef __AMD64__
3343 mov rax, [pvBitmap]
3344 btr [rax], edx
3345# else
3346 mov eax, [pvBitmap]
3347 btr [eax], edx
3348# endif
3349 setc al
3350 and eax, 1
3351 mov [rc.u32], eax
3352 }
3353# endif
3354 return rc.f;
3355}
3356#endif
3357
3358
3359/**
3360 * Atomically tests and clears a bit in a bitmap.
3361 *
3362 * @returns true if the bit was set.
3363 * @returns false if the bit was clear.
3364 * @param pvBitmap Pointer to the bitmap.
3365 * @param iBit The bit to test and clear.
3366 * @remark No memory barrier, take care on SMP.
3367 */
3368#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3369DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3370#else
3371DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3372{
3373 union { bool f; uint32_t u32; uint8_t u8; } rc;
3374# if RT_INLINE_ASM_USES_INTRIN
3375 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3376
3377# elif RT_INLINE_ASM_GNU_STYLE
3378 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3379 "setc %b0\n\t"
3380 "andl $1, %0\n\t"
3381 : "=q" (rc.u32),
3382 "=m" (*(volatile long *)pvBitmap)
3383 : "Ir" (iBit)
3384 : "memory");
3385# else
3386 __asm
3387 {
3388 mov edx, [iBit]
3389# ifdef __AMD64__
3390 mov rax, [pvBitmap]
3391 lock btr [rax], edx
3392# else
3393 mov eax, [pvBitmap]
3394 lock btr [eax], edx
3395# endif
3396 setc al
3397 and eax, 1
3398 mov [rc.u32], eax
3399 }
3400# endif
3401 return rc.f;
3402}
3403#endif
3404
3405
3406/**
3407 * Tests and toggles a bit in a bitmap.
3408 *
3409 * @returns true if the bit was set.
3410 * @returns false if the bit was clear.
3411 * @param pvBitmap Pointer to the bitmap.
3412 * @param iBit The bit to test and toggle.
3413 */
3414#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3415DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3416#else
3417DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3418{
3419 union { bool f; uint32_t u32; uint8_t u8; } rc;
3420# if RT_INLINE_ASM_USES_INTRIN
3421 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3422
3423# elif RT_INLINE_ASM_GNU_STYLE
3424 __asm__ __volatile__ ("btcl %2, %1\n\t"
3425 "setc %b0\n\t"
3426 "andl $1, %0\n\t"
3427 : "=q" (rc.u32),
3428 "=m" (*(volatile long *)pvBitmap)
3429 : "Ir" (iBit)
3430 : "memory");
3431# else
3432 __asm
3433 {
3434 mov edx, [iBit]
3435# ifdef __AMD64__
3436 mov rax, [pvBitmap]
3437 btc [rax], edx
3438# else
3439 mov eax, [pvBitmap]
3440 btc [eax], edx
3441# endif
3442 setc al
3443 and eax, 1
3444 mov [rc.u32], eax
3445 }
3446# endif
3447 return rc.f;
3448}
3449#endif
3450
3451
3452/**
3453 * Atomically tests and toggles a bit in a bitmap.
3454 *
3455 * @returns true if the bit was set.
3456 * @returns false if the bit was clear.
3457 * @param pvBitmap Pointer to the bitmap.
3458 * @param iBit The bit to test and toggle.
3459 */
3460#if RT_INLINE_ASM_EXTERNAL
3461DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3462#else
3463DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3464{
3465 union { bool f; uint32_t u32; uint8_t u8; } rc;
3466# if RT_INLINE_ASM_GNU_STYLE
3467 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3468 "setc %b0\n\t"
3469 "andl $1, %0\n\t"
3470 : "=q" (rc.u32),
3471 "=m" (*(volatile long *)pvBitmap)
3472 : "Ir" (iBit)
3473 : "memory");
3474# else
3475 __asm
3476 {
3477 mov edx, [iBit]
3478# ifdef __AMD64__
3479 mov rax, [pvBitmap]
3480 lock btc [rax], edx
3481# else
3482 mov eax, [pvBitmap]
3483 lock btc [eax], edx
3484# endif
3485 setc al
3486 and eax, 1
3487 mov [rc.u32], eax
3488 }
3489# endif
3490 return rc.f;
3491}
3492#endif
3493
3494
3495/**
3496 * Tests if a bit in a bitmap is set.
3497 *
3498 * @returns true if the bit is set.
3499 * @returns false if the bit is clear.
3500 * @param pvBitmap Pointer to the bitmap.
3501 * @param iBit The bit to test.
3502 */
3503#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3504DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3505#else
3506DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3507{
3508 union { bool f; uint32_t u32; uint8_t u8; } rc;
3509# if RT_INLINE_ASM_USES_INTRIN
3510 rc.u32 = _bittest((long *)pvBitmap, iBit);
3511# elif RT_INLINE_ASM_GNU_STYLE
3512
3513 __asm__ __volatile__ ("btl %2, %1\n\t"
3514 "setc %b0\n\t"
3515 "andl $1, %0\n\t"
3516 : "=q" (rc.u32),
3517 "=m" (*(volatile long *)pvBitmap)
3518 : "Ir" (iBit)
3519 : "memory");
3520# else
3521 __asm
3522 {
3523 mov edx, [iBit]
3524# ifdef __AMD64__
3525 mov rax, [pvBitmap]
3526 bt [rax], edx
3527# else
3528 mov eax, [pvBitmap]
3529 bt [eax], edx
3530# endif
3531 setc al
3532 and eax, 1
3533 mov [rc.u32], eax
3534 }
3535# endif
3536 return rc.f;
3537}
3538#endif
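
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * a bitmap is simply an array of 32-bit words, and the bit index is relative
 * to the start of the bitmap, so index 37 lands in the second word. Use the
 * Atomic* variants when several threads touch the same bitmap concurrently.
 *
 *     uint32_t au32Bitmap[256 / 32];                  // 256 bits
 *     ASMMemZero32(au32Bitmap, sizeof(au32Bitmap));
 *     ASMBitSet(au32Bitmap, 37);
 *     if (ASMBitTest(au32Bitmap, 37))
 *         ASMBitClear(au32Bitmap, 37);
 */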
3539
3540
3541/**
3542 * Clears a bit range within a bitmap.
3543 *
3544 * @param pvBitmap Pointer to the bitmap.
3545 * @param iBitStart The first bit to clear.
3546 * @param iBitEnd The first bit not to clear.
3547 */
3548DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3549{
3550 if (iBitStart < iBitEnd)
3551 {
3552 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3553 int iStart = iBitStart & ~31;
3554 int iEnd = iBitEnd & ~31;
3555 if (iStart == iEnd)
3556 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3557 else
3558 {
3559 /* bits in first dword. */
3560 if (iBitStart & 31)
3561 {
3562 *pu32 &= (1 << (iBitStart & 31)) - 1;
3563 pu32++;
3564 iBitStart = iStart + 32;
3565 }
3566
3567 /* whole dword. */
3568 if (iBitStart != iEnd)
3569 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3570
3571 /* bits in last dword. */
3572 if (iBitEnd & 31)
3573 {
3574 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3575 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3576 }
3577 }
3578 }
3579}
3580
3581
3582/**
3583 * Finds the first clear bit in a bitmap.
3584 *
3585 * @returns Index of the first zero bit.
3586 * @returns -1 if no clear bit was found.
3587 * @param pvBitmap Pointer to the bitmap.
3588 * @param cBits The number of bits in the bitmap. Multiple of 32.
3589 */
3590#if RT_INLINE_ASM_EXTERNAL
3591DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3592#else
3593DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3594{
3595 if (cBits)
3596 {
3597 int32_t iBit;
3598# if RT_INLINE_ASM_GNU_STYLE
3599 RTCCUINTREG uEAX, uECX, uEDI;
3600 cBits = RT_ALIGN_32(cBits, 32);
3601 __asm__ __volatile__("repe; scasl\n\t"
3602 "je 1f\n\t"
3603# ifdef __AMD64__
3604 "lea -4(%%rdi), %%rdi\n\t"
3605 "xorl (%%rdi), %%eax\n\t"
3606 "subq %5, %%rdi\n\t"
3607# else
3608 "lea -4(%%edi), %%edi\n\t"
3609 "xorl (%%edi), %%eax\n\t"
3610 "subl %5, %%edi\n\t"
3611# endif
3612 "shll $3, %%edi\n\t"
3613 "bsfl %%eax, %%edx\n\t"
3614 "addl %%edi, %%edx\n\t"
3615 "1:\t\n"
3616 : "=d" (iBit),
3617 "=&c" (uECX),
3618 "=&D" (uEDI),
3619 "=&a" (uEAX)
3620 : "0" (0xffffffff),
3621 "mr" (pvBitmap),
3622 "1" (cBits >> 5),
3623 "2" (pvBitmap),
3624 "3" (0xffffffff));
3625# else
3626 cBits = RT_ALIGN_32(cBits, 32);
3627 __asm
3628 {
3629# ifdef __AMD64__
3630 mov rdi, [pvBitmap]
3631 mov rbx, rdi
3632# else
3633 mov edi, [pvBitmap]
3634 mov ebx, edi
3635# endif
3636 mov edx, 0ffffffffh
3637 mov eax, edx
3638 mov ecx, [cBits]
3639 shr ecx, 5
3640 repe scasd
3641 je done
3642
3643# ifdef __AMD64__
3644 lea rdi, [rdi - 4]
3645 xor eax, [rdi]
3646 sub rdi, rbx
3647# else
3648 lea edi, [edi - 4]
3649 xor eax, [edi]
3650 sub edi, ebx
3651# endif
3652 shl edi, 3
3653 bsf edx, eax
3654 add edx, edi
3655 done:
3656 mov [iBit], edx
3657 }
3658# endif
3659 return iBit;
3660 }
3661 return -1;
3662}
3663#endif
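
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * pairing the scan with the atomic test-and-set to build a simple slot
 * allocator. ASMBitFirstClear only yields a candidate; ASMAtomicBitTestAndSet
 * confirms the slot was actually won. cSlots must be a multiple of 32, as
 * required by ASMBitFirstClear.
 *
 *     int32_t allocSlot(volatile void *pvBitmap, uint32_t cSlots)
 *     {
 *         for (;;)
 *         {
 *             int32_t iSlot = ASMBitFirstClear(pvBitmap, cSlots);
 *             if (iSlot < 0)
 *                 return -1;                              // all slots taken
 *             if (!ASMAtomicBitTestAndSet(pvBitmap, iSlot))
 *                 return iSlot;                           // it was clear, now ours
 *         }
 *     }
 */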
3664
3665
3666/**
3667 * Finds the next clear bit in a bitmap.
3668 *
3669 * @returns Index of the first zero bit.
3670 * @returns -1 if no clear bit was found.
3671 * @param pvBitmap Pointer to the bitmap.
3672 * @param cBits The number of bits in the bitmap. Multiple of 32.
3673 * @param iBitPrev The bit returned from the last search.
3674 * The search will start at iBitPrev + 1.
3675 */
3676#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3677DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3678#else
3679DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3680{
3681 int iBit = ++iBitPrev & 31;
3682 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3683 cBits -= iBitPrev & ~31;
3684 if (iBit)
3685 {
3686 /* inspect the first dword. */
3687 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3688# if RT_INLINE_ASM_USES_INTRIN
3689 unsigned long ulBit = 0;
3690 if (_BitScanForward(&ulBit, u32))
3691 return ulBit + iBitPrev;
3692 iBit = -1;
3693# else
3694# if RT_INLINE_ASM_GNU_STYLE
3695 __asm__ __volatile__("bsf %1, %0\n\t"
3696 "jnz 1f\n\t"
3697 "movl $-1, %0\n\t"
3698 "1:\n\t"
3699 : "=r" (iBit)
3700 : "r" (u32));
3701# else
3702 __asm
3703 {
3704 mov edx, [u32]
3705 bsf eax, edx
3706 jnz done
3707 mov eax, 0ffffffffh
3708 done:
3709 mov [iBit], eax
3710 }
3711# endif
3712 if (iBit >= 0)
3713 return iBit + iBitPrev;
3714# endif
3715 /* Search the rest of the bitmap, if there is anything. */
3716 if (cBits > 32)
3717 {
3718 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3719 if (iBit >= 0)
3720 return iBit + (iBitPrev & ~31) + 32;
3721 }
3722 }
3723 else
3724 {
3725 /* Search the rest of the bitmap. */
3726 iBit = ASMBitFirstClear(pvBitmap, cBits);
3727 if (iBit >= 0)
3728 return iBit + (iBitPrev & ~31);
3729 }
3730 return iBit;
3731}
3732#endif
3733
3734
3735/**
3736 * Finds the first set bit in a bitmap.
3737 *
3738 * @returns Index of the first set bit.
3739 * @returns -1 if no set bit was found.
3740 * @param pvBitmap Pointer to the bitmap.
3741 * @param cBits The number of bits in the bitmap. Multiple of 32.
3742 */
3743#if RT_INLINE_ASM_EXTERNAL
3744DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3745#else
3746DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3747{
3748 if (cBits)
3749 {
3750 int32_t iBit;
3751# if RT_INLINE_ASM_GNU_STYLE
3752 RTCCUINTREG uEAX, uECX, uEDI;
3753 cBits = RT_ALIGN_32(cBits, 32);
3754 __asm__ __volatile__("repe; scasl\n\t"
3755 "je 1f\n\t"
3756# ifdef __AMD64__
3757 "lea -4(%%rdi), %%rdi\n\t"
3758 "movl (%%rdi), %%eax\n\t"
3759 "subq %5, %%rdi\n\t"
3760# else
3761 "lea -4(%%edi), %%edi\n\t"
3762 "movl (%%edi), %%eax\n\t"
3763 "subl %5, %%edi\n\t"
3764# endif
3765 "shll $3, %%edi\n\t"
3766 "bsfl %%eax, %%edx\n\t"
3767 "addl %%edi, %%edx\n\t"
3768 "1:\t\n"
3769 : "=d" (iBit),
3770 "=&c" (uECX),
3771 "=&D" (uEDI),
3772 "=&a" (uEAX)
3773 : "0" (0xffffffff),
3774 "mr" (pvBitmap),
3775 "1" (cBits >> 5),
3776 "2" (pvBitmap),
3777 "3" (0));
3778# else
3779 cBits = RT_ALIGN_32(cBits, 32);
3780 __asm
3781 {
3782# ifdef __AMD64__
3783 mov rdi, [pvBitmap]
3784 mov rbx, rdi
3785# else
3786 mov edi, [pvBitmap]
3787 mov ebx, edi
3788# endif
3789 mov edx, 0ffffffffh
3790 xor eax, eax
3791 mov ecx, [cBits]
3792 shr ecx, 5
3793 repe scasd
3794 je done
3795# ifdef __AMD64__
3796 lea rdi, [rdi - 4]
3797 mov eax, [rdi]
3798 sub rdi, rbx
3799# else
3800 lea edi, [edi - 4]
3801 mov eax, [edi]
3802 sub edi, ebx
3803# endif
3804 shl edi, 3
3805 bsf edx, eax
3806 add edx, edi
3807 done:
3808 mov [iBit], edx
3809 }
3810# endif
3811 return iBit;
3812 }
3813 return -1;
3814}
3815#endif
3816
3817
3818/**
3819 * Finds the next set bit in a bitmap.
3820 *
3821 * @returns Index of the next set bit.
3822 * @returns -1 if no set bit was found.
3823 * @param pvBitmap Pointer to the bitmap.
3824 * @param cBits The number of bits in the bitmap. Multiple of 32.
3825 * @param iBitPrev The bit returned from the last search.
3826 * The search will start at iBitPrev + 1.
3827 */
3828#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3829DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3830#else
3831DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3832{
3833 int iBit = ++iBitPrev & 31;
3834 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3835 cBits -= iBitPrev & ~31;
3836 if (iBit)
3837 {
3838 /* inspect the first dword. */
3839 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3840# if RT_INLINE_ASM_USES_INTRIN
3841 unsigned long ulBit = 0;
3842 if (_BitScanForward(&ulBit, u32))
3843 return ulBit + iBitPrev;
3844 iBit = -1;
3845# else
3846# if RT_INLINE_ASM_GNU_STYLE
3847 __asm__ __volatile__("bsf %1, %0\n\t"
3848 "jnz 1f\n\t"
3849 "movl $-1, %0\n\t"
3850 "1:\n\t"
3851 : "=r" (iBit)
3852 : "r" (u32));
3853# else
3854 __asm
3855 {
3856 mov edx, u32
3857 bsf eax, edx
3858 jnz done
3859 mov eax, 0ffffffffh
3860 done:
3861 mov [iBit], eax
3862 }
3863# endif
3864 if (iBit >= 0)
3865 return iBit + iBitPrev;
3866# endif
3867 /* Search the rest of the bitmap, if there is anything. */
3868 if (cBits > 32)
3869 {
3870 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3871 if (iBit >= 0)
3872 return iBit + (iBitPrev & ~31) + 32;
3873 }
3874
3875 }
3876 else
3877 {
3878 /* Search the rest of the bitmap. */
3879 iBit = ASMBitFirstSet(pvBitmap, cBits);
3880 if (iBit >= 0)
3881 return iBit + (iBitPrev & ~31);
3882 }
3883 return iBit;
3884}
3885#endif
3886
3887
3888/**
3889 * Finds the first bit which is set in the given 32-bit integer.
3890 * Bits are numbered from 1 (least significant) to 32.
3891 *
3892 * @returns index [1..32] of the first set bit.
3893 * @returns 0 if all bits are cleared.
3894 * @param u32 Integer to search for set bits.
3895 * @remark Similar to ffs() in BSD.
3896 */
3897DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3898{
3899# if RT_INLINE_ASM_USES_INTRIN
3900 unsigned long iBit;
3901 if (_BitScanForward(&iBit, u32))
3902 iBit++;
3903 else
3904 iBit = 0;
3905# elif RT_INLINE_ASM_GNU_STYLE
3906 uint32_t iBit;
3907 __asm__ __volatile__("bsf %1, %0\n\t"
3908 "jnz 1f\n\t"
3909 "xorl %0, %0\n\t"
3910 "jmp 2f\n"
3911 "1:\n\t"
3912 "incl %0\n"
3913 "2:\n\t"
3914 : "=r" (iBit)
3915 : "rm" (u32));
3916# else
3917 uint32_t iBit;
3918 _asm
3919 {
3920 bsf eax, [u32]
3921 jnz found
3922 xor eax, eax
3923 jmp done
3924 found:
3925 inc eax
3926 done:
3927 mov [iBit], eax
3928 }
3929# endif
3930 return iBit;
3931}
3932
3933
3934/**
3935 * Finds the first bit which is set in the given 32-bit integer.
3936 * Bits are numbered from 1 (least significant) to 32.
3937 *
3938 * @returns index [1..32] of the first set bit.
3939 * @returns 0 if all bits are cleared.
3940 * @param i32 Integer to search for set bits.
3941 * @remark Similar to ffs() in BSD.
3942 */
3943DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3944{
3945 return ASMBitFirstSetU32((uint32_t)i32);
3946}
3947
3948
3949/**
3950 * Finds the last bit which is set in the given 32-bit integer.
3951 * Bits are numbered from 1 (least significant) to 32.
3952 *
3953 * @returns index [1..32] of the last set bit.
3954 * @returns 0 if all bits are cleared.
3955 * @param u32 Integer to search for set bits.
3956 * @remark Similar to fls() in BSD.
3957 */
3958DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3959{
3960# if RT_INLINE_ASM_USES_INTRIN
3961 unsigned long iBit;
3962 if (_BitScanReverse(&iBit, u32))
3963 iBit++;
3964 else
3965 iBit = 0;
3966# elif RT_INLINE_ASM_GNU_STYLE
3967 uint32_t iBit;
3968 __asm__ __volatile__("bsrl %1, %0\n\t"
3969 "jnz 1f\n\t"
3970 "xorl %0, %0\n\t"
3971 "jmp 2f\n"
3972 "1:\n\t"
3973 "incl %0\n"
3974 "2:\n\t"
3975 : "=r" (iBit)
3976 : "rm" (u32));
3977# else
3978 uint32_t iBit;
3979 _asm
3980 {
3981 bsr eax, [u32]
3982 jnz found
3983 xor eax, eax
3984 jmp done
3985 found:
3986 inc eax
3987 done:
3988 mov [iBit], eax
3989 }
3990# endif
3991 return iBit;
3992}
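
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * because the helpers number bits from 1, ASMBitLastSetU32(u32) - 1 is
 * floor(log2(u32)) for non-zero input, which gives a cheap order/alignment
 * calculation.
 *
 *     unsigned log2floor(uint32_t u32)
 *     {
 *         return u32 ? ASMBitLastSetU32(u32) - 1 : 0;   // 0 handled arbitrarily
 *     }
 *     // log2floor(0x1000) == 12, log2floor(0x1001) == 12
 */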
3993
3994
3995/**
3996 * Finds the last bit which is set in the given 32-bit integer.
3997 * Bits are numbered from 1 (least significant) to 32.
3998 *
3999 * @returns index [1..32] of the last set bit.
4000 * @returns 0 if all bits are cleared.
4001 * @param i32 Integer to search for set bits.
4002 * @remark Similar to fls() in BSD.
4003 */
4004DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4005{
4006 return ASMBitLastSetU32((uint32_t)i32);
4007}
4008
4009
4010/**
4011 * Reverse the byte order of the given 32-bit integer.
4012 * @param u32 Integer
4013 */
4014DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4015{
4016#if RT_INLINE_ASM_USES_INTRIN
4017 u32 = _byteswap_ulong(u32);
4018#elif RT_INLINE_ASM_GNU_STYLE
4019 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4020#else
4021 _asm
4022 {
4023 mov eax, [u32]
4024 bswap eax
4025 mov [u32], eax
4026 }
4027#endif
4028 return u32;
4029}
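
/* A minimal usage sketch (hypothetical caller code, not part of this header):
 * converting a 32-bit value between little-endian host order and big-endian
 * (network) order is a single byte swap on x86/AMD64.
 *
 *     uint32_t u32Host = 0x12345678;
 *     uint32_t u32BE   = ASMByteSwapU32(u32Host);   // 0x78563412
 */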
4030
4031/** @} */
4032
4033
4034/** @} */
4035#endif
4036