VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 6079

Last change on this file since 6079 was 5999, checked in by vboxsync, 17 years ago

The Giant CDDL Dual-License Header Change.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 101.0 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31/** @todo #include <iprt/param.h> for PAGE_SIZE. */
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler.
34 * Otherwise defined as 0.
35 */
36
37#ifdef _MSC_VER
38# if _MSC_VER >= 1400
39# define RT_INLINE_ASM_USES_INTRIN 1
40# include <intrin.h>
41 /* Emit the intrinsics at all optimization levels. */
42# pragma intrinsic(_ReadWriteBarrier)
43# pragma intrinsic(__cpuid)
44# pragma intrinsic(_enable)
45# pragma intrinsic(_disable)
46# pragma intrinsic(__rdtsc)
47# pragma intrinsic(__readmsr)
48# pragma intrinsic(__writemsr)
49# pragma intrinsic(__outbyte)
50# pragma intrinsic(__outword)
51# pragma intrinsic(__outdword)
52# pragma intrinsic(__inbyte)
53# pragma intrinsic(__inword)
54# pragma intrinsic(__indword)
55# pragma intrinsic(__invlpg)
56# pragma intrinsic(__stosd)
57# pragma intrinsic(__stosw)
58# pragma intrinsic(__stosb)
59# pragma intrinsic(__readcr0)
60# pragma intrinsic(__readcr2)
61# pragma intrinsic(__readcr3)
62# pragma intrinsic(__readcr4)
63# pragma intrinsic(__writecr0)
64# pragma intrinsic(__writecr3)
65# pragma intrinsic(__writecr4)
66# pragma intrinsic(_BitScanForward)
67# pragma intrinsic(_BitScanReverse)
68# pragma intrinsic(_bittest)
69# pragma intrinsic(_bittestandset)
70# pragma intrinsic(_bittestandreset)
71# pragma intrinsic(_bittestandcomplement)
72# pragma intrinsic(_byteswap_ushort)
73# pragma intrinsic(_byteswap_ulong)
74# pragma intrinsic(_interlockedbittestandset)
75# pragma intrinsic(_interlockedbittestandreset)
76# pragma intrinsic(_InterlockedAnd)
77# pragma intrinsic(_InterlockedOr)
78# pragma intrinsic(_InterlockedIncrement)
79# pragma intrinsic(_InterlockedDecrement)
80# pragma intrinsic(_InterlockedExchange)
81# pragma intrinsic(_InterlockedCompareExchange)
82# pragma intrinsic(_InterlockedCompareExchange64)
83# ifdef RT_ARCH_AMD64
84# pragma intrinsic(__stosq)
85# pragma intrinsic(__readcr8)
86# pragma intrinsic(__writecr8)
87# pragma intrinsic(_byteswap_uint64)
88# pragma intrinsic(_InterlockedExchange64)
89# endif
90# endif
91#endif
92#ifndef RT_INLINE_ASM_USES_INTRIN
93# define RT_INLINE_ASM_USES_INTRIN 0
94#endif
95
96
97
98/** @defgroup grp_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 * @{
101 */
102
103/** @def RT_INLINE_ASM_EXTERNAL
104 * Defined as 1 if the compiler does not support inline assembly.
105 * The ASM* functions will then be implemented in an external .asm file.
106 *
107 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
108 * inline assembly in their AMD64 compiler.
109 */
110#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
111# define RT_INLINE_ASM_EXTERNAL 1
112#else
113# define RT_INLINE_ASM_EXTERNAL 0
114#endif
115
116/** @def RT_INLINE_ASM_GNU_STYLE
117 * Defined as 1 if the compiler understands GNU style inline assembly.
118 */
119#if defined(_MSC_VER)
120# define RT_INLINE_ASM_GNU_STYLE 0
121#else
122# define RT_INLINE_ASM_GNU_STYLE 1
123#endif
124
125
126/** @todo find a more proper place for this structure? */
127#pragma pack(1)
128/** IDTR */
129typedef struct RTIDTR
130{
131 /** Size of the IDT. */
132 uint16_t cbIdt;
133 /** Address of the IDT. */
134 uintptr_t pIdt;
135} RTIDTR, *PRTIDTR;
136#pragma pack()
137
138#pragma pack(1)
139/** GDTR */
140typedef struct RTGDTR
141{
142 /** Size of the GDT. */
143 uint16_t cbGdt;
144 /** Address of the GDT. */
145 uintptr_t pGdt;
146} RTGDTR, *PRTGDTR;
147#pragma pack()
148
149
150/** @def ASMReturnAddress
151 * Gets the return address of the current (or calling if you like) function or method.
152 */
153#ifdef _MSC_VER
154# ifdef __cplusplus
155extern "C"
156# endif
157void * _ReturnAddress(void);
158# pragma intrinsic(_ReturnAddress)
159# define ASMReturnAddress() _ReturnAddress()
160#elif defined(__GNUC__) || defined(__DOXYGEN__)
161# define ASMReturnAddress() __builtin_return_address(0)
162#else
163# error "Unsupported compiler."
164#endif
165
166
167/**
168 * Gets the content of the IDTR CPU register.
169 * @param pIdtr Where to store the IDTR contents.
170 */
171#if RT_INLINE_ASM_EXTERNAL
172DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
173#else
174DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
175{
176# if RT_INLINE_ASM_GNU_STYLE
177 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
178# else
179 __asm
180 {
181# ifdef RT_ARCH_AMD64
182 mov rax, [pIdtr]
183 sidt [rax]
184# else
185 mov eax, [pIdtr]
186 sidt [eax]
187# endif
188 }
189# endif
190}
191#endif
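/* Usage sketch (illustrative only): capturing the current IDT base and limit.
 * The variable name is hypothetical.
 *
 *      RTIDTR Idtr;
 *      ASMGetIDTR(&Idtr);
 *      // Idtr.cbIdt now holds the IDT limit, Idtr.pIdt its linear base address.
 */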
192
193
194/**
195 * Sets the content of the IDTR CPU register.
196 * @param pIdtr Where to load the IDTR contents from.
197 */
198#if RT_INLINE_ASM_EXTERNAL
199DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
200#else
201DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
202{
203# if RT_INLINE_ASM_GNU_STYLE
204 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
205# else
206 __asm
207 {
208# ifdef RT_ARCH_AMD64
209 mov rax, [pIdtr]
210 lidt [rax]
211# else
212 mov eax, [pIdtr]
213 lidt [eax]
214# endif
215 }
216# endif
217}
218#endif
219
220
221/**
222 * Gets the content of the GDTR CPU register.
223 * @param pGdtr Where to store the GDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
227#else
228DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pGdtr]
237 sgdt [rax]
238# else
239 mov eax, [pGdtr]
240 sgdt [eax]
241# endif
242 }
243# endif
244}
245#endif
246
247/**
248 * Get the CS register.
249 * @returns cs.
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(RTSEL) ASMGetCS(void);
253#else
254DECLINLINE(RTSEL) ASMGetCS(void)
255{
256 RTSEL SelCS;
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
259# else
260 __asm
261 {
262 mov ax, cs
263 mov [SelCS], ax
264 }
265# endif
266 return SelCS;
267}
268#endif
269
270
271/**
272 * Get the DS register.
273 * @returns DS.
274 */
275#if RT_INLINE_ASM_EXTERNAL
276DECLASM(RTSEL) ASMGetDS(void);
277#else
278DECLINLINE(RTSEL) ASMGetDS(void)
279{
280 RTSEL SelDS;
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
283# else
284 __asm
285 {
286 mov ax, ds
287 mov [SelDS], ax
288 }
289# endif
290 return SelDS;
291}
292#endif
293
294
295/**
296 * Get the ES register.
297 * @returns ES.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetES(void);
301#else
302DECLINLINE(RTSEL) ASMGetES(void)
303{
304 RTSEL SelES;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
307# else
308 __asm
309 {
310 mov ax, es
311 mov [SelES], ax
312 }
313# endif
314 return SelES;
315}
316#endif
317
318
319/**
320 * Get the FS register.
321 * @returns FS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetFS(void);
325#else
326DECLINLINE(RTSEL) ASMGetFS(void)
327{
328 RTSEL SelFS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
331# else
332 __asm
333 {
334 mov ax, fs
335 mov [SelFS], ax
336 }
337# endif
338 return SelFS;
339}
340#endif
341
342
343/**
344 * Get the GS register.
345 * @returns GS.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetGS(void);
349#else
350DECLINLINE(RTSEL) ASMGetGS(void)
351{
352 RTSEL SelGS;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
355# else
356 __asm
357 {
358 mov ax, gs
359 mov [SelGS], ax
360 }
361# endif
362 return SelGS;
363}
364#endif
365
366
367/**
368 * Get the SS register.
369 * @returns SS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetSS(void);
373#else
374DECLINLINE(RTSEL) ASMGetSS(void)
375{
376 RTSEL SelSS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
379# else
380 __asm
381 {
382 mov ax, ss
383 mov [SelSS], ax
384 }
385# endif
386 return SelSS;
387}
388#endif
389
390
391/**
392 * Get the TR register.
393 * @returns TR.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetTR(void);
397#else
398DECLINLINE(RTSEL) ASMGetTR(void)
399{
400 RTSEL SelTR;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
403# else
404 __asm
405 {
406 str ax
407 mov [SelTR], ax
408 }
409# endif
410 return SelTR;
411}
412#endif
413
414
415/**
416 * Get the [RE]FLAGS register.
417 * @returns [RE]FLAGS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTCCUINTREG) ASMGetFlags(void);
421#else
422DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
423{
424 RTCCUINTREG uFlags;
425# if RT_INLINE_ASM_GNU_STYLE
426# ifdef RT_ARCH_AMD64
427 __asm__ __volatile__("pushfq\n\t"
428 "popq %0\n\t"
429 : "=g" (uFlags));
430# else
431 __asm__ __volatile__("pushfl\n\t"
432 "popl %0\n\t"
433 : "=g" (uFlags));
434# endif
435# else
436 __asm
437 {
438# ifdef RT_ARCH_AMD64
439 pushfq
440 pop [uFlags]
441# else
442 pushfd
443 pop [uFlags]
444# endif
445 }
446# endif
447 return uFlags;
448}
449#endif
450
451
452/**
453 * Set the [RE]FLAGS register.
454 * @param uFlags The new [RE]FLAGS value.
455 */
456#if RT_INLINE_ASM_EXTERNAL
457DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
458#else
459DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
460{
461# if RT_INLINE_ASM_GNU_STYLE
462# ifdef RT_ARCH_AMD64
463 __asm__ __volatile__("pushq %0\n\t"
464 "popfq\n\t"
465 : : "g" (uFlags));
466# else
467 __asm__ __volatile__("pushl %0\n\t"
468 "popfl\n\t"
469 : : "g" (uFlags));
470# endif
471# else
472 __asm
473 {
474# ifdef RT_ARCH_AMD64
475 push [uFlags]
476 popfq
477# else
478 push [uFlags]
479 popfd
480# endif
481 }
482# endif
483}
484#endif
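/* Usage sketch (illustrative only): testing whether interrupts are currently
 * enabled by inspecting EFLAGS.IF (bit 9). The helper name is hypothetical.
 *
 *      DECLINLINE(bool) ExampleAreInterruptsEnabled(void)
 *      {
 *          return (ASMGetFlags() & 0x200) != 0;    // 0x200 = EFLAGS.IF
 *      }
 */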
485
486
487/**
488 * Gets the content of the CPU timestamp counter register.
489 *
490 * @returns TSC.
491 */
492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
493DECLASM(uint64_t) ASMReadTSC(void);
494#else
495DECLINLINE(uint64_t) ASMReadTSC(void)
496{
497 RTUINT64U u;
498# if RT_INLINE_ASM_GNU_STYLE
499 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
500# else
501# if RT_INLINE_ASM_USES_INTRIN
502 u.u = __rdtsc();
503# else
504 __asm
505 {
506 rdtsc
507 mov [u.s.Lo], eax
508 mov [u.s.Hi], edx
509 }
510# endif
511# endif
512 return u.u;
513}
514#endif
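/* Usage sketch (illustrative only): measuring an elapsed cycle count. The
 * delta is only meaningful when the code stays on one CPU with a stable TSC.
 *
 *      uint64_t u64Start = ASMReadTSC();
 *      // ... work to be measured ...
 *      uint64_t cTicks = ASMReadTSC() - u64Start;
 */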
515
516
517/**
518 * Performs the cpuid instruction returning all registers.
519 *
520 * @param uOperator CPUID operation (eax).
521 * @param pvEAX Where to store eax.
522 * @param pvEBX Where to store ebx.
523 * @param pvECX Where to store ecx.
524 * @param pvEDX Where to store edx.
525 * @remark We're using void pointers to ease the use of special bitfield structures and such.
526 */
527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
528DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
529#else
530DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
531{
532# if RT_INLINE_ASM_GNU_STYLE
533# ifdef RT_ARCH_AMD64
534 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
535 __asm__ ("cpuid\n\t"
536 : "=a" (uRAX),
537 "=b" (uRBX),
538 "=c" (uRCX),
539 "=d" (uRDX)
540 : "0" (uOperator));
541 *(uint32_t *)pvEAX = (uint32_t)uRAX;
542 *(uint32_t *)pvEBX = (uint32_t)uRBX;
543 *(uint32_t *)pvECX = (uint32_t)uRCX;
544 *(uint32_t *)pvEDX = (uint32_t)uRDX;
545# else
546 __asm__ ("xchgl %%ebx, %1\n\t"
547 "cpuid\n\t"
548 "xchgl %%ebx, %1\n\t"
549 : "=a" (*(uint32_t *)pvEAX),
550 "=r" (*(uint32_t *)pvEBX),
551 "=c" (*(uint32_t *)pvECX),
552 "=d" (*(uint32_t *)pvEDX)
553 : "0" (uOperator));
554# endif
555
556# elif RT_INLINE_ASM_USES_INTRIN
557 int aInfo[4];
558 __cpuid(aInfo, uOperator);
559 *(uint32_t *)pvEAX = aInfo[0];
560 *(uint32_t *)pvEBX = aInfo[1];
561 *(uint32_t *)pvECX = aInfo[2];
562 *(uint32_t *)pvEDX = aInfo[3];
563
564# else
565 uint32_t uEAX;
566 uint32_t uEBX;
567 uint32_t uECX;
568 uint32_t uEDX;
569 __asm
570 {
571 push ebx
572 mov eax, [uOperator]
573 cpuid
574 mov [uEAX], eax
575 mov [uEBX], ebx
576 mov [uECX], ecx
577 mov [uEDX], edx
578 pop ebx
579 }
580 *(uint32_t *)pvEAX = uEAX;
581 *(uint32_t *)pvEBX = uEBX;
582 *(uint32_t *)pvECX = uECX;
583 *(uint32_t *)pvEDX = uEDX;
584# endif
585}
586#endif
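/* Usage sketch (illustrative only): reading the CPU vendor string with leaf 0.
 * The vendor comes back in EBX:EDX:ECX order; the buffer name is hypothetical.
 *
 *      uint32_t uEAX;
 *      char     szVendor[13];
 *      ASMCpuId(0, &uEAX, &szVendor[0], &szVendor[8], &szVendor[4]);
 *      szVendor[12] = '\0';    // e.g. "GenuineIntel" or "AuthenticAMD"
 */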
587
588
589/**
590 * Performs the cpuid instruction returning ecx and edx.
591 *
592 * @param uOperator CPUID operation (eax).
593 * @param pvECX Where to store ecx.
594 * @param pvEDX Where to store edx.
595 * @remark We're using void pointers to ease the use of special bitfield structures and such.
596 */
597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
598DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
599#else
600DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
601{
602 uint32_t uEBX;
603 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
604}
605#endif
606
607
608/**
609 * Performs the cpuid instruction returning edx.
610 *
611 * @param uOperator CPUID operation (eax).
612 * @returns EDX after cpuid operation.
613 */
614#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
615DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
616#else
617DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
618{
619 RTCCUINTREG xDX;
620# if RT_INLINE_ASM_GNU_STYLE
621# ifdef RT_ARCH_AMD64
622 RTCCUINTREG uSpill;
623 __asm__ ("cpuid"
624 : "=a" (uSpill),
625 "=d" (xDX)
626 : "0" (uOperator)
627 : "rbx", "rcx");
628# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
629 __asm__ ("push %%ebx\n\t"
630 "cpuid\n\t"
631 "pop %%ebx\n\t"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ecx");
636# else
637 __asm__ ("cpuid"
638 : "=a" (uOperator),
639 "=d" (xDX)
640 : "0" (uOperator)
641 : "ebx", "ecx");
642# endif
643
644# elif RT_INLINE_ASM_USES_INTRIN
645 int aInfo[4];
646 __cpuid(aInfo, uOperator);
647 xDX = aInfo[3];
648
649# else
650 __asm
651 {
652 push ebx
653 mov eax, [uOperator]
654 cpuid
655 mov [xDX], edx
656 pop ebx
657 }
658# endif
659 return (uint32_t)xDX;
660}
661#endif
662
663
664/**
665 * Performs the cpuid instruction returning ecx.
666 *
667 * @param uOperator CPUID operation (eax).
668 * @returns ECX after cpuid operation.
669 */
670#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
671DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
672#else
673DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
674{
675 RTCCUINTREG xCX;
676# if RT_INLINE_ASM_GNU_STYLE
677# ifdef RT_ARCH_AMD64
678 RTCCUINTREG uSpill;
679 __asm__ ("cpuid"
680 : "=a" (uSpill),
681 "=c" (xCX)
682 : "0" (uOperator)
683 : "rbx", "rdx");
684# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
685 __asm__ ("push %%ebx\n\t"
686 "cpuid\n\t"
687 "pop %%ebx\n\t"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "edx");
692# else
693 __asm__ ("cpuid"
694 : "=a" (uOperator),
695 "=c" (xCX)
696 : "0" (uOperator)
697 : "ebx", "edx");
698
699# endif
700
701# elif RT_INLINE_ASM_USES_INTRIN
702 int aInfo[4];
703 __cpuid(aInfo, uOperator);
704 xCX = aInfo[2];
705
706# else
707 __asm
708 {
709 push ebx
710 mov eax, [uOperator]
711 cpuid
712 mov [xCX], ecx
713 pop ebx
714 }
715# endif
716 return (uint32_t)xCX;
717}
718#endif
719
720
721/**
722 * Checks if the current CPU supports CPUID.
723 *
724 * @returns true if CPUID is supported.
725 */
726DECLINLINE(bool) ASMHasCpuId(void)
727{
728#ifdef RT_ARCH_AMD64
729 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
730#else /* !RT_ARCH_AMD64 */
731 bool fRet = false;
732# if RT_INLINE_ASM_GNU_STYLE
733 uint32_t u1;
734 uint32_t u2;
735 __asm__ ("pushf\n\t"
736 "pop %1\n\t"
737 "mov %1, %2\n\t"
738 "xorl $0x200000, %1\n\t"
739 "push %1\n\t"
740 "popf\n\t"
741 "pushf\n\t"
742 "pop %1\n\t"
743 "cmpl %1, %2\n\t"
744 "setne %0\n\t"
745 "push %2\n\t"
746 "popf\n\t"
747 : "=m" (fRet), "=r" (u1), "=r" (u2));
748# else
749 __asm
750 {
751 pushfd
752 pop eax
753 mov ebx, eax
754 xor eax, 0200000h
755 push eax
756 popfd
757 pushfd
758 pop eax
759 cmp eax, ebx
760 setne fRet
761 push ebx
762 popfd
763 }
764# endif
765 return fRet;
766#endif /* !RT_ARCH_AMD64 */
767}
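/* Usage sketch (illustrative only): guarding CPUID-based feature detection on
 * older 32-bit CPUs. CPUID.1:EDX bit 4 is the TSC feature flag.
 *
 *      bool fHasTsc = false;
 *      if (ASMHasCpuId())
 *          fHasTsc = (ASMCpuId_EDX(1) & RT_BIT(4)) != 0;
 */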
768
769
770/**
771 * Gets the APIC ID of the current CPU.
772 *
773 * @returns the APIC ID.
774 */
775#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
776DECLASM(uint8_t) ASMGetApicId(void);
777#else
778DECLINLINE(uint8_t) ASMGetApicId(void)
779{
780 RTCCUINTREG xBX;
781# if RT_INLINE_ASM_GNU_STYLE
782# ifdef RT_ARCH_AMD64
783 RTCCUINTREG uSpill;
784 __asm__ ("cpuid"
785 : "=a" (uSpill),
786 "=b" (xBX)
787 : "0" (1)
788 : "rcx", "rdx");
789# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
790 RTCCUINTREG uSpill;
791 __asm__ ("mov %%ebx,%1\n\t"
792 "cpuid\n\t"
793 "xchgl %%ebx,%1\n\t"
794 : "=a" (uSpill),
795 "=r" (xBX)
796 : "0" (1)
797 : "ecx", "edx");
798# else
799 RTCCUINTREG uSpill;
800 __asm__ ("cpuid"
801 : "=a" (uSpill),
802 "=b" (xBX)
803 : "0" (1)
804 : "ecx", "edx");
805# endif
806
807# elif RT_INLINE_ASM_USES_INTRIN
808 int aInfo[4];
809 __cpuid(aInfo, 1);
810 xBX = aInfo[1];
811
812# else
813 __asm
814 {
815 push ebx
816 mov eax, 1
817 cpuid
818 mov [xBX], ebx
819 pop ebx
820 }
821# endif
822 return (uint8_t)(xBX >> 24);
823}
824#endif
825
826/**
827 * Get cr0.
828 * @returns cr0.
829 */
830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
831DECLASM(RTCCUINTREG) ASMGetCR0(void);
832#else
833DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
834{
835 RTCCUINTREG uCR0;
836# if RT_INLINE_ASM_USES_INTRIN
837 uCR0 = __readcr0();
838
839# elif RT_INLINE_ASM_GNU_STYLE
840# ifdef RT_ARCH_AMD64
841 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
842# else
843 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
844# endif
845# else
846 __asm
847 {
848# ifdef RT_ARCH_AMD64
849 mov rax, cr0
850 mov [uCR0], rax
851# else
852 mov eax, cr0
853 mov [uCR0], eax
854# endif
855 }
856# endif
857 return uCR0;
858}
859#endif
860
861
862/**
863 * Sets the CR0 register.
864 * @param uCR0 The new CR0 value.
865 */
866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
867DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
868#else
869DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
870{
871# if RT_INLINE_ASM_USES_INTRIN
872 __writecr0(uCR0);
873
874# elif RT_INLINE_ASM_GNU_STYLE
875# ifdef RT_ARCH_AMD64
876 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
877# else
878 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
879# endif
880# else
881 __asm
882 {
883# ifdef RT_ARCH_AMD64
884 mov rax, [uCR0]
885 mov cr0, rax
886# else
887 mov eax, [uCR0]
888 mov cr0, eax
889# endif
890 }
891# endif
892}
893#endif
894
895
896/**
897 * Get cr2.
898 * @returns cr2.
899 */
900#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
901DECLASM(RTCCUINTREG) ASMGetCR2(void);
902#else
903DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
904{
905 RTCCUINTREG uCR2;
906# if RT_INLINE_ASM_USES_INTRIN
907 uCR2 = __readcr2();
908
909# elif RT_INLINE_ASM_GNU_STYLE
910# ifdef RT_ARCH_AMD64
911 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
912# else
913 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
914# endif
915# else
916 __asm
917 {
918# ifdef RT_ARCH_AMD64
919 mov rax, cr2
920 mov [uCR2], rax
921# else
922 mov eax, cr2
923 mov [uCR2], eax
924# endif
925 }
926# endif
927 return uCR2;
928}
929#endif
930
931
932/**
933 * Sets the CR2 register.
934 * @param uCR2 The new CR2 value.
935 */
936#if RT_INLINE_ASM_EXTERNAL
937DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
938#else
939DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
940{
941# if RT_INLINE_ASM_GNU_STYLE
942# ifdef RT_ARCH_AMD64
943 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
944# else
945 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
946# endif
947# else
948 __asm
949 {
950# ifdef RT_ARCH_AMD64
951 mov rax, [uCR2]
952 mov cr2, rax
953# else
954 mov eax, [uCR2]
955 mov cr2, eax
956# endif
957 }
958# endif
959}
960#endif
961
962
963/**
964 * Get cr3.
965 * @returns cr3.
966 */
967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
968DECLASM(RTCCUINTREG) ASMGetCR3(void);
969#else
970DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
971{
972 RTCCUINTREG uCR3;
973# if RT_INLINE_ASM_USES_INTRIN
974 uCR3 = __readcr3();
975
976# elif RT_INLINE_ASM_GNU_STYLE
977# ifdef RT_ARCH_AMD64
978 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
979# else
980 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
981# endif
982# else
983 __asm
984 {
985# ifdef RT_ARCH_AMD64
986 mov rax, cr3
987 mov [uCR3], rax
988# else
989 mov eax, cr3
990 mov [uCR3], eax
991# endif
992 }
993# endif
994 return uCR3;
995}
996#endif
997
998
999/**
1000 * Sets the CR3 register.
1001 *
1002 * @param uCR3 New CR3 value.
1003 */
1004#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1005DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1006#else
1007DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1008{
1009# if RT_INLINE_ASM_USES_INTRIN
1010 __writecr3(uCR3);
1011
1012# elif RT_INLINE_ASM_GNU_STYLE
1013# ifdef RT_ARCH_AMD64
1014 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1015# else
1016 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1017# endif
1018# else
1019 __asm
1020 {
1021# ifdef RT_ARCH_AMD64
1022 mov rax, [uCR3]
1023 mov cr3, rax
1024# else
1025 mov eax, [uCR3]
1026 mov cr3, eax
1027# endif
1028 }
1029# endif
1030}
1031#endif
1032
1033
1034/**
1035 * Reloads the CR3 register.
1036 */
1037#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1038DECLASM(void) ASMReloadCR3(void);
1039#else
1040DECLINLINE(void) ASMReloadCR3(void)
1041{
1042# if RT_INLINE_ASM_USES_INTRIN
1043 __writecr3(__readcr3());
1044
1045# elif RT_INLINE_ASM_GNU_STYLE
1046 RTCCUINTREG u;
1047# ifdef RT_ARCH_AMD64
1048 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1049 "movq %0, %%cr3\n\t"
1050 : "=r" (u));
1051# else
1052 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1053 "movl %0, %%cr3\n\t"
1054 : "=r" (u));
1055# endif
1056# else
1057 __asm
1058 {
1059# ifdef RT_ARCH_AMD64
1060 mov rax, cr3
1061 mov cr3, rax
1062# else
1063 mov eax, cr3
1064 mov cr3, eax
1065# endif
1066 }
1067# endif
1068}
1069#endif
1070
1071
1072/**
1073 * Get cr4.
1074 * @returns cr4.
1075 */
1076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1077DECLASM(RTCCUINTREG) ASMGetCR4(void);
1078#else
1079DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1080{
1081 RTCCUINTREG uCR4;
1082# if RT_INLINE_ASM_USES_INTRIN
1083 uCR4 = __readcr4();
1084
1085# elif RT_INLINE_ASM_GNU_STYLE
1086# ifdef RT_ARCH_AMD64
1087 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1088# else
1089 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1090# endif
1091# else
1092 __asm
1093 {
1094# ifdef RT_ARCH_AMD64
1095 mov rax, cr4
1096 mov [uCR4], rax
1097# else
1098 push eax /* just in case */
1099 /*mov eax, cr4*/
1100 _emit 0x0f
1101 _emit 0x20
1102 _emit 0xe0
1103 mov [uCR4], eax
1104 pop eax
1105# endif
1106 }
1107# endif
1108 return uCR4;
1109}
1110#endif
1111
1112
1113/**
1114 * Sets the CR4 register.
1115 *
1116 * @param uCR4 New CR4 value.
1117 */
1118#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1119DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1120#else
1121DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1122{
1123# if RT_INLINE_ASM_USES_INTRIN
1124 __writecr4(uCR4);
1125
1126# elif RT_INLINE_ASM_GNU_STYLE
1127# ifdef RT_ARCH_AMD64
1128 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1129# else
1130 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1131# endif
1132# else
1133 __asm
1134 {
1135# ifdef RT_ARCH_AMD64
1136 mov rax, [uCR4]
1137 mov cr4, rax
1138# else
1139 mov eax, [uCR4]
1140 _emit 0x0F
1141 _emit 0x22
1142 _emit 0xE0 /* mov cr4, eax */
1143# endif
1144 }
1145# endif
1146}
1147#endif
1148
1149
1150/**
1151 * Get cr8.
1152 * @returns cr8.
1153 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1156DECLASM(RTCCUINTREG) ASMGetCR8(void);
1157#else
1158DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1159{
1160# ifdef RT_ARCH_AMD64
1161 RTCCUINTREG uCR8;
1162# if RT_INLINE_ASM_USES_INTRIN
1163 uCR8 = __readcr8();
1164
1165# elif RT_INLINE_ASM_GNU_STYLE
1166 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1167# else
1168 __asm
1169 {
1170 mov rax, cr8
1171 mov [uCR8], rax
1172 }
1173# endif
1174 return uCR8;
1175# else /* !RT_ARCH_AMD64 */
1176 return 0;
1177# endif /* !RT_ARCH_AMD64 */
1178}
1179#endif
1180
1181
1182/**
1183 * Enables interrupts (EFLAGS.IF).
1184 */
1185#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1186DECLASM(void) ASMIntEnable(void);
1187#else
1188DECLINLINE(void) ASMIntEnable(void)
1189{
1190# if RT_INLINE_ASM_GNU_STYLE
1191 __asm("sti\n");
1192# elif RT_INLINE_ASM_USES_INTRIN
1193 _enable();
1194# else
1195 __asm sti
1196# endif
1197}
1198#endif
1199
1200
1201/**
1202 * Disables interrupts (!EFLAGS.IF).
1203 */
1204#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1205DECLASM(void) ASMIntDisable(void);
1206#else
1207DECLINLINE(void) ASMIntDisable(void)
1208{
1209# if RT_INLINE_ASM_GNU_STYLE
1210 __asm("cli\n");
1211# elif RT_INLINE_ASM_USES_INTRIN
1212 _disable();
1213# else
1214 __asm cli
1215# endif
1216}
1217#endif
1218
1219
1220/**
1221 * Disables interrupts and returns previous xFLAGS.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1225#else
1226DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1227{
1228 RTCCUINTREG xFlags;
1229# if RT_INLINE_ASM_GNU_STYLE
1230# ifdef RT_ARCH_AMD64
1231 __asm__ __volatile__("pushfq\n\t"
1232 "cli\n\t"
1233 "popq %0\n\t"
1234 : "=m" (xFlags));
1235# else
1236 __asm__ __volatile__("pushfl\n\t"
1237 "cli\n\t"
1238 "popl %0\n\t"
1239 : "=m" (xFlags));
1240# endif
1241# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1242 xFlags = ASMGetFlags();
1243 _disable();
1244# else
1245 __asm {
1246 pushfd
1247 cli
1248 pop [xFlags]
1249 }
1250# endif
1251 return xFlags;
1252}
1253#endif
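/* Usage sketch (illustrative only): the usual save-disable/restore pairing
 * with ASMSetFlags around a short section that must not be interrupted.
 *
 *      RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
 *      // ... touch per-CPU state ...
 *      ASMSetFlags(fSavedFlags);
 */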
1254
1255
1256/**
1257 * Reads a model-specific register (MSR).
1258 *
1259 * @returns Register content.
1260 * @param uRegister Register to read.
1261 */
1262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1263DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1264#else
1265DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1266{
1267 RTUINT64U u;
1268# if RT_INLINE_ASM_GNU_STYLE
1269 __asm__ ("rdmsr\n\t"
1270 : "=a" (u.s.Lo),
1271 "=d" (u.s.Hi)
1272 : "c" (uRegister));
1273
1274# elif RT_INLINE_ASM_USES_INTRIN
1275 u.u = __readmsr(uRegister);
1276
1277# else
1278 __asm
1279 {
1280 mov ecx, [uRegister]
1281 rdmsr
1282 mov [u.s.Lo], eax
1283 mov [u.s.Hi], edx
1284 }
1285# endif
1286
1287 return u.u;
1288}
1289#endif
1290
1291
1292/**
1293 * Writes a model-specific register (MSR).
1294 *
1296 * @param uRegister Register to write to.
1297 * @param u64Val Value to write.
1298 */
1299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1300DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1301#else
1302DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1303{
1304 RTUINT64U u;
1305
1306 u.u = u64Val;
1307# if RT_INLINE_ASM_GNU_STYLE
1308 __asm__ __volatile__("wrmsr\n\t"
1309 ::"a" (u.s.Lo),
1310 "d" (u.s.Hi),
1311 "c" (uRegister));
1312
1313# elif RT_INLINE_ASM_USES_INTRIN
1314 __writemsr(uRegister, u.u);
1315
1316# else
1317 __asm
1318 {
1319 mov ecx, [uRegister]
1320 mov edx, [u.s.Hi]
1321 mov eax, [u.s.Lo]
1322 wrmsr
1323 }
1324# endif
1325}
1326#endif
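/* Usage sketch (illustrative only): a read-modify-write of an MSR. MSR 0x1b is
 * IA32_APIC_BASE and bit 11 its global enable bit; treat the sequence purely
 * as an illustration of pairing ASMRdMsr with ASMWrMsr.
 *
 *      uint64_t u64ApicBase = ASMRdMsr(0x1b);
 *      if (!(u64ApicBase & UINT64_C(0x800)))       // bit 11 = APIC global enable
 *          ASMWrMsr(0x1b, u64ApicBase | UINT64_C(0x800));
 */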
1327
1328
1329/**
1330 * Reads the low 32 bits of a model-specific register (MSR).
1331 *
1332 * @returns Register content.
1333 * @param uRegister Register to read.
1334 */
1335#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1336DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1337#else
1338DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1339{
1340 uint32_t u32;
1341# if RT_INLINE_ASM_GNU_STYLE
1342 __asm__ ("rdmsr\n\t"
1343 : "=a" (u32)
1344 : "c" (uRegister)
1345 : "edx");
1346
1347# elif RT_INLINE_ASM_USES_INTRIN
1348 u32 = (uint32_t)__readmsr(uRegister);
1349
1350# else
1351 __asm
1352 {
1353 mov ecx, [uRegister]
1354 rdmsr
1355 mov [u32], eax
1356 }
1357# endif
1358
1359 return u32;
1360}
1361#endif
1362
1363
1364/**
1365 * Reads the high 32 bits of a model-specific register (MSR).
1366 *
1367 * @returns Register content.
1368 * @param uRegister Register to read.
1369 */
1370#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1371DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1372#else
1373DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1374{
1375 uint32_t u32;
1376# if RT_INLINE_ASM_GNU_STYLE
1377 __asm__ ("rdmsr\n\t"
1378 : "=d" (u32)
1379 : "c" (uRegister)
1380 : "eax");
1381
1382# elif RT_INLINE_ASM_USES_INTRIN
1383 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1384
1385# else
1386 __asm
1387 {
1388 mov ecx, [uRegister]
1389 rdmsr
1390 mov [u32], edx
1391 }
1392# endif
1393
1394 return u32;
1395}
1396#endif
1397
1398
1399/**
1400 * Gets dr7.
1401 *
1402 * @returns dr7.
1403 */
1404#if RT_INLINE_ASM_EXTERNAL
1405DECLASM(RTCCUINTREG) ASMGetDR7(void);
1406#else
1407DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1408{
1409 RTCCUINTREG uDR7;
1410# if RT_INLINE_ASM_GNU_STYLE
1411# ifdef RT_ARCH_AMD64
1412 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1413# else
1414 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1415# endif
1416# else
1417 __asm
1418 {
1419# ifdef RT_ARCH_AMD64
1420 mov rax, dr7
1421 mov [uDR7], rax
1422# else
1423 mov eax, dr7
1424 mov [uDR7], eax
1425# endif
1426 }
1427# endif
1428 return uDR7;
1429}
1430#endif
1431
1432
1433/**
1434 * Gets dr6.
1435 *
1436 * @returns dr6.
1437 */
1438#if RT_INLINE_ASM_EXTERNAL
1439DECLASM(RTCCUINTREG) ASMGetDR6(void);
1440#else
1441DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1442{
1443 RTCCUINTREG uDR6;
1444# if RT_INLINE_ASM_GNU_STYLE
1445# ifdef RT_ARCH_AMD64
1446 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1447# else
1448 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1449# endif
1450# else
1451 __asm
1452 {
1453# ifdef RT_ARCH_AMD64
1454 mov rax, dr6
1455 mov [uDR6], rax
1456# else
1457 mov eax, dr6
1458 mov [uDR6], eax
1459# endif
1460 }
1461# endif
1462 return uDR6;
1463}
1464#endif
1465
1466
1467/**
1468 * Reads and clears DR6.
1469 *
1470 * @returns DR6.
1471 */
1472#if RT_INLINE_ASM_EXTERNAL
1473DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1474#else
1475DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1476{
1477 RTCCUINTREG uDR6;
1478# if RT_INLINE_ASM_GNU_STYLE
1479 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1480# ifdef RT_ARCH_AMD64
1481 __asm__ ("movq %%dr6, %0\n\t"
1482 "movq %1, %%dr6\n\t"
1483 : "=r" (uDR6)
1484 : "r" (uNewValue));
1485# else
1486 __asm__ ("movl %%dr6, %0\n\t"
1487 "movl %1, %%dr6\n\t"
1488 : "=r" (uDR6)
1489 : "r" (uNewValue));
1490# endif
1491# else
1492 __asm
1493 {
1494# ifdef RT_ARCH_AMD64
1495 mov rax, dr6
1496 mov [uDR6], rax
1497 mov rcx, rax
1498 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1499 mov dr6, rcx
1500# else
1501 mov eax, dr6
1502 mov [uDR6], eax
1503 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1504 mov dr6, ecx
1505# endif
1506 }
1507# endif
1508 return uDR6;
1509}
1510#endif
1511
1512
1513/**
1514 * Compiler memory barrier.
1515 *
1516 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1517 * values or any outstanding writes when returning from this function.
1518 *
1519 * This function must be used if non-volatile data is modified by a
1520 * device or the VMM. Typical cases are port access, MMIO access,
1521 * trapping instructions, etc.
1522 */
1523#if RT_INLINE_ASM_GNU_STYLE
1524# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1525#elif RT_INLINE_ASM_USES_INTRIN
1526# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1527#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1528DECLINLINE(void) ASMCompilerBarrier(void)
1529{
1530 __asm
1531 {
1532 }
1533}
1534#endif
1535
1536
1537/**
1538 * Writes an 8-bit unsigned integer to an I/O port.
1539 *
1540 * @param Port I/O port to write to.
1541 * @param u8 8-bit integer to write.
1542 */
1543#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1544DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1545#else
1546DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1547{
1548# if RT_INLINE_ASM_GNU_STYLE
1549 __asm__ __volatile__("outb %b1, %w0\n\t"
1550 :: "Nd" (Port),
1551 "a" (u8));
1552
1553# elif RT_INLINE_ASM_USES_INTRIN
1554 __outbyte(Port, u8);
1555
1556# else
1557 __asm
1558 {
1559 mov dx, [Port]
1560 mov al, [u8]
1561 out dx, al
1562 }
1563# endif
1564}
1565#endif
1566
1567
1568/**
1569 * Gets an 8-bit unsigned integer from an I/O port.
1570 *
1571 * @returns 8-bit integer.
1572 * @param Port I/O port to read from.
1573 */
1574#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1575DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1576#else
1577DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1578{
1579 uint8_t u8;
1580# if RT_INLINE_ASM_GNU_STYLE
1581 __asm__ __volatile__("inb %w1, %b0\n\t"
1582 : "=a" (u8)
1583 : "Nd" (Port));
1584
1585# elif RT_INLINE_ASM_USES_INTRIN
1586 u8 = __inbyte(Port);
1587
1588# else
1589 __asm
1590 {
1591 mov dx, [Port]
1592 in al, dx
1593 mov [u8], al
1594 }
1595# endif
1596 return u8;
1597}
1598#endif
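/* Usage sketch (illustrative only): reading the CMOS/RTC seconds register by
 * pairing ASMOutU8 and ASMInU8 on the standard PC ports 0x70/0x71.
 *
 *      ASMOutU8(0x70, 0x00);               // select CMOS register 0 (seconds)
 *      uint8_t bSeconds = ASMInU8(0x71);   // read the selected register
 */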
1599
1600
1601/**
1602 * Writes a 16-bit unsigned integer to an I/O port.
1603 *
1604 * @param Port I/O port to write to.
1605 * @param u16 16-bit integer to write.
1606 */
1607#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1608DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1609#else
1610DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1611{
1612# if RT_INLINE_ASM_GNU_STYLE
1613 __asm__ __volatile__("outw %w1, %w0\n\t"
1614 :: "Nd" (Port),
1615 "a" (u16));
1616
1617# elif RT_INLINE_ASM_USES_INTRIN
1618 __outword(Port, u16);
1619
1620# else
1621 __asm
1622 {
1623 mov dx, [Port]
1624 mov ax, [u16]
1625 out dx, ax
1626 }
1627# endif
1628}
1629#endif
1630
1631
1632/**
1633 * Gets a 16-bit unsigned integer from an I/O port.
1634 *
1635 * @returns 16-bit integer.
1636 * @param Port I/O port to read from.
1637 */
1638#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1639DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1640#else
1641DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1642{
1643 uint16_t u16;
1644# if RT_INLINE_ASM_GNU_STYLE
1645 __asm__ __volatile__("inw %w1, %w0\n\t"
1646 : "=a" (u16)
1647 : "Nd" (Port));
1648
1649# elif RT_INLINE_ASM_USES_INTRIN
1650 u16 = __inword(Port);
1651
1652# else
1653 __asm
1654 {
1655 mov dx, [Port]
1656 in ax, dx
1657 mov [u16], ax
1658 }
1659# endif
1660 return u16;
1661}
1662#endif
1663
1664
1665/**
1666 * Writes a 32-bit unsigned integer to an I/O port.
1667 *
1668 * @param Port I/O port to write to.
1669 * @param u32 32-bit integer to write.
1670 */
1671#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1672DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1673#else
1674DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1675{
1676# if RT_INLINE_ASM_GNU_STYLE
1677 __asm__ __volatile__("outl %1, %w0\n\t"
1678 :: "Nd" (Port),
1679 "a" (u32));
1680
1681# elif RT_INLINE_ASM_USES_INTRIN
1682 __outdword(Port, u32);
1683
1684# else
1685 __asm
1686 {
1687 mov dx, [Port]
1688 mov eax, [u32]
1689 out dx, eax
1690 }
1691# endif
1692}
1693#endif
1694
1695
1696/**
1697 * Gets a 32-bit unsigned integer from an I/O port.
1698 *
1699 * @returns 32-bit integer.
1700 * @param Port I/O port to read from.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1704#else
1705DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1706{
1707 uint32_t u32;
1708# if RT_INLINE_ASM_GNU_STYLE
1709 __asm__ __volatile__("inl %w1, %0\n\t"
1710 : "=a" (u32)
1711 : "Nd" (Port));
1712
1713# elif RT_INLINE_ASM_USES_INTRIN
1714 u32 = __indword(Port);
1715
1716# else
1717 __asm
1718 {
1719 mov dx, [Port]
1720 in eax, dx
1721 mov [u32], eax
1722 }
1723# endif
1724 return u32;
1725}
1726#endif
1727
1728
1729/**
1730 * Atomically Exchange an unsigned 8-bit value.
1731 *
1732 * @returns Current *pu8 value
1733 * @param pu8 Pointer to the 8-bit variable to update.
1734 * @param u8 The 8-bit value to assign to *pu8.
1735 */
1736#if RT_INLINE_ASM_EXTERNAL
1737DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1738#else
1739DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1740{
1741# if RT_INLINE_ASM_GNU_STYLE
1742 __asm__ __volatile__("xchgb %0, %1\n\t"
1743 : "=m" (*pu8),
1744 "=r" (u8)
1745 : "1" (u8));
1746# else
1747 __asm
1748 {
1749# ifdef RT_ARCH_AMD64
1750 mov rdx, [pu8]
1751 mov al, [u8]
1752 xchg [rdx], al
1753 mov [u8], al
1754# else
1755 mov edx, [pu8]
1756 mov al, [u8]
1757 xchg [edx], al
1758 mov [u8], al
1759# endif
1760 }
1761# endif
1762 return u8;
1763}
1764#endif
1765
1766
1767/**
1768 * Atomically Exchange a signed 8-bit value.
1769 *
1770 * @returns Current *pi8 value
1771 * @param pi8 Pointer to the 8-bit variable to update.
1772 * @param i8 The 8-bit value to assign to *pi8.
1773 */
1774DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1775{
1776 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1777}
1778
1779
1780/**
1781 * Atomically Exchange a bool value.
1782 *
1783 * @returns Current *pf value
1784 * @param pf Pointer to the boolean variable to update.
1785 * @param f The boolean value to assign to *pf.
1786 */
1787DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1788{
1789#ifdef _MSC_VER
1790 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1791#else
1792 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1793#endif
1794}
1795
1796
1797/**
1798 * Atomically Exchange an unsigned 16-bit value.
1799 *
1800 * @returns Current *pu16 value
1801 * @param pu16 Pointer to the 16-bit variable to update.
1802 * @param u16 The 16-bit value to assign to *pu16.
1803 */
1804#if RT_INLINE_ASM_EXTERNAL
1805DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1806#else
1807DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1808{
1809# if RT_INLINE_ASM_GNU_STYLE
1810 __asm__ __volatile__("xchgw %0, %1\n\t"
1811 : "=m" (*pu16),
1812 "=r" (u16)
1813 : "1" (u16));
1814# else
1815 __asm
1816 {
1817# ifdef RT_ARCH_AMD64
1818 mov rdx, [pu16]
1819 mov ax, [u16]
1820 xchg [rdx], ax
1821 mov [u16], ax
1822# else
1823 mov edx, [pu16]
1824 mov ax, [u16]
1825 xchg [edx], ax
1826 mov [u16], ax
1827# endif
1828 }
1829# endif
1830 return u16;
1831}
1832#endif
1833
1834
1835/**
1836 * Atomically Exchange a signed 16-bit value.
1837 *
1838 * @returns Current *pi16 value
1839 * @param pi16 Pointer to the 16-bit variable to update.
1840 * @param i16 The 16-bit value to assign to *pi16.
1841 */
1842DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1843{
1844 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1845}
1846
1847
1848/**
1849 * Atomically Exchange an unsigned 32-bit value.
1850 *
1851 * @returns Current *pu32 value
1852 * @param pu32 Pointer to the 32-bit variable to update.
1853 * @param u32 The 32-bit value to assign to *pu32.
1854 */
1855#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1856DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1857#else
1858DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1859{
1860# if RT_INLINE_ASM_GNU_STYLE
1861 __asm__ __volatile__("xchgl %0, %1\n\t"
1862 : "=m" (*pu32),
1863 "=r" (u32)
1864 : "1" (u32));
1865
1866# elif RT_INLINE_ASM_USES_INTRIN
1867 u32 = _InterlockedExchange((long *)pu32, u32);
1868
1869# else
1870 __asm
1871 {
1872# ifdef RT_ARCH_AMD64
1873 mov rdx, [pu32]
1874 mov eax, u32
1875 xchg [rdx], eax
1876 mov [u32], eax
1877# else
1878 mov edx, [pu32]
1879 mov eax, u32
1880 xchg [edx], eax
1881 mov [u32], eax
1882# endif
1883 }
1884# endif
1885 return u32;
1886}
1887#endif
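/* Usage sketch (illustrative only): a minimal test-and-set spinlock built on
 * ASMAtomicXchgU32. Names are hypothetical and no pause/backoff is shown.
 *
 *      static volatile uint32_t g_fExampleLock = 0;
 *
 *      void exampleLockAcquire(void)
 *      {
 *          while (ASMAtomicXchgU32(&g_fExampleLock, 1) != 0)
 *              ;   // busy-wait until the previous value was 0
 *      }
 *
 *      void exampleLockRelease(void)
 *      {
 *          ASMAtomicXchgU32(&g_fExampleLock, 0);
 *      }
 */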
1888
1889
1890/**
1891 * Atomically Exchange a signed 32-bit value.
1892 *
1893 * @returns Current *pi32 value
1894 * @param pi32 Pointer to the 32-bit variable to update.
1895 * @param i32 The 32-bit value to assign to *pi32.
1896 */
1897DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1898{
1899 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1900}
1901
1902
1903/**
1904 * Atomically Exchange an unsigned 64-bit value.
1905 *
1906 * @returns Current *pu64 value
1907 * @param pu64 Pointer to the 64-bit variable to update.
1908 * @param u64 The 64-bit value to assign to *pu64.
1909 */
1910#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1911DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1912#else
1913DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1914{
1915# if defined(RT_ARCH_AMD64)
1916# if RT_INLINE_ASM_USES_INTRIN
1917 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1918
1919# elif RT_INLINE_ASM_GNU_STYLE
1920 __asm__ __volatile__("xchgq %0, %1\n\t"
1921 : "=m" (*pu64),
1922 "=r" (u64)
1923 : "1" (u64));
1924# else
1925 __asm
1926 {
1927 mov rdx, [pu64]
1928 mov rax, [u64]
1929 xchg [rdx], rax
1930 mov [u64], rax
1931 }
1932# endif
1933# else /* !RT_ARCH_AMD64 */
1934# if RT_INLINE_ASM_GNU_STYLE
1935# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
1936 uint32_t u32 = (uint32_t)u64;
1937 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1938 "xchgl %%ebx, %3\n\t"
1939 "1:\n\t"
1940 "lock; cmpxchg8b (%5)\n\t"
1941 "jnz 1b\n\t"
1942 "xchgl %%ebx, %3\n\t"
1943 /*"xchgl %%esi, %5\n\t"*/
1944 : "=A" (u64),
1945 "=m" (*pu64)
1946 : "0" (*pu64),
1947 "m" ( u32 ),
1948 "c" ( (uint32_t)(u64 >> 32) ),
1949 "S" (pu64) );
1950# else /* !PIC */
1951 __asm__ __volatile__("1:\n\t"
1952 "lock; cmpxchg8b %1\n\t"
1953 "jnz 1b\n\t"
1954 : "=A" (u64),
1955 "=m" (*pu64)
1956 : "0" (*pu64),
1957 "b" ( (uint32_t)u64 ),
1958 "c" ( (uint32_t)(u64 >> 32) ));
1959# endif
1960# else
1961 __asm
1962 {
1963 mov ebx, dword ptr [u64]
1964 mov ecx, dword ptr [u64 + 4]
1965 mov edi, pu64
1966 mov eax, dword ptr [edi]
1967 mov edx, dword ptr [edi + 4]
1968 retry:
1969 lock cmpxchg8b [edi]
1970 jnz retry
1971 mov dword ptr [u64], eax
1972 mov dword ptr [u64 + 4], edx
1973 }
1974# endif
1975# endif /* !RT_ARCH_AMD64 */
1976 return u64;
1977}
1978#endif
1979
1980
1981/**
1982 * Atomically Exchange a signed 64-bit value.
1983 *
1984 * @returns Current *pi64 value
1985 * @param pi64 Pointer to the 64-bit variable to update.
1986 * @param i64 The 64-bit value to assign to *pi64.
1987 */
1988DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1989{
1990 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1991}
1992
1993
1994#ifdef RT_ARCH_AMD64
1995/**
1996 * Atomically Exchange an unsigned 128-bit value.
1997 *
1998 * @returns Current *pu128.
1999 * @param pu128 Pointer to the 128-bit variable to update.
2000 * @param u128 The 128-bit value to assign to *pu128.
2001 *
2002 * @remark We cannot really assume that any hardware supports this. Nor do I have
2003 * GAS support for it. So, for the time being we'll BREAK the atomic
2004 * bit of this function and use two 64-bit exchanges instead.
2005 */
2006# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2007DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2008# else
2009DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2010{
2011 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2012 {
2013 /** @todo this is clumsy code */
2014 RTUINT128U u128Ret;
2015 u128Ret.u = u128;
2016 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2017 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2018 return u128Ret.u;
2019 }
2020#if 0 /* later? */
2021 else
2022 {
2023# if RT_INLINE_ASM_GNU_STYLE
2024 __asm__ __volatile__("1:\n\t"
2025 "lock; cmpxchg8b %1\n\t"
2026 "jnz 1b\n\t"
2027 : "=A" (u128),
2028 "=m" (*pu128)
2029 : "0" (*pu128),
2030 "b" ( (uint64_t)u128 ),
2031 "c" ( (uint64_t)(u128 >> 64) ));
2032# else
2033 __asm
2034 {
2035 mov rbx, dword ptr [u128]
2036 mov rcx, dword ptr [u128 + 4]
2037 mov rdi, pu128
2038 mov rax, dword ptr [rdi]
2039 mov rdx, dword ptr [rdi + 4]
2040 retry:
2041 lock cmpxchg16b [rdi]
2042 jnz retry
2043 mov dword ptr [u128], rax
2044 mov dword ptr [u128 + 4], rdx
2045 }
2046# endif
2047 }
2048 return u128;
2049#endif
2050}
2051# endif
2052#endif /* RT_ARCH_AMD64 */
2053
2054
2055/**
2056 * Atomically Reads an unsigned 64-bit value.
2057 *
2058 * @returns Current *pu64 value
2059 * @param pu64 Pointer to the 64-bit variable to read.
2060 * The memory pointed to must be writable.
2061 * @remark This will fault if the memory is read-only!
2062 */
2063#if RT_INLINE_ASM_EXTERNAL
2064DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2065#else
2066DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2067{
2068 uint64_t u64;
2069# ifdef RT_ARCH_AMD64
2070# if RT_INLINE_ASM_GNU_STYLE
2071 __asm__ __volatile__("movq %1, %0\n\t"
2072 : "=r" (u64)
2073 : "m" (*pu64));
2074# else
2075 __asm
2076 {
2077 mov rdx, [pu64]
2078 mov rax, [rdx]
2079 mov [u64], rax
2080 }
2081# endif
2082# else /* !RT_ARCH_AMD64 */
2083# if RT_INLINE_ASM_GNU_STYLE
2084# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2085 uint32_t u32EBX = 0;
2086 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2087 "lock; cmpxchg8b (%5)\n\t"
2088 "xchgl %%ebx, %3\n\t"
2089 : "=A" (u64),
2090 "=m" (*pu64)
2091 : "0" (0),
2092 "m" (u32EBX),
2093 "c" (0),
2094 "S" (pu64));
2095# else /* !PIC */
2096 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2097 : "=A" (u64),
2098 "=m" (*pu64)
2099 : "0" (0),
2100 "b" (0),
2101 "c" (0));
2102# endif
2103# else
2104 __asm
2105 {
2106 xor eax, eax
2107 xor edx, edx
2108 mov edi, pu64
2109 xor ecx, ecx
2110 xor ebx, ebx
2111 lock cmpxchg8b [edi]
2112 mov dword ptr [u64], eax
2113 mov dword ptr [u64 + 4], edx
2114 }
2115# endif
2116# endif /* !RT_ARCH_AMD64 */
2117 return u64;
2118}
2119#endif
2120
2121
2122/**
2123 * Atomically Reads a signed 64-bit value.
2124 *
2125 * @returns Current *pi64 value
2126 * @param pi64 Pointer to the 64-bit variable to read.
2127 * The memory pointed to must be writable.
2128 * @remark This will fault if the memory is read-only!
2129 */
2130DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2131{
2132 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2133}
2134
2135
2136/**
2137 * Atomically Exchange a value whose size might differ
2138 * between platforms or compilers.
2139 *
2140 * @param pu Pointer to the variable to update.
2141 * @param uNew The value to assign to *pu.
2142 */
2143#define ASMAtomicXchgSize(pu, uNew) \
2144 do { \
2145 switch (sizeof(*(pu))) { \
2146 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2147 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2148 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2149 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2150 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2151 } \
2152 } while (0)
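/* Usage sketch (illustrative only): ASMAtomicXchgSize picks the right width at
 * compile time from sizeof(*(pu)), so the same line works whether the variable
 * is 32-bit or 64-bit. The variable name is hypothetical.
 *
 *      volatile size_t cExampleUsers;
 *      ASMAtomicXchgSize(&cExampleUsers, 0);   // expands to the U32 or U64 variant
 */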
2153
2154
2155/**
2156 * Atomically Exchange a pointer value.
2157 *
2158 * @returns Current *ppv value
2159 * @param ppv Pointer to the pointer variable to update.
2160 * @param pv The pointer value to assign to *ppv.
2161 */
2162DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2163{
2164#if ARCH_BITS == 32
2165 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2166#elif ARCH_BITS == 64
2167 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2168#else
2169# error "ARCH_BITS is bogus"
2170#endif
2171}
2172
2173
2174/**
2175 * Atomically Compare and Exchange an unsigned 32-bit value.
2176 *
2177 * @returns true if xchg was done.
2178 * @returns false if xchg wasn't done.
2179 *
2180 * @param pu32 Pointer to the value to update.
2181 * @param u32New The new value to assign to *pu32.
2182 * @param u32Old The old value to compare *pu32 with.
2183 */
2184#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2185DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2186#else
2187DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2188{
2189# if RT_INLINE_ASM_GNU_STYLE
2190 uint32_t u32Ret;
2191 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2192 "setz %%al\n\t"
2193 "movzbl %%al, %%eax\n\t"
2194 : "=m" (*pu32),
2195 "=a" (u32Ret)
2196 : "r" (u32New),
2197 "1" (u32Old));
2198 return (bool)u32Ret;
2199
2200# elif RT_INLINE_ASM_USES_INTRIN
2201 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2202
2203# else
2204 uint32_t u32Ret;
2205 __asm
2206 {
2207# ifdef RT_ARCH_AMD64
2208 mov rdx, [pu32]
2209# else
2210 mov edx, [pu32]
2211# endif
2212 mov eax, [u32Old]
2213 mov ecx, [u32New]
2214# ifdef RT_ARCH_AMD64
2215 lock cmpxchg [rdx], ecx
2216# else
2217 lock cmpxchg [edx], ecx
2218# endif
2219 setz al
2220 movzx eax, al
2221 mov [u32Ret], eax
2222 }
2223 return !!u32Ret;
2224# endif
2225}
2226#endif
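/* Usage sketch (illustrative only): the classic compare-and-swap retry loop,
 * here a saturating increment on top of ASMAtomicCmpXchgU32. The function
 * name is hypothetical.
 *
 *      uint32_t exampleIncNoWrap(volatile uint32_t *pu32)
 *      {
 *          uint32_t u32Old;
 *          do
 *              u32Old = *pu32;
 *          while (   u32Old != UINT32_MAX
 *                 && !ASMAtomicCmpXchgU32(pu32, u32Old + 1, u32Old));
 *          return u32Old;
 *      }
 */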
2227
2228
2229/**
2230 * Atomically Compare and Exchange a signed 32-bit value.
2231 *
2232 * @returns true if xchg was done.
2233 * @returns false if xchg wasn't done.
2234 *
2235 * @param pi32 Pointer to the value to update.
2236 * @param i32New The new value to assign to *pi32.
2237 * @param i32Old The old value to compare *pi32 with.
2238 */
2239DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2240{
2241 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2242}
2243
2244
2245/**
2246 * Atomically Compare and exchange an unsigned 64-bit value.
2247 *
2248 * @returns true if xchg was done.
2249 * @returns false if xchg wasn't done.
2250 *
2251 * @param pu64 Pointer to the 64-bit variable to update.
2252 * @param u64New The 64-bit value to assign to *pu64.
2253 * @param u64Old The value to compare with.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2257#else
2258DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2259{
2260# if RT_INLINE_ASM_USES_INTRIN
2261 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2262
2263# elif defined(RT_ARCH_AMD64)
2264# if RT_INLINE_ASM_GNU_STYLE
2265 uint64_t u64Ret;
2266 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2267 "setz %%al\n\t"
2268 "movzbl %%al, %%eax\n\t"
2269 : "=m" (*pu64),
2270 "=a" (u64Ret)
2271 : "r" (u64New),
2272 "1" (u64Old));
2273 return (bool)u64Ret;
2274# else
2275 bool fRet;
2276 __asm
2277 {
2278 mov rdx, [pu64]
2279 mov rax, [u64Old]
2280 mov rcx, [u64New]
2281 lock cmpxchg [rdx], rcx
2282 setz al
2283 mov [fRet], al
2284 }
2285 return fRet;
2286# endif
2287# else /* !RT_ARCH_AMD64 */
2288 uint32_t u32Ret;
2289# if RT_INLINE_ASM_GNU_STYLE
2290# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2291 uint32_t u32 = (uint32_t)u64New;
2292 uint32_t u32Spill;
2293 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2294 "lock; cmpxchg8b (%6)\n\t"
2295 "setz %%al\n\t"
2296 "xchgl %%ebx, %4\n\t"
2297 "movzbl %%al, %%eax\n\t"
2298 : "=a" (u32Ret),
2299 "=d" (u32Spill),
2300 "=m" (*pu64)
2301 : "A" (u64Old),
2302 "m" ( u32 ),
2303 "c" ( (uint32_t)(u64New >> 32) ),
2304 "S" (pu64) );
2305# else /* !PIC */
2306 uint32_t u32Spill;
2307 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2308 "setz %%al\n\t"
2309 "movzbl %%al, %%eax\n\t"
2310 : "=a" (u32Ret),
2311 "=d" (u32Spill),
2312 "=m" (*pu64)
2313 : "A" (u64Old),
2314 "b" ( (uint32_t)u64New ),
2315 "c" ( (uint32_t)(u64New >> 32) ));
2316# endif
2317 return (bool)u32Ret;
2318# else
2319 __asm
2320 {
2321 mov ebx, dword ptr [u64New]
2322 mov ecx, dword ptr [u64New + 4]
2323 mov edi, [pu64]
2324 mov eax, dword ptr [u64Old]
2325 mov edx, dword ptr [u64Old + 4]
2326 lock cmpxchg8b [edi]
2327 setz al
2328 movzx eax, al
2329 mov dword ptr [u32Ret], eax
2330 }
2331 return !!u32Ret;
2332# endif
2333# endif /* !RT_ARCH_AMD64 */
2334}
2335#endif
2336
2337
2338/**
2339 * Atomically Compare and exchange a signed 64-bit value.
2340 *
2341 * @returns true if xchg was done.
2342 * @returns false if xchg wasn't done.
2343 *
2344 * @param pi64 Pointer to the 64-bit variable to update.
2345 * @param i64 The 64-bit value to assign to *pi64.
2346 * @param i64Old The value to compare with.
2347 */
2348DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2349{
2350 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2351}
2352
2353
2354
2355/** @def ASMAtomicCmpXchgSize
2356 * Atomically Compare and Exchange a value whose size might differ
2357 * between platforms or compilers.
2358 *
2359 * @param pu Pointer to the value to update.
2360 * @param uNew The new value to assign to *pu.
2361 * @param uOld The old value to compare *pu with.
2362 * @param fRc Where to store the result.
2363 */
2364#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2365 do { \
2366 switch (sizeof(*(pu))) { \
2367 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2368 break; \
2369 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2370 break; \
2371 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2372 (fRc) = false; \
2373 break; \
2374 } \
2375 } while (0)
2376
2377
2378/**
2379 * Atomically Compare and Exchange a pointer value.
2380 *
2381 * @returns true if xchg was done.
2382 * @returns false if xchg wasn't done.
2383 *
2384 * @param ppv Pointer to the value to update.
2385 * @param pvNew The new value to assign to *ppv.
2386 * @param pvOld The old value to compare *ppv with.
2387 */
2388DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2389{
2390#if ARCH_BITS == 32
2391 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2392#elif ARCH_BITS == 64
2393 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2394#else
2395# error "ARCH_BITS is bogus"
2396#endif
2397}
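/* Usage sketch (illustrative only): lazily publishing a singleton pointer with
 * ASMAtomicCmpXchgPtr; only the first caller's object survives a race. The
 * factory and destructor names are hypothetical.
 *
 *      static void * volatile g_pvExampleSingleton = NULL;
 *
 *      void *exampleGetSingleton(void)
 *      {
 *          void *pv = g_pvExampleSingleton;
 *          if (!pv)
 *          {
 *              void *pvNew = exampleCreateObject();
 *              if (ASMAtomicCmpXchgPtr(&g_pvExampleSingleton, pvNew, NULL))
 *                  pv = pvNew;
 *              else
 *              {
 *                  exampleDestroyObject(pvNew);    // another thread won the race
 *                  pv = g_pvExampleSingleton;
 *              }
 *          }
 *          return pv;
 *      }
 */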
2398
2399
2400/**
2401 * Atomically increment a 32-bit value.
2402 *
2403 * @returns The new value.
2404 * @param pu32 Pointer to the value to increment.
2405 */
2406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2407DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2408#else
2409DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2410{
2411 uint32_t u32;
2412# if RT_INLINE_ASM_USES_INTRIN
2413 u32 = _InterlockedIncrement((long *)pu32);
2414
2415# elif RT_INLINE_ASM_GNU_STYLE
2416 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2417 "incl %0\n\t"
2418 : "=r" (u32),
2419 "=m" (*pu32)
2420 : "0" (1)
2421 : "memory");
2422# else
2423 __asm
2424 {
2425 mov eax, 1
2426# ifdef RT_ARCH_AMD64
2427 mov rdx, [pu32]
2428 lock xadd [rdx], eax
2429# else
2430 mov edx, [pu32]
2431 lock xadd [edx], eax
2432# endif
2433 inc eax
2434 mov u32, eax
2435 }
2436# endif
2437 return u32;
2438}
2439#endif
2440
2441
2442/**
2443 * Atomically increment a signed 32-bit value.
2444 *
2445 * @returns The new value.
2446 * @param pi32 Pointer to the value to increment.
2447 */
2448DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2449{
2450 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2451}
2452
2453
2454/**
2455 * Atomically decrement an unsigned 32-bit value.
2456 *
2457 * @returns The new value.
2458 * @param pu32 Pointer to the value to decrement.
2459 */
2460#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2461DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2462#else
2463DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2464{
2465 uint32_t u32;
2466# if RT_INLINE_ASM_USES_INTRIN
2467 u32 = _InterlockedDecrement((long *)pu32);
2468
2469# elif RT_INLINE_ASM_GNU_STYLE
2470 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2471 "decl %0\n\t"
2472 : "=r" (u32),
2473 "=m" (*pu32)
2474 : "0" (-1)
2475 : "memory");
2476# else
2477 __asm
2478 {
2479 mov eax, -1
2480# ifdef RT_ARCH_AMD64
2481 mov rdx, [pu32]
2482 lock xadd [rdx], eax
2483# else
2484 mov edx, [pu32]
2485 lock xadd [edx], eax
2486# endif
2487 dec eax
2488 mov u32, eax
2489 }
2490# endif
2491 return u32;
2492}
2493#endif
2494
2495
2496/**
2497 * Atomically decrement a signed 32-bit value.
2498 *
2499 * @returns The new value.
2500 * @param pi32 Pointer to the value to decrement.
2501 */
2502DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2503{
2504 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2505}
2506
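/*
 * Usage sketch (illustrative only; the structure and destructor are
 * hypothetical, not part of the IPRT API): a minimal reference count built on
 * the atomic increment/decrement helpers, destroying the object when the last
 * reference is dropped.
 *
 *     typedef struct RTEXAMPLEOBJ
 *     {
 *         uint32_t volatile cRefs;
 *         // ... payload ...
 *     } RTEXAMPLEOBJ;
 *
 *     static void rtExampleObjRetain(RTEXAMPLEOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     static void rtExampleObjRelease(RTEXAMPLEOBJ *pObj)
 *     {
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *             rtExampleObjDestroy(pObj);
 *     }
 */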
2507
2508/**
2509 * Atomically Or an unsigned 32-bit value.
2510 *
2511 * @param pu32 Pointer to the variable to OR u32 with.
2512 * @param u32 The value to OR *pu32 with.
2513 */
2514#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2515DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2516#else
2517DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2518{
2519# if RT_INLINE_ASM_USES_INTRIN
2520 _InterlockedOr((long volatile *)pu32, (long)u32);
2521
2522# elif RT_INLINE_ASM_GNU_STYLE
2523 __asm__ __volatile__("lock; orl %1, %0\n\t"
2524 : "=m" (*pu32)
2525 : "ir" (u32));
2526# else
2527 __asm
2528 {
2529 mov eax, [u32]
2530# ifdef RT_ARCH_AMD64
2531 mov rdx, [pu32]
2532 lock or [rdx], eax
2533# else
2534 mov edx, [pu32]
2535 lock or [edx], eax
2536# endif
2537 }
2538# endif
2539}
2540#endif
2541
2542
2543/**
2544 * Atomically Or a signed 32-bit value.
2545 *
2546 * @param pi32 Pointer to the variable to OR i32 with.
2547 * @param i32 The value to OR *pi32 with.
2548 */
2549DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2550{
2551 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2552}
2553
2554
2555/**
2556 * Atomically And an unsigned 32-bit value.
2557 *
2558 * @param pu32 Pointer to the variable to AND u32 with.
2559 * @param u32 The value to AND *pu32 with.
2560 */
2561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2562DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2563#else
2564DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2565{
2566# if RT_INLINE_ASM_USES_INTRIN
2567 _InterlockedAnd((long volatile *)pu32, u32);
2568
2569# elif RT_INLINE_ASM_GNU_STYLE
2570 __asm__ __volatile__("lock; andl %1, %0\n\t"
2571 : "=m" (*pu32)
2572 : "ir" (u32));
2573# else
2574 __asm
2575 {
2576 mov eax, [u32]
2577# ifdef RT_ARCH_AMD64
2578 mov rdx, [pu32]
2579 lock and [rdx], eax
2580# else
2581 mov edx, [pu32]
2582 lock and [edx], eax
2583# endif
2584 }
2585# endif
2586}
2587#endif
2588
2589
2590/**
2591 * Atomically And a signed 32-bit value.
2592 *
2593 * @param pi32 Pointer to the variable to AND i32 with.
2594 * @param i32 The value to AND *pi32 with.
2595 */
2596DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2597{
2598 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2599}
2600
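/*
 * Usage sketch (illustrative only; the flag values and status word are
 * hypothetical, not part of the IPRT API): setting and clearing bits in a
 * shared status word with the atomic OR/AND helpers.
 *
 *     #define RTEXAMPLE_F_BUSY    UINT32_C(0x00000001)
 *
 *     static uint32_t volatile g_fExampleStatus = 0;
 *
 *     static void rtExampleMarkBusy(void)
 *     {
 *         ASMAtomicOrU32(&g_fExampleStatus, RTEXAMPLE_F_BUSY);
 *     }
 *
 *     static void rtExampleClearBusy(void)
 *     {
 *         ASMAtomicAndU32(&g_fExampleStatus, ~RTEXAMPLE_F_BUSY);
 *     }
 */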
2601
2602/**
2603 * Invalidate page.
2604 *
2605 * @param pv Address of the page to invalidate.
2606 */
2607#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2608DECLASM(void) ASMInvalidatePage(void *pv);
2609#else
2610DECLINLINE(void) ASMInvalidatePage(void *pv)
2611{
2612# if RT_INLINE_ASM_USES_INTRIN
2613 __invlpg(pv);
2614
2615# elif RT_INLINE_ASM_GNU_STYLE
2616 __asm__ __volatile__("invlpg %0\n\t"
2617 : : "m" (*(uint8_t *)pv));
2618# else
2619 __asm
2620 {
2621# ifdef RT_ARCH_AMD64
2622 mov rax, [pv]
2623 invlpg [rax]
2624# else
2625 mov eax, [pv]
2626 invlpg [eax]
2627# endif
2628 }
2629# endif
2630}
2631#endif
2632
2633
2634#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2635# if PAGE_SIZE != 0x1000
2636# error "PAGE_SIZE is not 0x1000!"
2637# endif
2638#endif
2639
2640/**
2641 * Zeros a 4K memory page.
2642 *
2643 * @param pv Pointer to the memory block. This must be page aligned.
2644 */
2645#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2646DECLASM(void) ASMMemZeroPage(volatile void *pv);
2647# else
2648DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2649{
2650# if RT_INLINE_ASM_USES_INTRIN
2651# ifdef RT_ARCH_AMD64
2652 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2653# else
2654 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2655# endif
2656
2657# elif RT_INLINE_ASM_GNU_STYLE
2658 RTUINTREG uDummy;
2659# ifdef RT_ARCH_AMD64
2660 __asm__ __volatile__ ("rep stosq"
2661 : "=D" (pv),
2662 "=c" (uDummy)
2663 : "0" (pv),
2664 "c" (0x1000 >> 3),
2665 "a" (0)
2666 : "memory");
2667# else
2668 __asm__ __volatile__ ("rep stosl"
2669 : "=D" (pv),
2670 "=c" (uDummy)
2671 : "0" (pv),
2672 "c" (0x1000 >> 2),
2673 "a" (0)
2674 : "memory");
2675# endif
2676# else
2677 __asm
2678 {
2679# ifdef RT_ARCH_AMD64
2680 xor rax, rax
2681 mov ecx, 0200h
2682 mov rdi, [pv]
2683 rep stosq
2684# else
2685 xor eax, eax
2686 mov ecx, 0400h
2687 mov edi, [pv]
2688 rep stosd
2689# endif
2690 }
2691# endif
2692}
2693# endif
2694
2695
2696/**
2697 * Zeros a memory block with a 32-bit aligned size.
2698 *
2699 * @param pv Pointer to the memory block.
2700 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2701 */
2702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2703DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2704#else
2705DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2706{
2707# if RT_INLINE_ASM_USES_INTRIN
2708 __stosd((unsigned long *)pv, 0, cb >> 2);
2709
2710# elif RT_INLINE_ASM_GNU_STYLE
2711 __asm__ __volatile__ ("rep stosl"
2712 : "=D" (pv),
2713 "=c" (cb)
2714 : "0" (pv),
2715 "1" (cb >> 2),
2716 "a" (0)
2717 : "memory");
2718# else
2719 __asm
2720 {
2721 xor eax, eax
2722# ifdef RT_ARCH_AMD64
2723 mov rcx, [cb]
2724 shr rcx, 2
2725 mov rdi, [pv]
2726# else
2727 mov ecx, [cb]
2728 shr ecx, 2
2729 mov edi, [pv]
2730# endif
2731 rep stosd
2732 }
2733# endif
2734}
2735#endif
2736
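/*
 * Usage sketch (illustrative only; the structure is hypothetical, not part of
 * the IPRT API): clearing a small header structure whose size is a multiple of
 * four bytes, as the helper requires.
 *
 *     typedef struct RTEXAMPLEHDR
 *     {
 *         uint32_t u32Magic;
 *         uint32_t cbPayload;
 *         uint64_t offPayload;
 *     } RTEXAMPLEHDR;
 *
 *     static void rtExampleInitHdr(RTEXAMPLEHDR *pHdr)
 *     {
 *         ASMMemZero32(pHdr, sizeof(*pHdr));
 *     }
 */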
2737
2738/**
2739 * Fills a memory block with a 32-bit aligned size.
2740 *
2741 * @param pv Pointer to the memory block.
2742 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2743 * @param u32 The value to fill with.
2744 */
2745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2746DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2747#else
2748DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2749{
2750# if RT_INLINE_ASM_USES_INTRIN
2751 __stosd((unsigned long *)pv, u32, cb >> 2);
2752
2753# elif RT_INLINE_ASM_GNU_STYLE
2754 __asm__ __volatile__ ("rep stosl"
2755 : "=D" (pv),
2756 "=c" (cb)
2757 : "0" (pv),
2758 "1" (cb >> 2),
2759 "a" (u32)
2760 : "memory");
2761# else
2762 __asm
2763 {
2764# ifdef RT_ARCH_AMD64
2765 mov rcx, [cb]
2766 shr rcx, 2
2767 mov rdi, [pv]
2768# else
2769 mov ecx, [cb]
2770 shr ecx, 2
2771 mov edi, [pv]
2772# endif
2773 mov eax, [u32]
2774 rep stosd
2775 }
2776# endif
2777}
2778#endif
2779
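/*
 * Usage sketch (illustrative only; the table and poison value are hypothetical,
 * not part of the IPRT API): filling a dword table with a recognizable poison
 * pattern before handing it out; the byte count is a multiple of four.
 *
 *     static uint32_t g_au32ExampleTable[256];
 *
 *     static void rtExamplePoisonTable(void)
 *     {
 *         ASMMemFill32(g_au32ExampleTable, sizeof(g_au32ExampleTable), UINT32_C(0xdeadbeef));
 *     }
 */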
2780
2781/**
2782 * Checks if a memory block is filled with the specified byte.
2783 *
2784 * This is a sort of inverted memchr.
2785 *
2786 * @returns Pointer to the byte which doesn't equal u8.
2787 * @returns NULL if all equal to u8.
2788 *
2789 * @param pv Pointer to the memory block.
2790 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2791 * @param u8 The value it's supposed to be filled with.
2792 */
2793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2794DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
2795#else
2796DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
2797{
2798/** @todo rewrite this in inline assembly. */
2799 uint8_t const *pb = (uint8_t const *)pv;
2800 for (; cb; cb--, pb++)
2801 if (RT_UNLIKELY(*pb != u8))
2802 return (void *)pb;
2803 return NULL;
2804}
2805#endif
2806
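/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): asserting that a 4K page really is all zeros, e.g. after
 * ASMMemZeroPage in a debug build.
 *
 *     static bool rtExampleIsPageZero(void const *pvPage)
 *     {
 *         return ASMMemIsAll8(pvPage, 0x1000, 0) == NULL;
 *     }
 */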
2807
2808
2809/**
2810 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2811 *
2812 * @returns u32F1 * u32F2.
2813 */
2814#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2815DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2816#else
2817DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2818{
2819# ifdef RT_ARCH_AMD64
2820 return (uint64_t)u32F1 * u32F2;
2821# else /* !RT_ARCH_AMD64 */
2822 uint64_t u64;
2823# if RT_INLINE_ASM_GNU_STYLE
2824 __asm__ __volatile__("mull %%edx"
2825 : "=A" (u64)
2826 : "a" (u32F2), "d" (u32F1));
2827# else
2828 __asm
2829 {
2830 mov edx, [u32F1]
2831 mov eax, [u32F2]
2832 mul edx
2833 mov dword ptr [u64], eax
2834 mov dword ptr [u64 + 4], edx
2835 }
2836# endif
2837 return u64;
2838# endif /* !RT_ARCH_AMD64 */
2839}
2840#endif
2841
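/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): computing a byte offset from two 32-bit factors without
 * truncating the product to 32 bits.
 *
 *     static uint64_t rtExampleSectorsToBytes(uint32_t cSectors, uint32_t cbSector)
 *     {
 *         return ASMMult2xU32RetU64(cSectors, cbSector);
 *     }
 */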
2842
2843/**
2844 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2845 *
2846 * @returns i32F1 * i32F2.
2847 */
2848#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2849DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2850#else
2851DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2852{
2853# ifdef RT_ARCH_AMD64
2854 return (int64_t)i32F1 * i32F2;
2855# else /* !RT_ARCH_AMD64 */
2856 int64_t i64;
2857# if RT_INLINE_ASM_GNU_STYLE
2858 __asm__ __volatile__("imull %%edx"
2859 : "=A" (i64)
2860 : "a" (i32F2), "d" (i32F1));
2861# else
2862 __asm
2863 {
2864 mov edx, [i32F1]
2865 mov eax, [i32F2]
2866 imul edx
2867 mov dword ptr [i64], eax
2868 mov dword ptr [i64 + 4], edx
2869 }
2870# endif
2871 return i64;
2872# endif /* !RT_ARCH_AMD64 */
2873}
2874#endif
2875
2876
2877/**
2878 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2879 *
2880 * @returns u64 / u32.
2881 */
2882#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2883DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2884#else
2885DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2886{
2887# ifdef RT_ARCH_AMD64
2888 return (uint32_t)(u64 / u32);
2889# else /* !RT_ARCH_AMD64 */
2890# if RT_INLINE_ASM_GNU_STYLE
2891 RTUINTREG uDummy;
2892 __asm__ __volatile__("divl %3"
2893 : "=a" (u32), "=d"(uDummy)
2894 : "A" (u64), "r" (u32));
2895# else
2896 __asm
2897 {
2898 mov eax, dword ptr [u64]
2899 mov edx, dword ptr [u64 + 4]
2900 mov ecx, [u32]
2901 div ecx
2902 mov [u32], eax
2903 }
2904# endif
2905 return u32;
2906# endif /* !RT_ARCH_AMD64 */
2907}
2908#endif
2909
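/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): computing an average rate where the byte count is 64-bit but the
 * result is known to fit in 32 bits (otherwise the division would trap).
 *
 *     static uint32_t rtExampleAvgBytesPerSec(uint64_t cbTotal, uint32_t cSecs)
 *     {
 *         return cSecs ? ASMDivU64ByU32RetU32(cbTotal, cSecs) : 0;
 *     }
 */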
2910
2911/**
2912 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2913 *
2914 * @returns i64 / i32.
2915 */
2916#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2917DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2918#else
2919DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2920{
2921# ifdef RT_ARCH_AMD64
2922 return (int32_t)(i64 / i32);
2923# else /* !RT_ARCH_AMD64 */
2924# if RT_INLINE_ASM_GNU_STYLE
2925 RTUINTREG iDummy;
2926 __asm__ __volatile__("idivl %3"
2927 : "=a" (i32), "=d"(iDummy)
2928 : "A" (i64), "r" (i32));
2929# else
2930 __asm
2931 {
2932 mov eax, dword ptr [i64]
2933 mov edx, dword ptr [i64 + 4]
2934 mov ecx, [i32]
2935 idiv ecx
2936 mov [i32], eax
2937 }
2938# endif
2939 return i32;
2940# endif /* !RT_ARCH_AMD64 */
2941}
2942#endif
2943
2944
2945/**
2946 * Multiplies a 64-bit integer by a 32-bit integer and divides the result by a 32-bit
2947 * integer, using a 96-bit intermediate result.
2948 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2949 * __udivdi3 and __umoddi3 even if this inline function is not used.
2950 *
2951 * @returns (u64A * u32B) / u32C.
2952 * @param u64A The 64-bit value.
2953 * @param u32B The 32-bit value to multiply A by.
2954 * @param u32C The 32-bit value to divide A*B by.
2955 */
2956#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
2957DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2958#else
2959DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2960{
2961# if RT_INLINE_ASM_GNU_STYLE
2962# ifdef RT_ARCH_AMD64
2963 uint64_t u64Result, u64Spill;
2964 __asm__ __volatile__("mulq %2\n\t"
2965 "divq %3\n\t"
2966 : "=a" (u64Result),
2967 "=d" (u64Spill)
2968 : "r" ((uint64_t)u32B),
2969 "r" ((uint64_t)u32C),
2970 "0" (u64A),
2971 "1" (0));
2972 return u64Result;
2973# else
2974 uint32_t u32Dummy;
2975 uint64_t u64Result;
2976 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2977 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2978 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2979 eax = u64A.hi */
2980 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2981 edx = u32C */
2982 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2983 edx = u32B */
2984 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2985 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2986 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2987 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2988 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2989 edx = u64Hi % u32C */
2990 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2991 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2992 "divl %%ecx \n\t" /* u64Result.lo */
2993 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2994 : "=A"(u64Result), "=c"(u32Dummy),
2995 "=S"(u32Dummy), "=D"(u32Dummy)
2996 : "a"((uint32_t)u64A),
2997 "S"((uint32_t)(u64A >> 32)),
2998 "c"(u32B),
2999 "D"(u32C));
3000 return u64Result;
3001# endif
3002# else
3003 RTUINT64U u;
3004 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
3005 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
3006 u64Hi += (u64Lo >> 32);
3007 u.s.Hi = (uint32_t)(u64Hi / u32C);
3008 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3009 return u.u;
3010# endif
3011}
3012#endif
3013
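/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): rescaling a 64-bit tick count from one frequency to another, e.g.
 * timer ticks to nanoseconds, without overflowing a 64-bit intermediate. The
 * final quotient is assumed to fit in 64 bits.
 *
 *     static uint64_t rtExampleTicksToNano(uint64_t cTicks, uint32_t uHzFreq)
 *     {
 *         return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHzFreq);
 *     }
 */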
3014
3015/**
3016 * Probes a byte pointer for read access.
3017 *
3018 * While the function will fault if the byte is not read accessible,
3019 * the idea is to do this in a safe place, like before acquiring locks
3020 * and the like.
3021 *
3022 * Also, this function guarantees that an eager compiler is not going
3023 * to optimize the probing away.
3024 *
3025 * @param pvByte Pointer to the byte.
3026 */
3027#if RT_INLINE_ASM_EXTERNAL
3028DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3029#else
3030DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3031{
3032 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3033 uint8_t u8;
3034# if RT_INLINE_ASM_GNU_STYLE
3035 __asm__ __volatile__("movb (%1), %0\n\t"
3036 : "=r" (u8)
3037 : "r" (pvByte));
3038# else
3039 __asm
3040 {
3041# ifdef RT_ARCH_AMD64
3042 mov rax, [pvByte]
3043 mov al, [rax]
3044# else
3045 mov eax, [pvByte]
3046 mov al, [eax]
3047# endif
3048 mov [u8], al
3049 }
3050# endif
3051 return u8;
3052}
3053#endif
3054
3055/**
3056 * Probes a buffer for read access page by page.
3057 *
3058 * While the function will fault if the buffer is not fully read
3059 * accessible, the idea is to do this in a safe place like before
3060 * acquiring locks and such like.
3061 *
3062 * Also, this function guarantees that an eager compiler is not going
3063 * to optimize the probing away.
3064 *
3065 * @param pvBuf Pointer to the buffer.
3066 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3067 */
3068DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3069{
3070 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3071 /* the first byte */
3072 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3073 ASMProbeReadByte(pu8);
3074
3075 /* the pages in between. */
3076 while (cbBuf > /*PAGE_SIZE*/0x1000)
3077 {
3078 ASMProbeReadByte(pu8);
3079 cbBuf -= /*PAGE_SIZE*/0x1000;
3080 pu8 += /*PAGE_SIZE*/0x1000;
3081 }
3082
3083 /* the last byte */
3084 ASMProbeReadByte(pu8 + cbBuf - 1);
3085}
3086
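/*
 * Usage sketch (illustrative only; the function is hypothetical and the status
 * codes assume <iprt/err.h>): touching a caller-supplied buffer up front so
 * that any page fault happens here rather than later while a spinlock is held.
 *
 *     static int rtExampleWrite(const void *pvBuf, size_t cbBuf)
 *     {
 *         if (!cbBuf)
 *             return VERR_INVALID_PARAMETER;
 *         ASMProbeReadBuffer(pvBuf, cbBuf);
 *         // ... take the lock and consume the data ...
 *         return VINF_SUCCESS;
 *     }
 */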
3087
3088/** @def ASMBreakpoint
3089 * Debugger Breakpoint.
3090 * @remark In the gnu world we add a nop instruction after the int3 to
3091 * force gdb to remain at the int3 source line.
3092 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3093 * @internal
3094 */
3095#if RT_INLINE_ASM_GNU_STYLE
3096# ifndef __L4ENV__
3097# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3098# else
3099# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3100# endif
3101#else
3102# define ASMBreakpoint() __debugbreak()
3103#endif
3104
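/*
 * Usage sketch (illustrative only; the wrapper macro and DEBUG define are
 * hypothetical, not part of the IPRT API): trapping into the debugger when an
 * unexpected condition is hit in debug builds, compiling to nothing otherwise.
 *
 *     #ifdef DEBUG
 *     # define RTEXAMPLE_STOP_IF(expr) do { if (expr) ASMBreakpoint(); } while (0)
 *     #else
 *     # define RTEXAMPLE_STOP_IF(expr) do { } while (0)
 *     #endif
 */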
3105
3106
3107/** @defgroup grp_inline_bits Bit Operations
3108 * @{
3109 */
3110
3111
3112/**
3113 * Sets a bit in a bitmap.
3114 *
3115 * @param pvBitmap Pointer to the bitmap.
3116 * @param iBit The bit to set.
3117 */
3118#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3119DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3120#else
3121DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3122{
3123# if RT_INLINE_ASM_USES_INTRIN
3124 _bittestandset((long *)pvBitmap, iBit);
3125
3126# elif RT_INLINE_ASM_GNU_STYLE
3127 __asm__ __volatile__ ("btsl %1, %0"
3128 : "=m" (*(volatile long *)pvBitmap)
3129 : "Ir" (iBit)
3130 : "memory");
3131# else
3132 __asm
3133 {
3134# ifdef RT_ARCH_AMD64
3135 mov rax, [pvBitmap]
3136 mov edx, [iBit]
3137 bts [rax], edx
3138# else
3139 mov eax, [pvBitmap]
3140 mov edx, [iBit]
3141 bts [eax], edx
3142# endif
3143 }
3144# endif
3145}
3146#endif
3147
3148
3149/**
3150 * Atomically sets a bit in a bitmap.
3151 *
3152 * @param pvBitmap Pointer to the bitmap.
3153 * @param iBit The bit to set.
3154 */
3155#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3156DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3157#else
3158DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3159{
3160# if RT_INLINE_ASM_USES_INTRIN
3161 _interlockedbittestandset((long *)pvBitmap, iBit);
3162# elif RT_INLINE_ASM_GNU_STYLE
3163 __asm__ __volatile__ ("lock; btsl %1, %0"
3164 : "=m" (*(volatile long *)pvBitmap)
3165 : "Ir" (iBit)
3166 : "memory");
3167# else
3168 __asm
3169 {
3170# ifdef RT_ARCH_AMD64
3171 mov rax, [pvBitmap]
3172 mov edx, [iBit]
3173 lock bts [rax], edx
3174# else
3175 mov eax, [pvBitmap]
3176 mov edx, [iBit]
3177 lock bts [eax], edx
3178# endif
3179 }
3180# endif
3181}
3182#endif
3183
3184
3185/**
3186 * Clears a bit in a bitmap.
3187 *
3188 * @param pvBitmap Pointer to the bitmap.
3189 * @param iBit The bit to clear.
3190 */
3191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3192DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3193#else
3194DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3195{
3196# if RT_INLINE_ASM_USES_INTRIN
3197 _bittestandreset((long *)pvBitmap, iBit);
3198
3199# elif RT_INLINE_ASM_GNU_STYLE
3200 __asm__ __volatile__ ("btrl %1, %0"
3201 : "=m" (*(volatile long *)pvBitmap)
3202 : "Ir" (iBit)
3203 : "memory");
3204# else
3205 __asm
3206 {
3207# ifdef RT_ARCH_AMD64
3208 mov rax, [pvBitmap]
3209 mov edx, [iBit]
3210 btr [rax], edx
3211# else
3212 mov eax, [pvBitmap]
3213 mov edx, [iBit]
3214 btr [eax], edx
3215# endif
3216 }
3217# endif
3218}
3219#endif
3220
3221
3222/**
3223 * Atomically clears a bit in a bitmap.
3224 *
3225 * @param pvBitmap Pointer to the bitmap.
3226 * @param iBit The bit to clear.
3227 * @remark No memory barrier, take care on SMP.
3228 */
3229#if RT_INLINE_ASM_EXTERNAL
3230DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3231#else
3232DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3233{
3234# if RT_INLINE_ASM_GNU_STYLE
3235 __asm__ __volatile__ ("lock; btrl %1, %0"
3236 : "=m" (*(volatile long *)pvBitmap)
3237 : "Ir" (iBit)
3238 : "memory");
3239# else
3240 __asm
3241 {
3242# ifdef RT_ARCH_AMD64
3243 mov rax, [pvBitmap]
3244 mov edx, [iBit]
3245 lock btr [rax], edx
3246# else
3247 mov eax, [pvBitmap]
3248 mov edx, [iBit]
3249 lock btr [eax], edx
3250# endif
3251 }
3252# endif
3253}
3254#endif
3255
3256
3257/**
3258 * Toggles a bit in a bitmap.
3259 *
3260 * @param pvBitmap Pointer to the bitmap.
3261 * @param iBit The bit to toggle.
3262 */
3263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3264DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3265#else
3266DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3267{
3268# if RT_INLINE_ASM_USES_INTRIN
3269 _bittestandcomplement((long *)pvBitmap, iBit);
3270# elif RT_INLINE_ASM_GNU_STYLE
3271 __asm__ __volatile__ ("btcl %1, %0"
3272 : "=m" (*(volatile long *)pvBitmap)
3273 : "Ir" (iBit)
3274 : "memory");
3275# else
3276 __asm
3277 {
3278# ifdef RT_ARCH_AMD64
3279 mov rax, [pvBitmap]
3280 mov edx, [iBit]
3281 btc [rax], edx
3282# else
3283 mov eax, [pvBitmap]
3284 mov edx, [iBit]
3285 btc [eax], edx
3286# endif
3287 }
3288# endif
3289}
3290#endif
3291
3292
3293/**
3294 * Atomically toggles a bit in a bitmap.
3295 *
3296 * @param pvBitmap Pointer to the bitmap.
3297 * @param iBit The bit to toggle.
3298 */
3299#if RT_INLINE_ASM_EXTERNAL
3300DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3301#else
3302DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3303{
3304# if RT_INLINE_ASM_GNU_STYLE
3305 __asm__ __volatile__ ("lock; btcl %1, %0"
3306 : "=m" (*(volatile long *)pvBitmap)
3307 : "Ir" (iBit)
3308 : "memory");
3309# else
3310 __asm
3311 {
3312# ifdef RT_ARCH_AMD64
3313 mov rax, [pvBitmap]
3314 mov edx, [iBit]
3315 lock btc [rax], edx
3316# else
3317 mov eax, [pvBitmap]
3318 mov edx, [iBit]
3319 lock btc [eax], edx
3320# endif
3321 }
3322# endif
3323}
3324#endif
3325
3326
3327/**
3328 * Tests and sets a bit in a bitmap.
3329 *
3330 * @returns true if the bit was set.
3331 * @returns false if the bit was clear.
3332 * @param pvBitmap Pointer to the bitmap.
3333 * @param iBit The bit to test and set.
3334 */
3335#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3336DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3337#else
3338DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3339{
3340 union { bool f; uint32_t u32; uint8_t u8; } rc;
3341# if RT_INLINE_ASM_USES_INTRIN
3342 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3343
3344# elif RT_INLINE_ASM_GNU_STYLE
3345 __asm__ __volatile__ ("btsl %2, %1\n\t"
3346 "setc %b0\n\t"
3347 "andl $1, %0\n\t"
3348 : "=q" (rc.u32),
3349 "=m" (*(volatile long *)pvBitmap)
3350 : "Ir" (iBit)
3351 : "memory");
3352# else
3353 __asm
3354 {
3355 mov edx, [iBit]
3356# ifdef RT_ARCH_AMD64
3357 mov rax, [pvBitmap]
3358 bts [rax], edx
3359# else
3360 mov eax, [pvBitmap]
3361 bts [eax], edx
3362# endif
3363 setc al
3364 and eax, 1
3365 mov [rc.u32], eax
3366 }
3367# endif
3368 return rc.f;
3369}
3370#endif
3371
3372
3373/**
3374 * Atomically tests and sets a bit in a bitmap.
3375 *
3376 * @returns true if the bit was set.
3377 * @returns false if the bit was clear.
3378 * @param pvBitmap Pointer to the bitmap.
3379 * @param iBit The bit to set.
3380 */
3381#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3382DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3383#else
3384DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3385{
3386 union { bool f; uint32_t u32; uint8_t u8; } rc;
3387# if RT_INLINE_ASM_USES_INTRIN
3388 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3389# elif RT_INLINE_ASM_GNU_STYLE
3390 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3391 "setc %b0\n\t"
3392 "andl $1, %0\n\t"
3393 : "=q" (rc.u32),
3394 "=m" (*(volatile long *)pvBitmap)
3395 : "Ir" (iBit)
3396 : "memory");
3397# else
3398 __asm
3399 {
3400 mov edx, [iBit]
3401# ifdef RT_ARCH_AMD64
3402 mov rax, [pvBitmap]
3403 lock bts [rax], edx
3404# else
3405 mov eax, [pvBitmap]
3406 lock bts [eax], edx
3407# endif
3408 setc al
3409 and eax, 1
3410 mov [rc.u32], eax
3411 }
3412# endif
3413 return rc.f;
3414}
3415#endif
3416
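/*
 * Usage sketch (illustrative only; the bitmap and helpers are hypothetical, not
 * part of the IPRT API): claiming and releasing slots in a shared bitmap; the
 * test-and-set result tells the caller whether the slot was already taken.
 *
 *     static uint32_t volatile g_bmExampleSlots[4];   // 128 slots
 *
 *     static bool rtExampleClaimSlot(int32_t iSlot)
 *     {
 *         return !ASMAtomicBitTestAndSet(g_bmExampleSlots, iSlot);
 *     }
 *
 *     static void rtExampleReleaseSlot(int32_t iSlot)
 *     {
 *         ASMAtomicBitClear(g_bmExampleSlots, iSlot);
 *     }
 */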
3417
3418/**
3419 * Tests and clears a bit in a bitmap.
3420 *
3421 * @returns true if the bit was set.
3422 * @returns false if the bit was clear.
3423 * @param pvBitmap Pointer to the bitmap.
3424 * @param iBit The bit to test and clear.
3425 */
3426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3427DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3428#else
3429DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3430{
3431 union { bool f; uint32_t u32; uint8_t u8; } rc;
3432# if RT_INLINE_ASM_USES_INTRIN
3433 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3434
3435# elif RT_INLINE_ASM_GNU_STYLE
3436 __asm__ __volatile__ ("btrl %2, %1\n\t"
3437 "setc %b0\n\t"
3438 "andl $1, %0\n\t"
3439 : "=q" (rc.u32),
3440 "=m" (*(volatile long *)pvBitmap)
3441 : "Ir" (iBit)
3442 : "memory");
3443# else
3444 __asm
3445 {
3446 mov edx, [iBit]
3447# ifdef RT_ARCH_AMD64
3448 mov rax, [pvBitmap]
3449 btr [rax], edx
3450# else
3451 mov eax, [pvBitmap]
3452 btr [eax], edx
3453# endif
3454 setc al
3455 and eax, 1
3456 mov [rc.u32], eax
3457 }
3458# endif
3459 return rc.f;
3460}
3461#endif
3462
3463
3464/**
3465 * Atomically tests and clears a bit in a bitmap.
3466 *
3467 * @returns true if the bit was set.
3468 * @returns false if the bit was clear.
3469 * @param pvBitmap Pointer to the bitmap.
3470 * @param iBit The bit to test and clear.
3471 * @remark No memory barrier, take care on SMP.
3472 */
3473#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3474DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3475#else
3476DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3477{
3478 union { bool f; uint32_t u32; uint8_t u8; } rc;
3479# if RT_INLINE_ASM_USES_INTRIN
3480 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3481
3482# elif RT_INLINE_ASM_GNU_STYLE
3483 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3484 "setc %b0\n\t"
3485 "andl $1, %0\n\t"
3486 : "=q" (rc.u32),
3487 "=m" (*(volatile long *)pvBitmap)
3488 : "Ir" (iBit)
3489 : "memory");
3490# else
3491 __asm
3492 {
3493 mov edx, [iBit]
3494# ifdef RT_ARCH_AMD64
3495 mov rax, [pvBitmap]
3496 lock btr [rax], edx
3497# else
3498 mov eax, [pvBitmap]
3499 lock btr [eax], edx
3500# endif
3501 setc al
3502 and eax, 1
3503 mov [rc.u32], eax
3504 }
3505# endif
3506 return rc.f;
3507}
3508#endif
3509
3510
3511/**
3512 * Tests and toggles a bit in a bitmap.
3513 *
3514 * @returns true if the bit was set.
3515 * @returns false if the bit was clear.
3516 * @param pvBitmap Pointer to the bitmap.
3517 * @param iBit The bit to test and toggle.
3518 */
3519#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3520DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3521#else
3522DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3523{
3524 union { bool f; uint32_t u32; uint8_t u8; } rc;
3525# if RT_INLINE_ASM_USES_INTRIN
3526 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3527
3528# elif RT_INLINE_ASM_GNU_STYLE
3529 __asm__ __volatile__ ("btcl %2, %1\n\t"
3530 "setc %b0\n\t"
3531 "andl $1, %0\n\t"
3532 : "=q" (rc.u32),
3533 "=m" (*(volatile long *)pvBitmap)
3534 : "Ir" (iBit)
3535 : "memory");
3536# else
3537 __asm
3538 {
3539 mov edx, [iBit]
3540# ifdef RT_ARCH_AMD64
3541 mov rax, [pvBitmap]
3542 btc [rax], edx
3543# else
3544 mov eax, [pvBitmap]
3545 btc [eax], edx
3546# endif
3547 setc al
3548 and eax, 1
3549 mov [rc.u32], eax
3550 }
3551# endif
3552 return rc.f;
3553}
3554#endif
3555
3556
3557/**
3558 * Atomically tests and toggles a bit in a bitmap.
3559 *
3560 * @returns true if the bit was set.
3561 * @returns false if the bit was clear.
3562 * @param pvBitmap Pointer to the bitmap.
3563 * @param iBit The bit to test and toggle.
3564 */
3565#if RT_INLINE_ASM_EXTERNAL
3566DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3567#else
3568DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3569{
3570 union { bool f; uint32_t u32; uint8_t u8; } rc;
3571# if RT_INLINE_ASM_GNU_STYLE
3572 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3573 "setc %b0\n\t"
3574 "andl $1, %0\n\t"
3575 : "=q" (rc.u32),
3576 "=m" (*(volatile long *)pvBitmap)
3577 : "Ir" (iBit)
3578 : "memory");
3579# else
3580 __asm
3581 {
3582 mov edx, [iBit]
3583# ifdef RT_ARCH_AMD64
3584 mov rax, [pvBitmap]
3585 lock btc [rax], edx
3586# else
3587 mov eax, [pvBitmap]
3588 lock btc [eax], edx
3589# endif
3590 setc al
3591 and eax, 1
3592 mov [rc.u32], eax
3593 }
3594# endif
3595 return rc.f;
3596}
3597#endif
3598
3599
3600/**
3601 * Tests if a bit in a bitmap is set.
3602 *
3603 * @returns true if the bit is set.
3604 * @returns false if the bit is clear.
3605 * @param pvBitmap Pointer to the bitmap.
3606 * @param iBit The bit to test.
3607 */
3608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3609DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3610#else
3611DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3612{
3613 union { bool f; uint32_t u32; uint8_t u8; } rc;
3614# if RT_INLINE_ASM_USES_INTRIN
3615 rc.u32 = _bittest((long *)pvBitmap, iBit);
3616# elif RT_INLINE_ASM_GNU_STYLE
3617
3618 __asm__ __volatile__ ("btl %2, %1\n\t"
3619 "setc %b0\n\t"
3620 "andl $1, %0\n\t"
3621 : "=q" (rc.u32),
3622 "=m" (*(volatile long *)pvBitmap)
3623 : "Ir" (iBit)
3624 : "memory");
3625# else
3626 __asm
3627 {
3628 mov edx, [iBit]
3629# ifdef RT_ARCH_AMD64
3630 mov rax, [pvBitmap]
3631 bt [rax], edx
3632# else
3633 mov eax, [pvBitmap]
3634 bt [eax], edx
3635# endif
3636 setc al
3637 and eax, 1
3638 mov [rc.u32], eax
3639 }
3640# endif
3641 return rc.f;
3642}
3643#endif
3644
3645
3646/**
3647 * Clears a bit range within a bitmap.
3648 *
3649 * @param pvBitmap Pointer to the bitmap.
3650 * @param iBitStart The first bit to clear.
3651 * @param iBitEnd The first bit not to clear.
3652 */
3653DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3654{
3655 if (iBitStart < iBitEnd)
3656 {
3657 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3658 int iStart = iBitStart & ~31;
3659 int iEnd = iBitEnd & ~31;
3660 if (iStart == iEnd)
3661 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3662 else
3663 {
3664 /* bits in first dword. */
3665 if (iBitStart & 31)
3666 {
3667 *pu32 &= (1 << (iBitStart & 31)) - 1;
3668 pu32++;
3669 iBitStart = iStart + 32;
3670 }
3671
3672 /* whole dword. */
3673 if (iBitStart != iEnd)
3674 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3675
3676 /* bits in last dword. */
3677 if (iBitEnd & 31)
3678 {
3679 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3680 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3681 }
3682 }
3683 }
3684}
3685
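/*
 * Usage sketch (illustrative only; the bitmap and helper are hypothetical, not
 * part of the IPRT API): returning a contiguous run of page frames to a free
 * bitmap in which a set bit means "allocated".
 *
 *     static uint32_t g_bmExamplePages[1024 / 32];    // 1024 page frames
 *
 *     static void rtExampleFreePages(int32_t iFirstPage, int32_t cPages)
 *     {
 *         ASMBitClearRange(g_bmExamplePages, iFirstPage, iFirstPage + cPages);
 *     }
 */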
3686
3687/**
3688 * Finds the first clear bit in a bitmap.
3689 *
3690 * @returns Index of the first zero bit.
3691 * @returns -1 if no clear bit was found.
3692 * @param pvBitmap Pointer to the bitmap.
3693 * @param cBits The number of bits in the bitmap. Multiple of 32.
3694 */
3695#if RT_INLINE_ASM_EXTERNAL
3696DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3697#else
3698DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3699{
3700 if (cBits)
3701 {
3702 int32_t iBit;
3703# if RT_INLINE_ASM_GNU_STYLE
3704 RTCCUINTREG uEAX, uECX, uEDI;
3705 cBits = RT_ALIGN_32(cBits, 32);
3706 __asm__ __volatile__("repe; scasl\n\t"
3707 "je 1f\n\t"
3708# ifdef RT_ARCH_AMD64
3709 "lea -4(%%rdi), %%rdi\n\t"
3710 "xorl (%%rdi), %%eax\n\t"
3711 "subq %5, %%rdi\n\t"
3712# else
3713 "lea -4(%%edi), %%edi\n\t"
3714 "xorl (%%edi), %%eax\n\t"
3715 "subl %5, %%edi\n\t"
3716# endif
3717 "shll $3, %%edi\n\t"
3718 "bsfl %%eax, %%edx\n\t"
3719 "addl %%edi, %%edx\n\t"
3720 "1:\t\n"
3721 : "=d" (iBit),
3722 "=&c" (uECX),
3723 "=&D" (uEDI),
3724 "=&a" (uEAX)
3725 : "0" (0xffffffff),
3726 "mr" (pvBitmap),
3727 "1" (cBits >> 5),
3728 "2" (pvBitmap),
3729 "3" (0xffffffff));
3730# else
3731 cBits = RT_ALIGN_32(cBits, 32);
3732 __asm
3733 {
3734# ifdef RT_ARCH_AMD64
3735 mov rdi, [pvBitmap]
3736 mov rbx, rdi
3737# else
3738 mov edi, [pvBitmap]
3739 mov ebx, edi
3740# endif
3741 mov edx, 0ffffffffh
3742 mov eax, edx
3743 mov ecx, [cBits]
3744 shr ecx, 5
3745 repe scasd
3746 je done
3747
3748# ifdef RT_ARCH_AMD64
3749 lea rdi, [rdi - 4]
3750 xor eax, [rdi]
3751 sub rdi, rbx
3752# else
3753 lea edi, [edi - 4]
3754 xor eax, [edi]
3755 sub edi, ebx
3756# endif
3757 shl edi, 3
3758 bsf edx, eax
3759 add edx, edi
3760 done:
3761 mov [iBit], edx
3762 }
3763# endif
3764 return iBit;
3765 }
3766 return -1;
3767}
3768#endif
3769
3770
3771/**
3772 * Finds the next clear bit in a bitmap.
3773 *
3774 * @returns Index of the next clear bit.
3775 * @returns -1 if no clear bit was found.
3776 * @param pvBitmap Pointer to the bitmap.
3777 * @param cBits The number of bits in the bitmap. Multiple of 32.
3778 * @param iBitPrev The bit returned from the last search.
3779 * The search will start at iBitPrev + 1.
3780 */
3781#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3782DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3783#else
3784DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3785{
3786 int iBit = ++iBitPrev & 31;
3787 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3788 cBits -= iBitPrev & ~31;
3789 if (iBit)
3790 {
3791 /* inspect the first dword. */
3792 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3793# if RT_INLINE_ASM_USES_INTRIN
3794 unsigned long ulBit = 0;
3795 if (_BitScanForward(&ulBit, u32))
3796 return ulBit + iBitPrev;
3797 iBit = -1;
3798# else
3799# if RT_INLINE_ASM_GNU_STYLE
3800 __asm__ __volatile__("bsf %1, %0\n\t"
3801 "jnz 1f\n\t"
3802 "movl $-1, %0\n\t"
3803 "1:\n\t"
3804 : "=r" (iBit)
3805 : "r" (u32));
3806# else
3807 __asm
3808 {
3809 mov edx, [u32]
3810 bsf eax, edx
3811 jnz done
3812 mov eax, 0ffffffffh
3813 done:
3814 mov [iBit], eax
3815 }
3816# endif
3817 if (iBit >= 0)
3818 return iBit + iBitPrev;
3819# endif
3820 /* Search the rest of the bitmap, if there is anything. */
3821 if (cBits > 32)
3822 {
3823 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3824 if (iBit >= 0)
3825 return iBit + (iBitPrev & ~31) + 32;
3826 }
3827 }
3828 else
3829 {
3830 /* Search the rest of the bitmap. */
3831 iBit = ASMBitFirstClear(pvBitmap, cBits);
3832 if (iBit >= 0)
3833 return iBit + (iBitPrev & ~31);
3834 }
3835 return iBit;
3836}
3837#endif
3838
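/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): scanning an allocation bitmap for a free bit and claiming it
 * atomically, moving on to the next clear bit if another thread wins the race.
 *
 *     static int32_t rtExampleAllocSlot(uint32_t volatile *pbmSlots, uint32_t cSlots)
 *     {
 *         int32_t iSlot = ASMBitFirstClear(pbmSlots, cSlots);
 *         while (iSlot >= 0)
 *         {
 *             if (!ASMAtomicBitTestAndSet(pbmSlots, iSlot))
 *                 return iSlot;
 *             iSlot = ASMBitNextClear(pbmSlots, cSlots, iSlot);
 *         }
 *         return -1;
 *     }
 */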
3839
3840/**
3841 * Finds the first set bit in a bitmap.
3842 *
3843 * @returns Index of the first set bit.
3844 * @returns -1 if no set bit was found.
3845 * @param pvBitmap Pointer to the bitmap.
3846 * @param cBits The number of bits in the bitmap. Multiple of 32.
3847 */
3848#if RT_INLINE_ASM_EXTERNAL
3849DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3850#else
3851DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3852{
3853 if (cBits)
3854 {
3855 int32_t iBit;
3856# if RT_INLINE_ASM_GNU_STYLE
3857 RTCCUINTREG uEAX, uECX, uEDI;
3858 cBits = RT_ALIGN_32(cBits, 32);
3859 __asm__ __volatile__("repe; scasl\n\t"
3860 "je 1f\n\t"
3861# ifdef RT_ARCH_AMD64
3862 "lea -4(%%rdi), %%rdi\n\t"
3863 "movl (%%rdi), %%eax\n\t"
3864 "subq %5, %%rdi\n\t"
3865# else
3866 "lea -4(%%edi), %%edi\n\t"
3867 "movl (%%edi), %%eax\n\t"
3868 "subl %5, %%edi\n\t"
3869# endif
3870 "shll $3, %%edi\n\t"
3871 "bsfl %%eax, %%edx\n\t"
3872 "addl %%edi, %%edx\n\t"
3873 "1:\t\n"
3874 : "=d" (iBit),
3875 "=&c" (uECX),
3876 "=&D" (uEDI),
3877 "=&a" (uEAX)
3878 : "0" (0xffffffff),
3879 "mr" (pvBitmap),
3880 "1" (cBits >> 5),
3881 "2" (pvBitmap),
3882 "3" (0));
3883# else
3884 cBits = RT_ALIGN_32(cBits, 32);
3885 __asm
3886 {
3887# ifdef RT_ARCH_AMD64
3888 mov rdi, [pvBitmap]
3889 mov rbx, rdi
3890# else
3891 mov edi, [pvBitmap]
3892 mov ebx, edi
3893# endif
3894 mov edx, 0ffffffffh
3895 xor eax, eax
3896 mov ecx, [cBits]
3897 shr ecx, 5
3898 repe scasd
3899 je done
3900# ifdef RT_ARCH_AMD64
3901 lea rdi, [rdi - 4]
3902 mov eax, [rdi]
3903 sub rdi, rbx
3904# else
3905 lea edi, [edi - 4]
3906 mov eax, [edi]
3907 sub edi, ebx
3908# endif
3909 shl edi, 3
3910 bsf edx, eax
3911 add edx, edi
3912 done:
3913 mov [iBit], edx
3914 }
3915# endif
3916 return iBit;
3917 }
3918 return -1;
3919}
3920#endif
3921
3922
3923/**
3924 * Finds the next set bit in a bitmap.
3925 *
3926 * @returns Index of the next set bit.
3927 * @returns -1 if no set bit was found.
3928 * @param pvBitmap Pointer to the bitmap.
3929 * @param cBits The number of bits in the bitmap. Multiple of 32.
3930 * @param iBitPrev The bit returned from the last search.
3931 * The search will start at iBitPrev + 1.
3932 */
3933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3934DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3935#else
3936DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3937{
3938 int iBit = ++iBitPrev & 31;
3939 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3940 cBits -= iBitPrev & ~31;
3941 if (iBit)
3942 {
3943 /* inspect the first dword. */
3944 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3945# if RT_INLINE_ASM_USES_INTRIN
3946 unsigned long ulBit = 0;
3947 if (_BitScanForward(&ulBit, u32))
3948 return ulBit + iBitPrev;
3949 iBit = -1;
3950# else
3951# if RT_INLINE_ASM_GNU_STYLE
3952 __asm__ __volatile__("bsf %1, %0\n\t"
3953 "jnz 1f\n\t"
3954 "movl $-1, %0\n\t"
3955 "1:\n\t"
3956 : "=r" (iBit)
3957 : "r" (u32));
3958# else
3959 __asm
3960 {
3961 mov edx, u32
3962 bsf eax, edx
3963 jnz done
3964 mov eax, 0ffffffffh
3965 done:
3966 mov [iBit], eax
3967 }
3968# endif
3969 if (iBit >= 0)
3970 return iBit + iBitPrev;
3971# endif
3972 /* Search the rest of the bitmap, if there is anything. */
3973 if (cBits > 32)
3974 {
3975 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3976 if (iBit >= 0)
3977 return iBit + (iBitPrev & ~31) + 32;
3978 }
3979
3980 }
3981 else
3982 {
3983 /* Search the rest of the bitmap. */
3984 iBit = ASMBitFirstSet(pvBitmap, cBits);
3985 if (iBit >= 0)
3986 return iBit + (iBitPrev & ~31);
3987 }
3988 return iBit;
3989}
3990#endif
3991
3992
3993/**
3994 * Finds the first bit which is set in the given 32-bit integer.
3995 * Bits are numbered from 1 (least significant) to 32.
3996 *
3997 * @returns index [1..32] of the first set bit.
3998 * @returns 0 if all bits are cleared.
3999 * @param u32 Integer to search for set bits.
4000 * @remark Similar to ffs() in BSD.
4001 */
4002DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4003{
4004# if RT_INLINE_ASM_USES_INTRIN
4005 unsigned long iBit;
4006 if (_BitScanForward(&iBit, u32))
4007 iBit++;
4008 else
4009 iBit = 0;
4010# elif RT_INLINE_ASM_GNU_STYLE
4011 uint32_t iBit;
4012 __asm__ __volatile__("bsf %1, %0\n\t"
4013 "jnz 1f\n\t"
4014 "xorl %0, %0\n\t"
4015 "jmp 2f\n"
4016 "1:\n\t"
4017 "incl %0\n"
4018 "2:\n\t"
4019 : "=r" (iBit)
4020 : "rm" (u32));
4021# else
4022 uint32_t iBit;
4023 _asm
4024 {
4025 bsf eax, [u32]
4026 jnz found
4027 xor eax, eax
4028 jmp done
4029 found:
4030 inc eax
4031 done:
4032 mov [iBit], eax
4033 }
4034# endif
4035 return iBit;
4036}
4037
4038
4039/**
4040 * Finds the first bit which is set in the given 32-bit integer.
4041 * Bits are numbered from 1 (least significant) to 32.
4042 *
4043 * @returns index [1..32] of the first set bit.
4044 * @returns 0 if all bits are cleared.
4045 * @param i32 Integer to search for set bits.
4046 * @remark Similar to ffs() in BSD.
4047 */
4048DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4049{
4050 return ASMBitFirstSetU32((uint32_t)i32);
4051}
4052
4053
4054/**
4055 * Finds the last bit which is set in the given 32-bit integer.
4056 * Bits are numbered from 1 (least significant) to 32.
4057 *
4058 * @returns index [1..32] of the last set bit.
4059 * @returns 0 if all bits are cleared.
4060 * @param u32 Integer to search for set bits.
4061 * @remark Similar to fls() in BSD.
4062 */
4063DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4064{
4065# if RT_INLINE_ASM_USES_INTRIN
4066 unsigned long iBit;
4067 if (_BitScanReverse(&iBit, u32))
4068 iBit++;
4069 else
4070 iBit = 0;
4071# elif RT_INLINE_ASM_GNU_STYLE
4072 uint32_t iBit;
4073 __asm__ __volatile__("bsrl %1, %0\n\t"
4074 "jnz 1f\n\t"
4075 "xorl %0, %0\n\t"
4076 "jmp 2f\n"
4077 "1:\n\t"
4078 "incl %0\n"
4079 "2:\n\t"
4080 : "=r" (iBit)
4081 : "rm" (u32));
4082# else
4083 uint32_t iBit;
4084 _asm
4085 {
4086 bsr eax, [u32]
4087 jnz found
4088 xor eax, eax
4089 jmp done
4090 found:
4091 inc eax
4092 done:
4093 mov [iBit], eax
4094 }
4095# endif
4096 return iBit;
4097}
4098
4099
4100/**
4101 * Finds the last bit which is set in the given 32-bit integer.
4102 * Bits are numbered from 1 (least significant) to 32.
4103 *
4104 * @returns index [1..32] of the last set bit.
4105 * @returns 0 if all bits are cleared.
4106 * @param i32 Integer to search for set bits.
4107 * @remark Similar to fls() in BSD.
4108 */
4109DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4110{
4111 return ASMBitLastSetU32((uint32_t)i32);
4112}
4113
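/*
 * Usage sketch (illustrative only; the helper is hypothetical, not part of the
 * IPRT API): rounding a count up to the next power of two with the last-set-bit
 * scan; the returned index is 1-based, so it is also the shift count for the
 * next power of two of (u32 - 1). Valid for 1 < u32 <= 0x80000000.
 *
 *     static uint32_t rtExampleRoundUpPow2(uint32_t u32)
 *     {
 *         if (u32 <= 1)
 *             return 1;
 *         return UINT32_C(1) << ASMBitLastSetU32(u32 - 1);
 *     }
 */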
4114
4115/**
4116 * Reverses the byte order of the given 32-bit integer.
4117 * @param u32 The integer to byte swap.
4118 */
4119DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4120{
4121#if RT_INLINE_ASM_USES_INTRIN
4122 u32 = _byteswap_ulong(u32);
4123#elif RT_INLINE_ASM_GNU_STYLE
4124 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4125#else
4126 _asm
4127 {
4128 mov eax, [u32]
4129 bswap eax
4130 mov [u32], eax
4131 }
4132#endif
4133 return u32;
4134}
4135
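/*
 * Usage sketch (illustrative only; the helper is hypothetical, assumes
 * <string.h> and a little-endian host): loading a big-endian on-disk field and
 * converting it to host byte order.
 *
 *     static uint32_t rtExampleReadBeU32(const void *pv)
 *     {
 *         uint32_t u32;
 *         memcpy(&u32, pv, sizeof(u32));
 *         return ASMByteSwapU32(u32);
 *     }
 */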
4136/** @} */
4137
4138
4139/** @} */
4140#endif
4141