VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 19601

Last change on this file since 19601 was 19601, checked in by vboxsync, 16 years ago

Attempt to fix 64-bit Linux builds.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 164.1 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're compiling with _MSC_VER >= 1400 and using its intrinsics.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(__stosq)
105# pragma intrinsic(__readcr8)
106# pragma intrinsic(__writecr8)
107# pragma intrinsic(_byteswap_uint64)
108# pragma intrinsic(_InterlockedExchange64)
109# endif
110# endif
111#endif
112#ifndef RT_INLINE_ASM_USES_INTRIN
113# define RT_INLINE_ASM_USES_INTRIN 0
114#endif
115
116/** @def RT_INLINE_ASM_GCC_4_3_3_X86
117 * Used to work around some 4.3.3 register allocation issues in this version
118 * of the compiler. */
119#ifdef __GNUC__
120# define RT_INLINE_ASM_GCC_4_3_3_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && __GNUC_PATCHLEVEL__ == 3 && defined(__i386__))
121#endif
122#ifndef RT_INLINE_ASM_GCC_4_3_3_X86
123# define RT_INLINE_ASM_GCC_4_3_3_X86 0
124#endif
125
126
127
128/** @defgroup grp_asm ASM - Assembly Routines
129 * @ingroup grp_rt
130 *
131 * @remarks The difference between ordered and unordered atomic operations is that
132 * the former complete outstanding reads and writes before continuing,
133 * while the latter make no promises about the order. Even ordered
134 * operations do not, it seems, give a 100% guarantee that the operation
135 * completes before any subsequent memory access.
136 * (Please correct if wrong.)
137 *
138 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
139 * are unordered (note the Uo).
140 *
141 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
142 * or even optimize assembler instructions away. For instance, in the following code
143 * the second rdmsr instruction is optimized away because gcc treats that instruction
144 * as deterministic:
145 *
146 * @code
147 * static inline uint32_t rdmsr_low(int idx)
148 * {
149 * uint32_t low;
150 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
151 * }
152 * ...
153 * uint32_t msr1 = rdmsr_low(1);
154 * foo(msr1);
155 * msr1 = rdmsr_low(1);
156 * bar(msr1);
157 * @endcode
158 *
159 * The input parameter of rdmsr_low is the same for both calls, so gcc will reuse
160 * the result of the first call as the input for bar() as well. For rdmsr this
161 * is not acceptable as this instruction is _not_ deterministic. The same applies to
162 * reading machine status information in general.
163 *
164 * @{
165 */
166
167/** @def RT_INLINE_ASM_EXTERNAL
168 * Defined as 1 if the compiler does not support inline assembly.
169 * The ASM* functions will then be implemented in an external .asm file.
170 *
171 * @remark The Microsoft AMD64 compiler does not support inline assembly, which is
172 * why the ASM* functions are implemented externally when building with it.
173 */
174#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
175# define RT_INLINE_ASM_EXTERNAL 1
176#else
177# define RT_INLINE_ASM_EXTERNAL 0
178#endif
179
180/** @def RT_INLINE_ASM_GNU_STYLE
181 * Defined as 1 if the compiler understands GNU style inline assembly.
182 */
183#if defined(_MSC_VER)
184# define RT_INLINE_ASM_GNU_STYLE 0
185#else
186# define RT_INLINE_ASM_GNU_STYLE 1
187#endif
188
189
190/** @todo find a more proper place for this structure? */
191#pragma pack(1)
192/** IDTR */
193typedef struct RTIDTR
194{
195 /** Size of the IDT. */
196 uint16_t cbIdt;
197 /** Address of the IDT. */
198 uintptr_t pIdt;
199} RTIDTR, *PRTIDTR;
200#pragma pack()
201
202#pragma pack(1)
203/** GDTR */
204typedef struct RTGDTR
205{
206 /** Size of the GDT. */
207 uint16_t cbGdt;
208 /** Address of the GDT. */
209 uintptr_t pGdt;
210} RTGDTR, *PRTGDTR;
211#pragma pack()
212
213
214/** @def ASMReturnAddress
215 * Gets the return address of the current (or calling if you like) function or method.
216 */
217#ifdef _MSC_VER
218# ifdef __cplusplus
219extern "C"
220# endif
221void * _ReturnAddress(void);
222# pragma intrinsic(_ReturnAddress)
223# define ASMReturnAddress() _ReturnAddress()
224#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
225# define ASMReturnAddress() __builtin_return_address(0)
226#else
227# error "Unsupported compiler."
228#endif
229
230
231/**
232 * Gets the content of the IDTR CPU register.
233 * @param pIdtr Where to store the IDTR contents.
234 */
235#if RT_INLINE_ASM_EXTERNAL
236DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
237#else
238DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
239{
240# if RT_INLINE_ASM_GNU_STYLE
241 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
242# else
243 __asm
244 {
245# ifdef RT_ARCH_AMD64
246 mov rax, [pIdtr]
247 sidt [rax]
248# else
249 mov eax, [pIdtr]
250 sidt [eax]
251# endif
252 }
253# endif
254}
255#endif
256
257
258/**
259 * Sets the content of the IDTR CPU register.
260 * @param pIdtr Where to load the IDTR contents from
261 */
262#if RT_INLINE_ASM_EXTERNAL
263DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
264#else
265DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
266{
267# if RT_INLINE_ASM_GNU_STYLE
268 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
269# else
270 __asm
271 {
272# ifdef RT_ARCH_AMD64
273 mov rax, [pIdtr]
274 lidt [rax]
275# else
276 mov eax, [pIdtr]
277 lidt [eax]
278# endif
279 }
280# endif
281}
282#endif
283
284
285/**
286 * Gets the content of the GDTR CPU register.
287 * @param pGdtr Where to store the GDTR contents.
288 */
289#if RT_INLINE_ASM_EXTERNAL
290DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
291#else
292DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
293{
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
296# else
297 __asm
298 {
299# ifdef RT_ARCH_AMD64
300 mov rax, [pGdtr]
301 sgdt [rax]
302# else
303 mov eax, [pGdtr]
304 sgdt [eax]
305# endif
306 }
307# endif
308}
309#endif
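
/* Usage sketch (illustration only; variable names are arbitrary): capture the
 * current descriptor table registers so they can be inspected or restored later.
 * @code
 *     RTIDTR Idtr;
 *     RTGDTR Gdtr;
 *     ASMGetIDTR(&Idtr);
 *     ASMGetGDTR(&Gdtr);
 *     // Idtr.cbIdt/pIdt and Gdtr.cbGdt/pGdt now hold the limits and base addresses.
 *     ASMSetIDTR(&Idtr);  // restore later, should the IDTR have been changed in between
 * @endcode
 */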
310
311/**
312 * Get the cs register.
313 * @returns cs.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetCS(void);
317#else
318DECLINLINE(RTSEL) ASMGetCS(void)
319{
320 RTSEL SelCS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
323# else
324 __asm
325 {
326 mov ax, cs
327 mov [SelCS], ax
328 }
329# endif
330 return SelCS;
331}
332#endif
333
334
335/**
336 * Get the DS register.
337 * @returns DS.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetDS(void);
341#else
342DECLINLINE(RTSEL) ASMGetDS(void)
343{
344 RTSEL SelDS;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
347# else
348 __asm
349 {
350 mov ax, ds
351 mov [SelDS], ax
352 }
353# endif
354 return SelDS;
355}
356#endif
357
358
359/**
360 * Get the ES register.
361 * @returns ES.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetES(void);
365#else
366DECLINLINE(RTSEL) ASMGetES(void)
367{
368 RTSEL SelES;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
371# else
372 __asm
373 {
374 mov ax, es
375 mov [SelES], ax
376 }
377# endif
378 return SelES;
379}
380#endif
381
382
383/**
384 * Get the FS register.
385 * @returns FS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetFS(void);
389#else
390DECLINLINE(RTSEL) ASMGetFS(void)
391{
392 RTSEL SelFS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
395# else
396 __asm
397 {
398 mov ax, fs
399 mov [SelFS], ax
400 }
401# endif
402 return SelFS;
403}
404#endif
405
406
407/**
408 * Get the GS register.
409 * @returns GS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetGS(void);
413#else
414DECLINLINE(RTSEL) ASMGetGS(void)
415{
416 RTSEL SelGS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
419# else
420 __asm
421 {
422 mov ax, gs
423 mov [SelGS], ax
424 }
425# endif
426 return SelGS;
427}
428#endif
429
430
431/**
432 * Get the SS register.
433 * @returns SS.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetSS(void);
437#else
438DECLINLINE(RTSEL) ASMGetSS(void)
439{
440 RTSEL SelSS;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
443# else
444 __asm
445 {
446 mov ax, ss
447 mov [SelSS], ax
448 }
449# endif
450 return SelSS;
451}
452#endif
453
454
455/**
456 * Get the TR register.
457 * @returns TR.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTSEL) ASMGetTR(void);
461#else
462DECLINLINE(RTSEL) ASMGetTR(void)
463{
464 RTSEL SelTR;
465# if RT_INLINE_ASM_GNU_STYLE
466 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
467# else
468 __asm
469 {
470 str ax
471 mov [SelTR], ax
472 }
473# endif
474 return SelTR;
475}
476#endif
477
478
479/**
480 * Get the [RE]FLAGS register.
481 * @returns [RE]FLAGS.
482 */
483#if RT_INLINE_ASM_EXTERNAL
484DECLASM(RTCCUINTREG) ASMGetFlags(void);
485#else
486DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
487{
488 RTCCUINTREG uFlags;
489# if RT_INLINE_ASM_GNU_STYLE
490# ifdef RT_ARCH_AMD64
491 __asm__ __volatile__("pushfq\n\t"
492 "popq %0\n\t"
493 : "=g" (uFlags));
494# else
495 __asm__ __volatile__("pushfl\n\t"
496 "popl %0\n\t"
497 : "=g" (uFlags));
498# endif
499# else
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 pushfq
504 pop [uFlags]
505# else
506 pushfd
507 pop [uFlags]
508# endif
509 }
510# endif
511 return uFlags;
512}
513#endif
514
515
516/**
517 * Set the [RE]FLAGS register.
518 * @param uFlags The new [RE]FLAGS value.
519 */
520#if RT_INLINE_ASM_EXTERNAL
521DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
522#else
523DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
524{
525# if RT_INLINE_ASM_GNU_STYLE
526# ifdef RT_ARCH_AMD64
527 __asm__ __volatile__("pushq %0\n\t"
528 "popfq\n\t"
529 : : "g" (uFlags));
530# else
531 __asm__ __volatile__("pushl %0\n\t"
532 "popfl\n\t"
533 : : "g" (uFlags));
534# endif
535# else
536 __asm
537 {
538# ifdef RT_ARCH_AMD64
539 push [uFlags]
540 popfq
541# else
542 push [uFlags]
543 popfd
544# endif
545 }
546# endif
547}
548#endif
549
550
551/**
552 * Gets the content of the CPU timestamp counter register.
553 *
554 * @returns TSC.
555 */
556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
557DECLASM(uint64_t) ASMReadTSC(void);
558#else
559DECLINLINE(uint64_t) ASMReadTSC(void)
560{
561 RTUINT64U u;
562# if RT_INLINE_ASM_GNU_STYLE
563 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
564# else
565# if RT_INLINE_ASM_USES_INTRIN
566 u.u = __rdtsc();
567# else
568 __asm
569 {
570 rdtsc
571 mov [u.s.Lo], eax
572 mov [u.s.Hi], edx
573 }
574# endif
575# endif
576 return u.u;
577}
578#endif
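
/* Usage sketch (illustration only): a crude cycle measurement. The TSC may differ
 * between CPUs and is affected by frequency scaling, so treat the result as indicative.
 * @code
 *     uint64_t const uTscStart = ASMReadTSC();
 *     // ... work being measured ...
 *     uint64_t const cTicksElapsed = ASMReadTSC() - uTscStart;
 * @endcode
 */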
579
580
581/**
582 * Performs the cpuid instruction returning all registers.
583 *
584 * @param uOperator CPUID operation (eax).
585 * @param pvEAX Where to store eax.
586 * @param pvEBX Where to store ebx.
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
595{
596# if RT_INLINE_ASM_GNU_STYLE
597# ifdef RT_ARCH_AMD64
598 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
599 __asm__ ("cpuid\n\t"
600 : "=a" (uRAX),
601 "=b" (uRBX),
602 "=c" (uRCX),
603 "=d" (uRDX)
604 : "0" (uOperator));
605 *(uint32_t *)pvEAX = (uint32_t)uRAX;
606 *(uint32_t *)pvEBX = (uint32_t)uRBX;
607 *(uint32_t *)pvECX = (uint32_t)uRCX;
608 *(uint32_t *)pvEDX = (uint32_t)uRDX;
609# else
610 __asm__ ("xchgl %%ebx, %1\n\t"
611 "cpuid\n\t"
612 "xchgl %%ebx, %1\n\t"
613 : "=a" (*(uint32_t *)pvEAX),
614 "=r" (*(uint32_t *)pvEBX),
615 "=c" (*(uint32_t *)pvECX),
616 "=d" (*(uint32_t *)pvEDX)
617 : "0" (uOperator));
618# endif
619
620# elif RT_INLINE_ASM_USES_INTRIN
621 int aInfo[4];
622 __cpuid(aInfo, uOperator);
623 *(uint32_t *)pvEAX = aInfo[0];
624 *(uint32_t *)pvEBX = aInfo[1];
625 *(uint32_t *)pvECX = aInfo[2];
626 *(uint32_t *)pvEDX = aInfo[3];
627
628# else
629 uint32_t uEAX;
630 uint32_t uEBX;
631 uint32_t uECX;
632 uint32_t uEDX;
633 __asm
634 {
635 push ebx
636 mov eax, [uOperator]
637 cpuid
638 mov [uEAX], eax
639 mov [uEBX], ebx
640 mov [uECX], ecx
641 mov [uEDX], edx
642 pop ebx
643 }
644 *(uint32_t *)pvEAX = uEAX;
645 *(uint32_t *)pvEBX = uEBX;
646 *(uint32_t *)pvECX = uECX;
647 *(uint32_t *)pvEDX = uEDX;
648# endif
649}
650#endif
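
/* Usage sketch (illustration only): query leaf 1 and test a feature bit; bit 26
 * of EDX is assumed here to be the SSE2 flag.
 * @code
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fSse2 = (uEDX >> 26) & 1;
 * @endcode
 */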
651
652
653/**
654 * Performs the cpuid instruction returning all registers.
655 * Some CPUID leaves take ECX as an additional input parameter (currently known for EAX=4).
656 *
657 * @param uOperator CPUID operation (eax).
658 * @param uIdxECX ecx index
659 * @param pvEAX Where to store eax.
660 * @param pvEBX Where to store ebx.
661 * @param pvECX Where to store ecx.
662 * @param pvEDX Where to store edx.
663 * @remark We're using void pointers to ease the use of special bitfield structures and such.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
667#else
668DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
669{
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef RT_ARCH_AMD64
672 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
673 __asm__ ("cpuid\n\t"
674 : "=a" (uRAX),
675 "=b" (uRBX),
676 "=c" (uRCX),
677 "=d" (uRDX)
678 : "0" (uOperator),
679 "2" (uIdxECX));
680 *(uint32_t *)pvEAX = (uint32_t)uRAX;
681 *(uint32_t *)pvEBX = (uint32_t)uRBX;
682 *(uint32_t *)pvECX = (uint32_t)uRCX;
683 *(uint32_t *)pvEDX = (uint32_t)uRDX;
684# else
685 __asm__ ("xchgl %%ebx, %1\n\t"
686 "cpuid\n\t"
687 "xchgl %%ebx, %1\n\t"
688 : "=a" (*(uint32_t *)pvEAX),
689 "=r" (*(uint32_t *)pvEBX),
690 "=c" (*(uint32_t *)pvECX),
691 "=d" (*(uint32_t *)pvEDX)
692 : "0" (uOperator),
693 "2" (uIdxECX));
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 /* @todo This path ignores uIdxECX; a subleaf-aware intrinsic (e.g. __cpuidex on newer compilers) would be needed. */
699 __cpuid(aInfo, uOperator);
700 *(uint32_t *)pvEAX = aInfo[0];
701 *(uint32_t *)pvEBX = aInfo[1];
702 *(uint32_t *)pvECX = aInfo[2];
703 *(uint32_t *)pvEDX = aInfo[3];
704
705# else
706 uint32_t uEAX;
707 uint32_t uEBX;
708 uint32_t uECX;
709 uint32_t uEDX;
710 __asm
711 {
712 push ebx
713 mov eax, [uOperator]
714 mov ecx, [uIdxECX]
715 cpuid
716 mov [uEAX], eax
717 mov [uEBX], ebx
718 mov [uECX], ecx
719 mov [uEDX], edx
720 pop ebx
721 }
722 *(uint32_t *)pvEAX = uEAX;
723 *(uint32_t *)pvEBX = uEBX;
724 *(uint32_t *)pvECX = uECX;
725 *(uint32_t *)pvEDX = uEDX;
726# endif
727}
728#endif
729
730
731/**
732 * Performs the cpuid instruction returning ecx and edx.
733 *
734 * @param uOperator CPUID operation (eax).
735 * @param pvECX Where to store ecx.
736 * @param pvEDX Where to store edx.
737 * @remark We're using void pointers to ease the use of special bitfield structures and such.
738 */
739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
740DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
741#else
742DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
743{
744 uint32_t uEBX;
745 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
746}
747#endif
748
749
750/**
751 * Performs the cpuid instruction returning edx.
752 *
753 * @param uOperator CPUID operation (eax).
754 * @returns EDX after cpuid operation.
755 */
756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
757DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
758#else
759DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
760{
761 RTCCUINTREG xDX;
762# if RT_INLINE_ASM_GNU_STYLE
763# ifdef RT_ARCH_AMD64
764 RTCCUINTREG uSpill;
765 __asm__ ("cpuid"
766 : "=a" (uSpill),
767 "=d" (xDX)
768 : "0" (uOperator)
769 : "rbx", "rcx");
770# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
771 __asm__ ("push %%ebx\n\t"
772 "cpuid\n\t"
773 "pop %%ebx\n\t"
774 : "=a" (uOperator),
775 "=d" (xDX)
776 : "0" (uOperator)
777 : "ecx");
778# else
779 __asm__ ("cpuid"
780 : "=a" (uOperator),
781 "=d" (xDX)
782 : "0" (uOperator)
783 : "ebx", "ecx");
784# endif
785
786# elif RT_INLINE_ASM_USES_INTRIN
787 int aInfo[4];
788 __cpuid(aInfo, uOperator);
789 xDX = aInfo[3];
790
791# else
792 __asm
793 {
794 push ebx
795 mov eax, [uOperator]
796 cpuid
797 mov [xDX], edx
798 pop ebx
799 }
800# endif
801 return (uint32_t)xDX;
802}
803#endif
804
805
806/**
807 * Performs the cpuid instruction returning ecx.
808 *
809 * @param uOperator CPUID operation (eax).
810 * @returns ECX after cpuid operation.
811 */
812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
813DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
814#else
815DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
816{
817 RTCCUINTREG xCX;
818# if RT_INLINE_ASM_GNU_STYLE
819# ifdef RT_ARCH_AMD64
820 RTCCUINTREG uSpill;
821 __asm__ ("cpuid"
822 : "=a" (uSpill),
823 "=c" (xCX)
824 : "0" (uOperator)
825 : "rbx", "rdx");
826# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
827 __asm__ ("push %%ebx\n\t"
828 "cpuid\n\t"
829 "pop %%ebx\n\t"
830 : "=a" (uOperator),
831 "=c" (xCX)
832 : "0" (uOperator)
833 : "edx");
834# else
835 __asm__ ("cpuid"
836 : "=a" (uOperator),
837 "=c" (xCX)
838 : "0" (uOperator)
839 : "ebx", "edx");
840
841# endif
842
843# elif RT_INLINE_ASM_USES_INTRIN
844 int aInfo[4];
845 __cpuid(aInfo, uOperator);
846 xCX = aInfo[2];
847
848# else
849 __asm
850 {
851 push ebx
852 mov eax, [uOperator]
853 cpuid
854 mov [xCX], ecx
855 pop ebx
856 }
857# endif
858 return (uint32_t)xCX;
859}
860#endif
861
862
863/**
864 * Checks if the current CPU supports CPUID.
865 *
866 * @returns true if CPUID is supported.
867 */
868DECLINLINE(bool) ASMHasCpuId(void)
869{
870#ifdef RT_ARCH_AMD64
871 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
872#else /* !RT_ARCH_AMD64 */
873 bool fRet = false;
874# if RT_INLINE_ASM_GNU_STYLE
875 uint32_t u1;
876 uint32_t u2;
877 __asm__ ("pushf\n\t"
878 "pop %1\n\t"
879 "mov %1, %2\n\t"
880 "xorl $0x200000, %1\n\t"
881 "push %1\n\t"
882 "popf\n\t"
883 "pushf\n\t"
884 "pop %1\n\t"
885 "cmpl %1, %2\n\t"
886 "setne %0\n\t"
887 "push %2\n\t"
888 "popf\n\t"
889 : "=m" (fRet), "=r" (u1), "=r" (u2));
890# else
891 __asm
892 {
893 pushfd
894 pop eax
895 mov ebx, eax
896 xor eax, 0200000h
897 push eax
898 popfd
899 pushfd
900 pop eax
901 cmp eax, ebx
902 setne fRet
903 push ebx
904 popfd
905 }
906# endif
907 return fRet;
908#endif /* !RT_ARCH_AMD64 */
909}
910
911
912/**
913 * Gets the APIC ID of the current CPU.
914 *
915 * @returns the APIC ID.
916 */
917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
918DECLASM(uint8_t) ASMGetApicId(void);
919#else
920DECLINLINE(uint8_t) ASMGetApicId(void)
921{
922 RTCCUINTREG xBX;
923# if RT_INLINE_ASM_GNU_STYLE
924# ifdef RT_ARCH_AMD64
925 RTCCUINTREG uSpill;
926 __asm__ ("cpuid"
927 : "=a" (uSpill),
928 "=b" (xBX)
929 : "0" (1)
930 : "rcx", "rdx");
931# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
932 RTCCUINTREG uSpill;
933 __asm__ ("mov %%ebx,%1\n\t"
934 "cpuid\n\t"
935 "xchgl %%ebx,%1\n\t"
936 : "=a" (uSpill),
937 "=r" (xBX)
938 : "0" (1)
939 : "ecx", "edx");
940# else
941 RTCCUINTREG uSpill;
942 __asm__ ("cpuid"
943 : "=a" (uSpill),
944 "=b" (xBX)
945 : "0" (1)
946 : "ecx", "edx");
947# endif
948
949# elif RT_INLINE_ASM_USES_INTRIN
950 int aInfo[4];
951 __cpuid(aInfo, 1);
952 xBX = aInfo[1];
953
954# else
955 __asm
956 {
957 push ebx
958 mov eax, 1
959 cpuid
960 mov [xBX], ebx
961 pop ebx
962 }
963# endif
964 return (uint8_t)(xBX >> 24);
965}
966#endif
967
968
969/**
970 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
971 *
972 * @returns true/false.
973 * @param uEBX EBX return from ASMCpuId(0)
974 * @param uECX ECX return from ASMCpuId(0)
975 * @param uEDX EDX return from ASMCpuId(0)
976 */
977DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
978{
979 return uEBX == 0x756e6547
980 && uECX == 0x6c65746e
981 && uEDX == 0x49656e69;
982}
983
984
985/**
986 * Tests if this is a genuine Intel CPU.
987 *
988 * @returns true/false.
989 */
990DECLINLINE(bool) ASMIsIntelCpu(void)
991{
992 uint32_t uEAX, uEBX, uECX, uEDX;
993 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
994 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
995}
996
997
998/**
999 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1000 *
1001 * @returns Family.
1002 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1003 */
1004DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1005{
1006 return ((uEAX >> 8) & 0xf) == 0xf
1007 ? ((uEAX >> 20) & 0x7f) + 0xf
1008 : ((uEAX >> 8) & 0xf);
1009}
1010
1011
1012/**
1013 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1014 *
1015 * @returns Model.
1016 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1018 */
1019DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1020{
1021 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1022 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1023 : ((uEAX >> 4) & 0xf);
1024}
1025
1026
1027/**
1028 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1029 *
1030 * @returns Model.
1031 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1033 */
1034DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1035{
1036 return ((uEAX >> 8) & 0xf) == 0xf
1037 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1038 : ((uEAX >> 4) & 0xf);
1039}
1040
1041
1042/**
1043 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1044 *
1045 * @returns Model.
1046 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1047 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1048 */
1049DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1050{
1051 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1052 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1053 : ((uEAX >> 4) & 0xf);
1054}
1055
1056
1057/**
1058 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1059 *
1060 * @returns Stepping.
1061 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1062 */
1063DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1064{
1065 return uEAX & 0xf;
1066}
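
/* Worked example (illustration only): for an assumed leaf 1 EAX value of 0x000006fb,
 * ASMGetCpuFamily() returns 6, ASMGetCpuModel(uEAX, true) returns 0xf (the extended
 * model bits are only folded in for family 0xf, or family 6 on Intel), and
 * ASMGetCpuStepping() returns 0xb.
 * @code
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const     fIntel    = ASMIsIntelCpu();
 *     uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *     uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *     uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */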
1067
1068
1069/**
1070 * Get cr0.
1071 * @returns cr0.
1072 */
1073#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1074DECLASM(RTCCUINTREG) ASMGetCR0(void);
1075#else
1076DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1077{
1078 RTCCUINTREG uCR0;
1079# if RT_INLINE_ASM_USES_INTRIN
1080 uCR0 = __readcr0();
1081
1082# elif RT_INLINE_ASM_GNU_STYLE
1083# ifdef RT_ARCH_AMD64
1084 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1085# else
1086 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1087# endif
1088# else
1089 __asm
1090 {
1091# ifdef RT_ARCH_AMD64
1092 mov rax, cr0
1093 mov [uCR0], rax
1094# else
1095 mov eax, cr0
1096 mov [uCR0], eax
1097# endif
1098 }
1099# endif
1100 return uCR0;
1101}
1102#endif
1103
1104
1105/**
1106 * Sets the CR0 register.
1107 * @param uCR0 The new CR0 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1111#else
1112DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr0(uCR0);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1120# else
1121 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR0]
1128 mov cr0, rax
1129# else
1130 mov eax, [uCR0]
1131 mov cr0, eax
1132# endif
1133 }
1134# endif
1135}
1136#endif
1137
1138
1139/**
1140 * Get cr2.
1141 * @returns cr2.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(RTCCUINTREG) ASMGetCR2(void);
1145#else
1146DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1147{
1148 RTCCUINTREG uCR2;
1149# if RT_INLINE_ASM_USES_INTRIN
1150 uCR2 = __readcr2();
1151
1152# elif RT_INLINE_ASM_GNU_STYLE
1153# ifdef RT_ARCH_AMD64
1154 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1155# else
1156 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1157# endif
1158# else
1159 __asm
1160 {
1161# ifdef RT_ARCH_AMD64
1162 mov rax, cr2
1163 mov [uCR2], rax
1164# else
1165 mov eax, cr2
1166 mov [uCR2], eax
1167# endif
1168 }
1169# endif
1170 return uCR2;
1171}
1172#endif
1173
1174
1175/**
1176 * Sets the CR2 register.
1177 * @param uCR2 The new CR2 value.
1178 */
1179#if RT_INLINE_ASM_EXTERNAL
1180DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1181#else
1182DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185# ifdef RT_ARCH_AMD64
1186 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1187# else
1188 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1189# endif
1190# else
1191 __asm
1192 {
1193# ifdef RT_ARCH_AMD64
1194 mov rax, [uCR2]
1195 mov cr2, rax
1196# else
1197 mov eax, [uCR2]
1198 mov cr2, eax
1199# endif
1200 }
1201# endif
1202}
1203#endif
1204
1205
1206/**
1207 * Get cr3.
1208 * @returns cr3.
1209 */
1210#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1211DECLASM(RTCCUINTREG) ASMGetCR3(void);
1212#else
1213DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1214{
1215 RTCCUINTREG uCR3;
1216# if RT_INLINE_ASM_USES_INTRIN
1217 uCR3 = __readcr3();
1218
1219# elif RT_INLINE_ASM_GNU_STYLE
1220# ifdef RT_ARCH_AMD64
1221 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1222# else
1223 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1224# endif
1225# else
1226 __asm
1227 {
1228# ifdef RT_ARCH_AMD64
1229 mov rax, cr3
1230 mov [uCR3], rax
1231# else
1232 mov eax, cr3
1233 mov [uCR3], eax
1234# endif
1235 }
1236# endif
1237 return uCR3;
1238}
1239#endif
1240
1241
1242/**
1243 * Sets the CR3 register.
1244 *
1245 * @param uCR3 New CR3 value.
1246 */
1247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1248DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1249#else
1250DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1251{
1252# if RT_INLINE_ASM_USES_INTRIN
1253 __writecr3(uCR3);
1254
1255# elif RT_INLINE_ASM_GNU_STYLE
1256# ifdef RT_ARCH_AMD64
1257 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1258# else
1259 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1260# endif
1261# else
1262 __asm
1263 {
1264# ifdef RT_ARCH_AMD64
1265 mov rax, [uCR3]
1266 mov cr3, rax
1267# else
1268 mov eax, [uCR3]
1269 mov cr3, eax
1270# endif
1271 }
1272# endif
1273}
1274#endif
1275
1276
1277/**
1278 * Reloads the CR3 register.
1279 */
1280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1281DECLASM(void) ASMReloadCR3(void);
1282#else
1283DECLINLINE(void) ASMReloadCR3(void)
1284{
1285# if RT_INLINE_ASM_USES_INTRIN
1286 __writecr3(__readcr3());
1287
1288# elif RT_INLINE_ASM_GNU_STYLE
1289 RTCCUINTREG u;
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %%cr3, %0\n\t"
1292 "movq %0, %%cr3\n\t"
1293 : "=r" (u));
1294# else
1295 __asm__ __volatile__("movl %%cr3, %0\n\t"
1296 "movl %0, %%cr3\n\t"
1297 : "=r" (u));
1298# endif
1299# else
1300 __asm
1301 {
1302# ifdef RT_ARCH_AMD64
1303 mov rax, cr3
1304 mov cr3, rax
1305# else
1306 mov eax, cr3
1307 mov cr3, eax
1308# endif
1309 }
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Get cr4.
1317 * @returns cr4.
1318 */
1319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1320DECLASM(RTCCUINTREG) ASMGetCR4(void);
1321#else
1322DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1323{
1324 RTCCUINTREG uCR4;
1325# if RT_INLINE_ASM_USES_INTRIN
1326 uCR4 = __readcr4();
1327
1328# elif RT_INLINE_ASM_GNU_STYLE
1329# ifdef RT_ARCH_AMD64
1330 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1331# else
1332 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1333# endif
1334# else
1335 __asm
1336 {
1337# ifdef RT_ARCH_AMD64
1338 mov rax, cr4
1339 mov [uCR4], rax
1340# else
1341 push eax /* just in case */
1342 /*mov eax, cr4*/
1343 _emit 0x0f
1344 _emit 0x20
1345 _emit 0xe0
1346 mov [uCR4], eax
1347 pop eax
1348# endif
1349 }
1350# endif
1351 return uCR4;
1352}
1353#endif
1354
1355
1356/**
1357 * Sets the CR4 register.
1358 *
1359 * @param uCR4 New CR4 value.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1363#else
1364DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1365{
1366# if RT_INLINE_ASM_USES_INTRIN
1367 __writecr4(uCR4);
1368
1369# elif RT_INLINE_ASM_GNU_STYLE
1370# ifdef RT_ARCH_AMD64
1371 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1372# else
1373 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1374# endif
1375# else
1376 __asm
1377 {
1378# ifdef RT_ARCH_AMD64
1379 mov rax, [uCR4]
1380 mov cr4, rax
1381# else
1382 mov eax, [uCR4]
1383 _emit 0x0F
1384 _emit 0x22
1385 _emit 0xE0 /* mov cr4, eax */
1386# endif
1387 }
1388# endif
1389}
1390#endif
1391
1392
1393/**
1394 * Get cr8.
1395 * @returns cr8.
1396 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1399DECLASM(RTCCUINTREG) ASMGetCR8(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1402{
1403# ifdef RT_ARCH_AMD64
1404 RTCCUINTREG uCR8;
1405# if RT_INLINE_ASM_USES_INTRIN
1406 uCR8 = __readcr8();
1407
1408# elif RT_INLINE_ASM_GNU_STYLE
1409 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1410# else
1411 __asm
1412 {
1413 mov rax, cr8
1414 mov [uCR8], rax
1415 }
1416# endif
1417 return uCR8;
1418# else /* !RT_ARCH_AMD64 */
1419 return 0;
1420# endif /* !RT_ARCH_AMD64 */
1421}
1422#endif
1423
1424
1425/**
1426 * Enables interrupts (EFLAGS.IF).
1427 */
1428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1429DECLASM(void) ASMIntEnable(void);
1430#else
1431DECLINLINE(void) ASMIntEnable(void)
1432{
1433# if RT_INLINE_ASM_GNU_STYLE
1434 __asm("sti\n");
1435# elif RT_INLINE_ASM_USES_INTRIN
1436 _enable();
1437# else
1438 __asm sti
1439# endif
1440}
1441#endif
1442
1443
1444/**
1445 * Disables interrupts (!EFLAGS.IF).
1446 */
1447#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1448DECLASM(void) ASMIntDisable(void);
1449#else
1450DECLINLINE(void) ASMIntDisable(void)
1451{
1452# if RT_INLINE_ASM_GNU_STYLE
1453 __asm("cli\n");
1454# elif RT_INLINE_ASM_USES_INTRIN
1455 _disable();
1456# else
1457 __asm cli
1458# endif
1459}
1460#endif
1461
1462
1463/**
1464 * Disables interrupts and returns previous xFLAGS.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1467DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1470{
1471 RTCCUINTREG xFlags;
1472# if RT_INLINE_ASM_GNU_STYLE
1473# ifdef RT_ARCH_AMD64
1474 __asm__ __volatile__("pushfq\n\t"
1475 "cli\n\t"
1476 "popq %0\n\t"
1477 : "=rm" (xFlags));
1478# else
1479 __asm__ __volatile__("pushfl\n\t"
1480 "cli\n\t"
1481 "popl %0\n\t"
1482 : "=rm" (xFlags));
1483# endif
1484# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1485 xFlags = ASMGetFlags();
1486 _disable();
1487# else
1488 __asm {
1489 pushfd
1490 cli
1491 pop [xFlags]
1492 }
1493# endif
1494 return xFlags;
1495}
1496#endif
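
/* Usage sketch (illustration only): the usual pattern for a short section that must
 * run with interrupts disabled, restoring the previous state afterwards.
 * @code
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     // ... short critical section ...
 *     ASMSetFlags(fSavedFlags);   // re-enables interrupts only if they were enabled before
 * @endcode
 */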
1497
1498
1499/**
1500 * Halts the CPU until interrupted.
1501 */
1502#if RT_INLINE_ASM_EXTERNAL
1503DECLASM(void) ASMHalt(void);
1504#else
1505DECLINLINE(void) ASMHalt(void)
1506{
1507# if RT_INLINE_ASM_GNU_STYLE
1508 __asm__ __volatile__("hlt\n\t");
1509# else
1510 __asm {
1511 hlt
1512 }
1513# endif
1514}
1515#endif
1516
1517
1518/**
1519 * Reads a machine specific register.
1520 *
1521 * @returns Register content.
1522 * @param uRegister Register to read.
1523 */
1524#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1525DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1526#else
1527DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1528{
1529 RTUINT64U u;
1530# if RT_INLINE_ASM_GNU_STYLE
1531 __asm__ __volatile__("rdmsr\n\t"
1532 : "=a" (u.s.Lo),
1533 "=d" (u.s.Hi)
1534 : "c" (uRegister));
1535
1536# elif RT_INLINE_ASM_USES_INTRIN
1537 u.u = __readmsr(uRegister);
1538
1539# else
1540 __asm
1541 {
1542 mov ecx, [uRegister]
1543 rdmsr
1544 mov [u.s.Lo], eax
1545 mov [u.s.Hi], edx
1546 }
1547# endif
1548
1549 return u.u;
1550}
1551#endif
1552
1553
1554/**
1555 * Writes a machine specific register.
1556 *
1558 * @param uRegister Register to write to.
1559 * @param u64Val Value to write.
1560 */
1561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1562DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1563#else
1564DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1565{
1566 RTUINT64U u;
1567
1568 u.u = u64Val;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 __asm__ __volatile__("wrmsr\n\t"
1571 ::"a" (u.s.Lo),
1572 "d" (u.s.Hi),
1573 "c" (uRegister));
1574
1575# elif RT_INLINE_ASM_USES_INTRIN
1576 __writemsr(uRegister, u.u);
1577
1578# else
1579 __asm
1580 {
1581 mov ecx, [uRegister]
1582 mov edx, [u.s.Hi]
1583 mov eax, [u.s.Lo]
1584 wrmsr
1585 }
1586# endif
1587}
1588#endif
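
/* Usage sketch (illustration only): read-modify-write of an MSR. The register number
 * and bit used here (0x1b for IA32_APIC_BASE, bit 11 as the global enable) are an
 * assumed example; substitute whatever MSR is actually needed.
 * @code
 *     uint64_t u64ApicBase = ASMRdMsr(0x1b);
 *     u64ApicBase |= (uint64_t)1 << 11;
 *     ASMWrMsr(0x1b, u64ApicBase);
 * @endcode
 */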
1589
1590
1591/**
1592 * Reads low part of a machine specific register.
1593 *
1594 * @returns Register content.
1595 * @param uRegister Register to read.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1599#else
1600DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1601{
1602 uint32_t u32;
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__("rdmsr\n\t"
1605 : "=a" (u32)
1606 : "c" (uRegister)
1607 : "edx");
1608
1609# elif RT_INLINE_ASM_USES_INTRIN
1610 u32 = (uint32_t)__readmsr(uRegister);
1611
1612#else
1613 __asm
1614 {
1615 mov ecx, [uRegister]
1616 rdmsr
1617 mov [u32], eax
1618 }
1619# endif
1620
1621 return u32;
1622}
1623#endif
1624
1625
1626/**
1627 * Reads high part of a machine specific register.
1628 *
1629 * @returns Register content.
1630 * @param uRegister Register to read.
1631 */
1632#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1633DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1634#else
1635DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1636{
1637 uint32_t u32;
1638# if RT_INLINE_ASM_GNU_STYLE
1639 __asm__ __volatile__("rdmsr\n\t"
1640 : "=d" (u32)
1641 : "c" (uRegister)
1642 : "eax");
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1646
1647# else
1648 __asm
1649 {
1650 mov ecx, [uRegister]
1651 rdmsr
1652 mov [u32], edx
1653 }
1654# endif
1655
1656 return u32;
1657}
1658#endif
1659
1660
1661/**
1662 * Gets dr0.
1663 *
1664 * @returns dr0.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(RTCCUINTREG) ASMGetDR0(void);
1668#else
1669DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1670{
1671 RTCCUINTREG uDR0;
1672# if RT_INLINE_ASM_USES_INTRIN
1673 uDR0 = __readdr(0);
1674# elif RT_INLINE_ASM_GNU_STYLE
1675# ifdef RT_ARCH_AMD64
1676 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1677# else
1678 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1679# endif
1680# else
1681 __asm
1682 {
1683# ifdef RT_ARCH_AMD64
1684 mov rax, dr0
1685 mov [uDR0], rax
1686# else
1687 mov eax, dr0
1688 mov [uDR0], eax
1689# endif
1690 }
1691# endif
1692 return uDR0;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr1.
1699 *
1700 * @returns dr1.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR1(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1706{
1707 RTCCUINTREG uDR1;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR1 = __readdr(1);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1713# else
1714 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr1
1721 mov [uDR1], rax
1722# else
1723 mov eax, dr1
1724 mov [uDR1], eax
1725# endif
1726 }
1727# endif
1728 return uDR1;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr2.
1735 *
1736 * @returns dr2.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR2(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1742{
1743 RTCCUINTREG uDR2;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR2 = __readdr(2);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1749# else
1750 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr2
1757 mov [uDR2], rax
1758# else
1759 mov eax, dr2
1760 mov [uDR2], eax
1761# endif
1762 }
1763# endif
1764 return uDR2;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr3.
1771 *
1772 * @returns dr3.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR3(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1778{
1779 RTCCUINTREG uDR3;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR3 = __readdr(3);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1785# else
1786 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr3
1793 mov [uDR3], rax
1794# else
1795 mov eax, dr3
1796 mov [uDR3], eax
1797# endif
1798 }
1799# endif
1800 return uDR3;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr6.
1807 *
1808 * @returns dr6.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR6(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1814{
1815 RTCCUINTREG uDR6;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR6 = __readdr(6);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr6
1829 mov [uDR6], rax
1830# else
1831 mov eax, dr6
1832 mov [uDR6], eax
1833# endif
1834 }
1835# endif
1836 return uDR6;
1837}
1838#endif
1839
1840
1841/**
1842 * Reads and clears DR6.
1843 *
1844 * @returns DR6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1855# elif RT_INLINE_ASM_GNU_STYLE
1856 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1857# ifdef RT_ARCH_AMD64
1858 __asm__ __volatile__("movq %%dr6, %0\n\t"
1859 "movq %1, %%dr6\n\t"
1860 : "=r" (uDR6)
1861 : "r" (uNewValue));
1862# else
1863 __asm__ __volatile__("movl %%dr6, %0\n\t"
1864 "movl %1, %%dr6\n\t"
1865 : "=r" (uDR6)
1866 : "r" (uNewValue));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr6
1873 mov [uDR6], rax
1874 mov rcx, rax
1875 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1876 mov dr6, rcx
1877# else
1878 mov eax, dr6
1879 mov [uDR6], eax
1880 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1881 mov dr6, ecx
1882# endif
1883 }
1884# endif
1885 return uDR6;
1886}
1887#endif
1888
1889
1890/**
1891 * Gets dr7.
1892 *
1893 * @returns dr7.
1894 */
1895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1896DECLASM(RTCCUINTREG) ASMGetDR7(void);
1897#else
1898DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1899{
1900 RTCCUINTREG uDR7;
1901# if RT_INLINE_ASM_USES_INTRIN
1902 uDR7 = __readdr(7);
1903# elif RT_INLINE_ASM_GNU_STYLE
1904# ifdef RT_ARCH_AMD64
1905 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1906# else
1907 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1908# endif
1909# else
1910 __asm
1911 {
1912# ifdef RT_ARCH_AMD64
1913 mov rax, dr7
1914 mov [uDR7], rax
1915# else
1916 mov eax, dr7
1917 mov [uDR7], eax
1918# endif
1919 }
1920# endif
1921 return uDR7;
1922}
1923#endif
1924
1925
1926/**
1927 * Sets dr0.
1928 *
1929 * @param uDRVal Debug register value to write
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1933#else
1934DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1935{
1936# if RT_INLINE_ASM_USES_INTRIN
1937 __writedr(0, uDRVal);
1938# elif RT_INLINE_ASM_GNU_STYLE
1939# ifdef RT_ARCH_AMD64
1940 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1941# else
1942 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1943# endif
1944# else
1945 __asm
1946 {
1947# ifdef RT_ARCH_AMD64
1948 mov rax, [uDRVal]
1949 mov dr0, rax
1950# else
1951 mov eax, [uDRVal]
1952 mov dr0, eax
1953# endif
1954 }
1955# endif
1956}
1957#endif
1958
1959
1960/**
1961 * Sets dr1.
1962 *
1963 * @param uDRVal Debug register value to write
1964 */
1965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1966DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1967#else
1968DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1969{
1970# if RT_INLINE_ASM_USES_INTRIN
1971 __writedr(1, uDRVal);
1972# elif RT_INLINE_ASM_GNU_STYLE
1973# ifdef RT_ARCH_AMD64
1974 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1975# else
1976 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1977# endif
1978# else
1979 __asm
1980 {
1981# ifdef RT_ARCH_AMD64
1982 mov rax, [uDRVal]
1983 mov dr1, rax
1984# else
1985 mov eax, [uDRVal]
1986 mov dr1, eax
1987# endif
1988 }
1989# endif
1990}
1991#endif
1992
1993
1994/**
1995 * Sets dr2.
1996 *
1997 * @param uDRVal Debug register value to write
1998 */
1999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2000DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2001#else
2002DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2003{
2004# if RT_INLINE_ASM_USES_INTRIN
2005 __writedr(2, uDRVal);
2006# elif RT_INLINE_ASM_GNU_STYLE
2007# ifdef RT_ARCH_AMD64
2008 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2009# else
2010 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2011# endif
2012# else
2013 __asm
2014 {
2015# ifdef RT_ARCH_AMD64
2016 mov rax, [uDRVal]
2017 mov dr2, rax
2018# else
2019 mov eax, [uDRVal]
2020 mov dr2, eax
2021# endif
2022 }
2023# endif
2024}
2025#endif
2026
2027
2028/**
2029 * Sets dr3.
2030 *
2031 * @param uDRVal Debug register value to write
2032 */
2033#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2034DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2035#else
2036DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2037{
2038# if RT_INLINE_ASM_USES_INTRIN
2039 __writedr(3, uDRVal);
2040# elif RT_INLINE_ASM_GNU_STYLE
2041# ifdef RT_ARCH_AMD64
2042 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2043# else
2044 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2045# endif
2046# else
2047 __asm
2048 {
2049# ifdef RT_ARCH_AMD64
2050 mov rax, [uDRVal]
2051 mov dr3, rax
2052# else
2053 mov eax, [uDRVal]
2054 mov dr3, eax
2055# endif
2056 }
2057# endif
2058}
2059#endif
2060
2061
2062/**
2063 * Sets dr6.
2064 *
2065 * @param uDRVal Debug register value to write
2066 */
2067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2068DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2069#else
2070DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2071{
2072# if RT_INLINE_ASM_USES_INTRIN
2073 __writedr(6, uDRVal);
2074# elif RT_INLINE_ASM_GNU_STYLE
2075# ifdef RT_ARCH_AMD64
2076 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2077# else
2078 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2079# endif
2080# else
2081 __asm
2082 {
2083# ifdef RT_ARCH_AMD64
2084 mov rax, [uDRVal]
2085 mov dr6, rax
2086# else
2087 mov eax, [uDRVal]
2088 mov dr6, eax
2089# endif
2090 }
2091# endif
2092}
2093#endif
2094
2095
2096/**
2097 * Sets dr7.
2098 *
2099 * @param uDRVal Debug register value to write
2100 */
2101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2102DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2103#else
2104DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2105{
2106# if RT_INLINE_ASM_USES_INTRIN
2107 __writedr(7, uDRVal);
2108# elif RT_INLINE_ASM_GNU_STYLE
2109# ifdef RT_ARCH_AMD64
2110 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2111# else
2112 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2113# endif
2114# else
2115 __asm
2116 {
2117# ifdef RT_ARCH_AMD64
2118 mov rax, [uDRVal]
2119 mov dr7, rax
2120# else
2121 mov eax, [uDRVal]
2122 mov dr7, eax
2123# endif
2124 }
2125# endif
2126}
2127#endif
2128
2129
2130/**
2131 * Compiler memory barrier.
2132 *
2133 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2134 * values or any outstanding writes when returning from this function.
2135 *
2136 * This function must be used if non-volatile data is modified by a
2137 * device or the VMM. Typical cases are port access, MMIO access,
2138 * trapping instruction, etc.
2139 */
2140#if RT_INLINE_ASM_GNU_STYLE
2141# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2142#elif RT_INLINE_ASM_USES_INTRIN
2143# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2144#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2145DECLINLINE(void) ASMCompilerBarrier(void)
2146{
2147 __asm
2148 {
2149 }
2150}
2151#endif
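
/* Usage sketch (illustration only): keep the compiler from reordering or caching
 * accesses around an operation that changes memory behind its back. pShared and
 * pfnTrapIntoVmm are hypothetical placeholders.
 * @code
 *     pShared->u32Request = 1;
 *     ASMCompilerBarrier();       // make sure the store above has been emitted
 *     pfnTrapIntoVmm();
 *     ASMCompilerBarrier();       // force re-reading anything the other side wrote
 *     uint32_t const u32Reply = pShared->u32Reply;
 * @endcode
 */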
2152
2153
2154/**
2155 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2156 *
2157 * @param Port I/O port to write to.
2158 * @param u8 8-bit integer to write.
2159 */
2160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2161DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2162#else
2163DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2164{
2165# if RT_INLINE_ASM_GNU_STYLE
2166 __asm__ __volatile__("outb %b1, %w0\n\t"
2167 :: "Nd" (Port),
2168 "a" (u8));
2169
2170# elif RT_INLINE_ASM_USES_INTRIN
2171 __outbyte(Port, u8);
2172
2173# else
2174 __asm
2175 {
2176 mov dx, [Port]
2177 mov al, [u8]
2178 out dx, al
2179 }
2180# endif
2181}
2182#endif
2183
2184
2185/**
2186 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2187 *
2188 * @returns 8-bit integer.
2189 * @param Port I/O port to read from.
2190 */
2191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2192DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2193#else
2194DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2195{
2196 uint8_t u8;
2197# if RT_INLINE_ASM_GNU_STYLE
2198 __asm__ __volatile__("inb %w1, %b0\n\t"
2199 : "=a" (u8)
2200 : "Nd" (Port));
2201
2202# elif RT_INLINE_ASM_USES_INTRIN
2203 u8 = __inbyte(Port);
2204
2205# else
2206 __asm
2207 {
2208 mov dx, [Port]
2209 in al, dx
2210 mov [u8], al
2211 }
2212# endif
2213 return u8;
2214}
2215#endif
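
/* Usage sketch (illustration only): byte-wide port accesses. Port 0x80 (the
 * traditional POST/diagnostic port) and port 0x64 (keyboard controller status)
 * are used as assumed examples.
 * @code
 *     ASMOutU8(0x80, 0x55);                   // write a marker byte
 *     uint8_t const bKbdSts = ASMInU8(0x64);
 * @endcode
 */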
2216
2217
2218/**
2219 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2220 *
2221 * @param Port I/O port to write to.
2222 * @param u16 16-bit integer to write.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2226#else
2227DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2228{
2229# if RT_INLINE_ASM_GNU_STYLE
2230 __asm__ __volatile__("outw %w1, %w0\n\t"
2231 :: "Nd" (Port),
2232 "a" (u16));
2233
2234# elif RT_INLINE_ASM_USES_INTRIN
2235 __outword(Port, u16);
2236
2237# else
2238 __asm
2239 {
2240 mov dx, [Port]
2241 mov ax, [u16]
2242 out dx, ax
2243 }
2244# endif
2245}
2246#endif
2247
2248
2249/**
2250 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2251 *
2252 * @returns 16-bit integer.
2253 * @param Port I/O port to read from.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2257#else
2258DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2259{
2260 uint16_t u16;
2261# if RT_INLINE_ASM_GNU_STYLE
2262 __asm__ __volatile__("inw %w1, %w0\n\t"
2263 : "=a" (u16)
2264 : "Nd" (Port));
2265
2266# elif RT_INLINE_ASM_USES_INTRIN
2267 u16 = __inword(Port);
2268
2269# else
2270 __asm
2271 {
2272 mov dx, [Port]
2273 in ax, dx
2274 mov [u16], ax
2275 }
2276# endif
2277 return u16;
2278}
2279#endif
2280
2281
2282/**
2283 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2284 *
2285 * @param Port I/O port to write to.
2286 * @param u32 32-bit integer to write.
2287 */
2288#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2289DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2290#else
2291DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2292{
2293# if RT_INLINE_ASM_GNU_STYLE
2294 __asm__ __volatile__("outl %1, %w0\n\t"
2295 :: "Nd" (Port),
2296 "a" (u32));
2297
2298# elif RT_INLINE_ASM_USES_INTRIN
2299 __outdword(Port, u32);
2300
2301# else
2302 __asm
2303 {
2304 mov dx, [Port]
2305 mov eax, [u32]
2306 out dx, eax
2307 }
2308# endif
2309}
2310#endif
2311
2312
2313/**
2314 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2315 *
2316 * @returns 32-bit integer.
2317 * @param Port I/O port to read from.
2318 */
2319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2320DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2321#else
2322DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2323{
2324 uint32_t u32;
2325# if RT_INLINE_ASM_GNU_STYLE
2326 __asm__ __volatile__("inl %w1, %0\n\t"
2327 : "=a" (u32)
2328 : "Nd" (Port));
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 u32 = __indword(Port);
2332
2333# else
2334 __asm
2335 {
2336 mov dx, [Port]
2337 in eax, dx
2338 mov [u32], eax
2339 }
2340# endif
2341 return u32;
2342}
2343#endif
2344
2345
2346/**
2347 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2348 *
2349 * @param Port I/O port to write to.
2350 * @param pau8 Pointer to the string buffer.
2351 * @param c The number of items to write.
2352 */
2353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2354DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2355#else
2356DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2357{
2358# if RT_INLINE_ASM_GNU_STYLE
2359 __asm__ __volatile__("rep; outsb\n\t"
2360 : "+S" (pau8),
2361 "+c" (c)
2362 : "d" (Port));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov ecx, [c]
2372 mov eax, [pau8]
2373 xchg esi, eax
2374 rep outsb
2375 xchg esi, eax
2376 }
2377# endif
2378}
2379#endif
2380
2381
2382/**
2383 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2384 *
2385 * @param Port I/O port to read from.
2386 * @param pau8 Pointer to the string buffer (output).
2387 * @param c The number of items to read.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; insb\n\t"
2396 : "+D" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __inbytestring(Port, pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg edi, eax
2410 rep insb
2411 xchg edi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2420 *
2421 * @param Port I/O port to write to.
2422 * @param pau16 Pointer to the string buffer.
2423 * @param c The number of items to write.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2427#else
2428DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; outsw\n\t"
2432 : "+S" (pau16),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau16]
2445 xchg esi, eax
2446 rep outsw
2447 xchg esi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2456 *
2457 * @param Port I/O port to read from.
2458 * @param pau16 Pointer to the string buffer (output).
2459 * @param c The number of items to read.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; insw\n\t"
2468 : "+D" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __inwordstring(Port, pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg edi, eax
2482 rep insw
2483 xchg edi, eax
2484 }
2485# endif
2486}
2487#endif
2488
2489
2490/**
2491 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2492 *
2493 * @param Port I/O port to write to.
2494 * @param pau32 Pointer to the string buffer.
2495 * @param c The number of items to write.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2499#else
2500DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; outsl\n\t"
2504 : "+S" (pau32),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau32]
2517 xchg esi, eax
2518 rep outsd
2519 xchg esi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2528 *
2529 * @param Port I/O port to read from.
2530 * @param pau32 Pointer to the string buffer (output).
2531 * @param c The number of items to read.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; insl\n\t"
2540 : "+D" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg edi, eax
2554 rep insd
2555 xchg edi, eax
2556 }
2557# endif
2558}
2559#endif
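
/*
 * Usage sketch (illustrative only): pulling one 512 byte sector from an ATA data
 * port with the 16-bit string input helper above. The port number, helper name
 * and transfer size are assumptions made purely for the example.
 */
DECLINLINE(void) ASMExampleReadAtaSector(uint16_t *pau16Sector)
{
    /* 256 items of 16 bits == 512 bytes; this expands to a single 'rep insw'. */
    ASMInStrU16(0x1f0, pau16Sector, 256);
}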
2560
2561
2562/**
2563 * Atomically Exchange an unsigned 8-bit value, ordered.
2564 *
2565 * @returns Current *pu8 value
2566 * @param pu8 Pointer to the 8-bit variable to update.
2567 * @param u8 The 8-bit value to assign to *pu8.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL
2570DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2571#else
2572DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("xchgb %0, %1\n\t"
2576 : "=m" (*pu8),
2577 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2578 : "1" (u8),
2579 "m" (*pu8));
2580# else
2581 __asm
2582 {
2583# ifdef RT_ARCH_AMD64
2584 mov rdx, [pu8]
2585 mov al, [u8]
2586 xchg [rdx], al
2587 mov [u8], al
2588# else
2589 mov edx, [pu8]
2590 mov al, [u8]
2591 xchg [edx], al
2592 mov [u8], al
2593# endif
2594 }
2595# endif
2596 return u8;
2597}
2598#endif
2599
2600
2601/**
2602 * Atomically Exchange a signed 8-bit value, ordered.
2603 *
2604 * @returns Current *pi8 value
2605 * @param pi8 Pointer to the 8-bit variable to update.
2606 * @param i8 The 8-bit value to assign to *pi8.
2607 */
2608DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2609{
2610 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2611}
2612
2613
2614/**
2615 * Atomically Exchange a bool value, ordered.
2616 *
2617 * @returns Current *pf value
2618 * @param pf Pointer to the 8-bit variable to update.
2619 * @param f The boolean value to assign to *pf.
2620 */
2621DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2622{
2623#ifdef _MSC_VER
2624 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2625#else
2626 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2627#endif
2628}
2629
2630
2631/**
2632 * Atomically Exchange an unsigned 16-bit value, ordered.
2633 *
2634 * @returns Current *pu16 value
2635 * @param pu16 Pointer to the 16-bit variable to update.
2636 * @param u16 The 16-bit value to assign to *pu16.
2637 */
2638#if RT_INLINE_ASM_EXTERNAL
2639DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2640#else
2641DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2642{
2643# if RT_INLINE_ASM_GNU_STYLE
2644 __asm__ __volatile__("xchgw %0, %1\n\t"
2645 : "=m" (*pu16),
2646 "=r" (u16)
2647 : "1" (u16),
2648 "m" (*pu16));
2649# else
2650 __asm
2651 {
2652# ifdef RT_ARCH_AMD64
2653 mov rdx, [pu16]
2654 mov ax, [u16]
2655 xchg [rdx], ax
2656 mov [u16], ax
2657# else
2658 mov edx, [pu16]
2659 mov ax, [u16]
2660 xchg [edx], ax
2661 mov [u16], ax
2662# endif
2663 }
2664# endif
2665 return u16;
2666}
2667#endif
2668
2669
2670/**
2671 * Atomically Exchange a signed 16-bit value, ordered.
2672 *
2673 * @returns Current *pi16 value
2674 * @param pi16 Pointer to the 16-bit variable to update.
2675 * @param i16 The 16-bit value to assign to *pi16.
2676 */
2677DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2678{
2679 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2680}
2681
2682
2683/**
2684 * Atomically Exchange an unsigned 32-bit value, ordered.
2685 *
2686 * @returns Current *pu32 value
2687 * @param pu32 Pointer to the 32-bit variable to update.
2688 * @param u32 The 32-bit value to assign to *pu32.
2689 */
2690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2691DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2692#else
2693DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2694{
2695# if RT_INLINE_ASM_GNU_STYLE
2696 __asm__ __volatile__("xchgl %0, %1\n\t"
2697 : "=m" (*pu32),
2698 "=r" (u32)
2699 : "1" (u32),
2700 "m" (*pu32));
2701
2702# elif RT_INLINE_ASM_USES_INTRIN
2703 u32 = _InterlockedExchange((long *)pu32, u32);
2704
2705# else
2706 __asm
2707 {
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 mov eax, u32
2711 xchg [rdx], eax
2712 mov [u32], eax
2713# else
2714 mov edx, [pu32]
2715 mov eax, u32
2716 xchg [edx], eax
2717 mov [u32], eax
2718# endif
2719 }
2720# endif
2721 return u32;
2722}
2723#endif
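
/*
 * Usage sketch (illustrative only): atomically consuming a "work pending" word
 * by swapping in zero, so bits set by other threads between the read and the
 * clear cannot be lost. The helper and variable names are assumptions.
 */
DECLINLINE(uint32_t) ASMExampleGrabPendingU32(uint32_t volatile *pu32Pending)
{
    /* Returns the flags that were pending and leaves *pu32Pending zeroed. */
    return ASMAtomicXchgU32(pu32Pending, 0);
}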
2724
2725
2726/**
2727 * Atomically Exchange a signed 32-bit value, ordered.
2728 *
2729 * @returns Current *pi32 value
2730 * @param pi32 Pointer to the 32-bit variable to update.
2731 * @param i32 The 32-bit value to assign to *pi32.
2732 */
2733DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2734{
2735 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2736}
2737
2738
2739/**
2740 * Atomically Exchange an unsigned 64-bit value, ordered.
2741 *
2742 * @returns Current *pu64 value
2743 * @param pu64 Pointer to the 64-bit variable to update.
2744 * @param u64 The 64-bit value to assign to *pu64.
2745 */
2746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2747DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2748#else
2749DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2750{
2751# if defined(RT_ARCH_AMD64)
2752# if RT_INLINE_ASM_USES_INTRIN
2753 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2754
2755# elif RT_INLINE_ASM_GNU_STYLE
2756 __asm__ __volatile__("xchgq %0, %1\n\t"
2757 : "=m" (*pu64),
2758 "=r" (u64)
2759 : "1" (u64),
2760 "m" (*pu64));
2761# else
2762 __asm
2763 {
2764 mov rdx, [pu64]
2765 mov rax, [u64]
2766 xchg [rdx], rax
2767 mov [u64], rax
2768 }
2769# endif
2770# else /* !RT_ARCH_AMD64 */
2771# if RT_INLINE_ASM_GNU_STYLE
2772# if defined(PIC) || defined(__PIC__)
2773 uint32_t u32EBX = (uint32_t)u64;
2774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2775 "xchgl %%ebx, %3\n\t"
2776 "1:\n\t"
2777 "lock; cmpxchg8b (%5)\n\t"
2778 "jnz 1b\n\t"
2779 "movl %3, %%ebx\n\t"
2780 /*"xchgl %%esi, %5\n\t"*/
2781 : "=A" (u64),
2782 "=m" (*pu64)
2783 : "0" (*pu64),
2784 "m" ( u32EBX ),
2785 "c" ( (uint32_t)(u64 >> 32) ),
2786 "S" (pu64));
2787# else /* !PIC */
2788 __asm__ __volatile__("1:\n\t"
2789 "lock; cmpxchg8b %1\n\t"
2790 "jnz 1b\n\t"
2791 : "=A" (u64),
2792 "=m" (*pu64)
2793 : "0" (*pu64),
2794 "b" ( (uint32_t)u64 ),
2795 "c" ( (uint32_t)(u64 >> 32) ));
2796# endif
2797# else
2798 __asm
2799 {
2800 mov ebx, dword ptr [u64]
2801 mov ecx, dword ptr [u64 + 4]
2802 mov edi, pu64
2803 mov eax, dword ptr [edi]
2804 mov edx, dword ptr [edi + 4]
2805 retry:
2806 lock cmpxchg8b [edi]
2807 jnz retry
2808 mov dword ptr [u64], eax
2809 mov dword ptr [u64 + 4], edx
2810 }
2811# endif
2812# endif /* !RT_ARCH_AMD64 */
2813 return u64;
2814}
2815#endif
2816
2817
2818/**
2819 * Atomically Exchange a signed 64-bit value, ordered.
2820 *
2821 * @returns Current *pi64 value
2822 * @param pi64 Pointer to the 64-bit variable to update.
2823 * @param i64 The 64-bit value to assign to *pi64.
2824 */
2825DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2826{
2827 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2828}
2829
2830
2831#ifdef RT_ARCH_AMD64
2832/**
2833 * Atomically Exchange an unsigned 128-bit value, ordered.
2834 *
2835 * @returns Current *pu128.
2836 * @param pu128 Pointer to the 128-bit variable to update.
2837 * @param u128 The 128-bit value to assign to *pu128.
2838 *
2839 * @remark We cannot really assume that any hardware supports this. Nor do I have
2840 * GAS support for it. So, for the time being we'll BREAK the atomic
2841 * bit of this function and use two 64-bit exchanges instead.
2842 */
2843# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2844DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2845# else
2846DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2847{
2848 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2849 {
2850 /** @todo this is clumsy code */
2851 RTUINT128U u128Ret;
2852 u128Ret.u = u128;
2853 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2854 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2855 return u128Ret.u;
2856 }
2857#if 0 /* later? */
2858 else
2859 {
2860# if RT_INLINE_ASM_GNU_STYLE
2861 __asm__ __volatile__("1:\n\t"
2862 "lock; cmpxchg8b %1\n\t"
2863 "jnz 1b\n\t"
2864 : "=A" (u128),
2865 "=m" (*pu128)
2866 : "0" (*pu128),
2867 "b" ( (uint64_t)u128 ),
2868 "c" ( (uint64_t)(u128 >> 64) ));
2869# else
2870 __asm
2871 {
2872 mov rbx, dword ptr [u128]
2873 mov rcx, dword ptr [u128 + 8]
2874 mov rdi, pu128
2875 mov rax, dword ptr [rdi]
2876 mov rdx, dword ptr [rdi + 8]
2877 retry:
2878 lock cmpxchg16b [rdi]
2879 jnz retry
2880 mov dword ptr [u128], rax
2881 mov dword ptr [u128 + 8], rdx
2882 }
2883# endif
2884 }
2885 return u128;
2886#endif
2887}
2888# endif
2889#endif /* RT_ARCH_AMD64 */
2890
2891
2892/**
2893 * Atomically Exchange a pointer value, ordered.
2894 *
2895 * @returns Current *ppv value
2896 * @param ppv Pointer to the pointer variable to update.
2897 * @param pv The pointer value to assign to *ppv.
2898 */
2899DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2900{
2901#if ARCH_BITS == 32
2902 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2903#elif ARCH_BITS == 64
2904 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2905#else
2906# error "ARCH_BITS is bogus"
2907#endif
2908}
2909
2910
2911/**
2912 * Atomically Exchange a raw-mode context pointer value, ordered.
2913 *
2914 * @returns Current *ppvRC value
2915 * @param ppvRC Pointer to the pointer variable to update.
2916 * @param pvRC The pointer value to assign to *ppvRC.
2917 */
2918DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2919{
2920 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2921}
2922
2923
2924/**
2925 * Atomically Exchange a ring-0 pointer value, ordered.
2926 *
2927 * @returns Current *ppvR0 value
2928 * @param ppvR0 Pointer to the pointer variable to update.
2929 * @param pvR0 The pointer value to assign to *ppvR0.
2930 */
2931DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2932{
2933#if R0_ARCH_BITS == 32
2934 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2935#elif R0_ARCH_BITS == 64
2936 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2937#else
2938# error "R0_ARCH_BITS is bogus"
2939#endif
2940}
2941
2942
2943/**
2944 * Atomically Exchange a ring-3 pointer value, ordered.
2945 *
2946 * @returns Current *ppvR3 value
2947 * @param ppvR3 Pointer to the pointer variable to update.
2948 * @param pvR3 The pointer value to assign to *ppvR3.
2949 */
2950DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2951{
2952#if R3_ARCH_BITS == 32
2953 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2954#elif R3_ARCH_BITS == 64
2955 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2956#else
2957# error "R3_ARCH_BITS is bogus"
2958#endif
2959}
2960
2961
2962/** @def ASMAtomicXchgHandle
2963 * Atomically Exchange a typical IPRT handle value, ordered.
2964 *
2965 * @param ph Pointer to the value to update.
2966 * @param hNew The new value to assign to *ph.
2967 * @param phRes Where to store the current *ph value.
2968 *
2969 * @remarks This doesn't currently work for all handles (like RTFILE).
2970 */
2971#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2972 do { \
2973 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2974 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2975 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2976 } while (0)
2977
2978
2979/**
2980 * Atomically Exchange a value which size might differ
2981 * between platforms or compilers, ordered.
2982 *
2983 * @param pu Pointer to the variable to update.
2984 * @param uNew The value to assign to *pu.
2985 * @todo This is busted as it's missing the result argument.
2986 */
2987#define ASMAtomicXchgSize(pu, uNew) \
2988 do { \
2989 switch (sizeof(*(pu))) { \
2990 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2991 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2992 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2993 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2994 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2995 } \
2996 } while (0)
2997
2998/**
2999 * Atomically Exchange a value which size might differ
3000 * between platforms or compilers, ordered.
3001 *
3002 * @param pu Pointer to the variable to update.
3003 * @param uNew The value to assign to *pu.
3004 * @param puRes Where to store the current *pu value.
3005 */
3006#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3007 do { \
3008 switch (sizeof(*(pu))) { \
3009 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3010 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3011 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3012 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3013 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3014 } \
3015 } while (0)
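
/*
 * Usage sketch (illustrative only): ASMAtomicXchgSizeCorrect on a variable whose
 * width differs between platforms (size_t here); the switch picks the 32-bit or
 * 64-bit worker by sizeof. The helper and variable names are assumptions.
 */
DECLINLINE(size_t) ASMExampleXchgCount(size_t volatile *pcItems, size_t cItemsNew)
{
    size_t cItemsOld;
    ASMAtomicXchgSizeCorrect(pcItems, cItemsNew, &cItemsOld);
    return cItemsOld;   /* the value that was in *pcItems before the exchange */
}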
3016
3017
3018/**
3019 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3020 *
3021 * @returns true if xchg was done.
3022 * @returns false if xchg wasn't done.
3023 *
3024 * @param pu32 Pointer to the value to update.
3025 * @param u32New The new value to assign to *pu32.
3026 * @param u32Old The old value to compare *pu32 with.
3027 */
3028#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3029DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3030#else
3031DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3032{
3033# if RT_INLINE_ASM_GNU_STYLE
3034 uint8_t u8Ret;
3035 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3036 "setz %1\n\t"
3037 : "=m" (*pu32),
3038 "=qm" (u8Ret),
3039 "=a" (u32Old)
3040 : "r" (u32New),
3041 "2" (u32Old),
3042 "m" (*pu32));
3043 return (bool)u8Ret;
3044
3045# elif RT_INLINE_ASM_USES_INTRIN
3046 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3047
3048# else
3049 uint32_t u32Ret;
3050 __asm
3051 {
3052# ifdef RT_ARCH_AMD64
3053 mov rdx, [pu32]
3054# else
3055 mov edx, [pu32]
3056# endif
3057 mov eax, [u32Old]
3058 mov ecx, [u32New]
3059# ifdef RT_ARCH_AMD64
3060 lock cmpxchg [rdx], ecx
3061# else
3062 lock cmpxchg [edx], ecx
3063# endif
3064 setz al
3065 movzx eax, al
3066 mov [u32Ret], eax
3067 }
3068 return !!u32Ret;
3069# endif
3070}
3071#endif
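
/*
 * Usage sketch (illustrative only): the typical compare-and-exchange retry loop,
 * here raising a shared 32-bit maximum. The helper name is an assumption.
 */
DECLINLINE(void) ASMExampleUpdateMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
{
    uint32_t u32Old;
    do
    {
        u32Old = *pu32Max;              /* aligned 32-bit reads are atomic on x86 */
        if (u32New <= u32Old)
            return;                     /* current maximum already covers the new value */
    } while (!ASMAtomicCmpXchgU32(pu32Max, u32New, u32Old));
}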
3072
3073
3074/**
3075 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3076 *
3077 * @returns true if xchg was done.
3078 * @returns false if xchg wasn't done.
3079 *
3080 * @param pi32 Pointer to the value to update.
3081 * @param i32New The new value to assign to *pi32.
3082 * @param i32Old The old value to compare *pi32 with.
3083 */
3084DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3085{
3086 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3087}
3088
3089
3090/**
3091 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3092 *
3093 * @returns true if xchg was done.
3094 * @returns false if xchg wasn't done.
3095 *
3096 * @param pu64 Pointer to the 64-bit variable to update.
3097 * @param u64New The 64-bit value to assign to *pu64.
3098 * @param u64Old The value to compare with.
3099 */
3100#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3101 || (!defined(RT_ARCH_AMD64) && RT_INLINE_ASM_GCC_4_3_3_X86 && defined(IN_RING3) && defined(__PIC__))
3102DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3103#else
3104DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3105{
3106# if RT_INLINE_ASM_USES_INTRIN
3107 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3108
3109# elif defined(RT_ARCH_AMD64)
3110# if RT_INLINE_ASM_GNU_STYLE
3111 uint8_t u8Ret;
3112 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3113 "setz %1\n\t"
3114 : "=m" (*pu64),
3115 "=qm" (u8Ret),
3116 "=a" (u64Old)
3117 : "r" (u64New),
3118 "2" (u64Old),
3119 "m" (*pu64));
3120 return (bool)u8Ret;
3121# else
3122 bool fRet;
3123 __asm
3124 {
3125 mov rdx, [pu64]
3126 mov rax, [u64Old]
3127 mov rcx, [u64New]
3128 lock cmpxchg [rdx], rcx
3129 setz al
3130 mov [fRet], al
3131 }
3132 return fRet;
3133# endif
3134# else /* !RT_ARCH_AMD64 */
3135 uint32_t u32Ret;
3136# if RT_INLINE_ASM_GNU_STYLE
3137# if defined(PIC) || defined(__PIC__)
3138 uint32_t u32EBX = (uint32_t)u64New;
3139 uint32_t u32Spill;
3140 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3141 "lock; cmpxchg8b (%6)\n\t"
3142 "setz %%al\n\t"
3143 "movl %4, %%ebx\n\t"
3144 "movzbl %%al, %%eax\n\t"
3145 : "=a" (u32Ret),
3146 "=d" (u32Spill),
3147# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3148 "+m" (*pu64)
3149# else
3150 "=m" (*pu64)
3151# endif
3152 : "A" (u64Old),
3153 "m" ( u32EBX ),
3154 "c" ( (uint32_t)(u64New >> 32) ),
3155 "S" (pu64));
3156# else /* !PIC */
3157 uint32_t u32Spill;
3158 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3159 "setz %%al\n\t"
3160 "movzbl %%al, %%eax\n\t"
3161 : "=a" (u32Ret),
3162 "=d" (u32Spill),
3163 "+m" (*pu64)
3164 : "A" (u64Old),
3165 "b" ( (uint32_t)u64New ),
3166 "c" ( (uint32_t)(u64New >> 32) ));
3167# endif
3168 return (bool)u32Ret;
3169# else
3170 __asm
3171 {
3172 mov ebx, dword ptr [u64New]
3173 mov ecx, dword ptr [u64New + 4]
3174 mov edi, [pu64]
3175 mov eax, dword ptr [u64Old]
3176 mov edx, dword ptr [u64Old + 4]
3177 lock cmpxchg8b [edi]
3178 setz al
3179 movzx eax, al
3180 mov dword ptr [u32Ret], eax
3181 }
3182 return !!u32Ret;
3183# endif
3184# endif /* !RT_ARCH_AMD64 */
3185}
3186#endif
3187
3188
3189/**
3190 * Atomically Compare and exchange a signed 64-bit value, ordered.
3191 *
3192 * @returns true if xchg was done.
3193 * @returns false if xchg wasn't done.
3194 *
3195 * @param pi64 Pointer to the 64-bit variable to update.
3196 * @param i64 The 64-bit value to assign to *pi64.
3197 * @param i64Old The value to compare with.
3198 */
3199DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3200{
3201 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3202}
3203
3204
3205/**
3206 * Atomically Compare and Exchange a pointer value, ordered.
3207 *
3208 * @returns true if xchg was done.
3209 * @returns false if xchg wasn't done.
3210 *
3211 * @param ppv Pointer to the value to update.
3212 * @param pvNew The new value to assign to *ppv.
3213 * @param pvOld The old value to compare *ppv with.
3214 */
3215DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3216{
3217#if ARCH_BITS == 32
3218 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3219#elif ARCH_BITS == 64
3220 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3221#else
3222# error "ARCH_BITS is bogus"
3223#endif
3224}
3225
3226
3227/** @def ASMAtomicCmpXchgHandle
3228 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3229 *
3230 * @param ph Pointer to the value to update.
3231 * @param hNew The new value to assign to *ph.
3232 * @param hOld The old value to compare *ph with.
3233 * @param fRc Where to store the result.
3234 *
3235 * @remarks This doesn't currently work for all handles (like RTFILE).
3236 */
3237#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3238 do { \
3239 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
3240 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3241 } while (0)
3242
3243
3244/** @def ASMAtomicCmpXchgSize
3245 * Atomically Compare and Exchange a value which size might differ
3246 * between platforms or compilers, ordered.
3247 *
3248 * @param pu Pointer to the value to update.
3249 * @param uNew The new value to assign to *pu.
3250 * @param uOld The old value to compare *pu with.
3251 * @param fRc Where to store the result.
3252 */
3253#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3254 do { \
3255 switch (sizeof(*(pu))) { \
3256 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3257 break; \
3258 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3259 break; \
3260 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3261 (fRc) = false; \
3262 break; \
3263 } \
3264 } while (0)
3265
3266
3267/**
3268 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3269 * passes back old value, ordered.
3270 *
3271 * @returns true if xchg was done.
3272 * @returns false if xchg wasn't done.
3273 *
3274 * @param pu32 Pointer to the value to update.
3275 * @param u32New The new value to assign to *pu32.
3276 * @param u32Old The old value to compare *pu32 with.
3277 * @param pu32Old Pointer to store the old value at.
3278 */
3279#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3280DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3281#else
3282DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3283{
3284# if RT_INLINE_ASM_GNU_STYLE
3285 uint8_t u8Ret;
3286 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3287 "setz %1\n\t"
3288 : "=m" (*pu32),
3289 "=qm" (u8Ret),
3290 "=a" (*pu32Old)
3291 : "r" (u32New),
3292 "a" (u32Old),
3293 "m" (*pu32));
3294 return (bool)u8Ret;
3295
3296# elif RT_INLINE_ASM_USES_INTRIN
3297 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3298
3299# else
3300 uint32_t u32Ret;
3301 __asm
3302 {
3303# ifdef RT_ARCH_AMD64
3304 mov rdx, [pu32]
3305# else
3306 mov edx, [pu32]
3307# endif
3308 mov eax, [u32Old]
3309 mov ecx, [u32New]
3310# ifdef RT_ARCH_AMD64
3311 lock cmpxchg [rdx], ecx
3312 mov rdx, [pu32Old]
3313 mov [rdx], eax
3314# else
3315 lock cmpxchg [edx], ecx
3316 mov edx, [pu32Old]
3317 mov [edx], eax
3318# endif
3319 setz al
3320 movzx eax, al
3321 mov [u32Ret], eax
3322 }
3323 return !!u32Ret;
3324# endif
3325}
3326#endif
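
/*
 * Usage sketch (illustrative only): a retry loop built on ASMAtomicCmpXchgExU32
 * which reuses the old value passed back on failure instead of re-reading the
 * variable on every iteration. The helper name is an assumption.
 */
DECLINLINE(uint32_t) ASMExampleFetchOrU32(uint32_t volatile *pu32, uint32_t fOrMask)
{
    uint32_t u32Old = *pu32;            /* aligned 32-bit reads are atomic on x86 */
    uint32_t u32Seen;
    while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fOrMask, u32Old, &u32Seen))
        u32Old = u32Seen;               /* lost the race; retry against the value we saw */
    return u32Old;                      /* the value before the bits were merged in */
}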
3327
3328
3329/**
3330 * Atomically Compare and Exchange a signed 32-bit value, additionally
3331 * passes back old value, ordered.
3332 *
3333 * @returns true if xchg was done.
3334 * @returns false if xchg wasn't done.
3335 *
3336 * @param pi32 Pointer to the value to update.
3337 * @param i32New The new value to assign to *pi32.
3338 * @param i32Old The old value to compare *pi32 with.
3339 * @param pi32Old Pointer to store the old value at.
3340 */
3341DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3342{
3343 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3344}
3345
3346
3347/**
3348 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3349 * passing back old value, ordered.
3350 *
3351 * @returns true if xchg was done.
3352 * @returns false if xchg wasn't done.
3353 *
3354 * @param pu64 Pointer to the 64-bit variable to update.
3355 * @param u64New The 64-bit value to assign to *pu64.
3356 * @param u64Old The value to compare with.
3357 * @param pu64Old Pointer to store the old value at.
3358 */
3359#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3360DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3361#else
3362DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3363{
3364# if RT_INLINE_ASM_USES_INTRIN
3365 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3366
3367# elif defined(RT_ARCH_AMD64)
3368# if RT_INLINE_ASM_GNU_STYLE
3369 uint8_t u8Ret;
3370 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3371 "setz %1\n\t"
3372 : "=m" (*pu64),
3373 "=qm" (u8Ret),
3374 "=a" (*pu64Old)
3375 : "r" (u64New),
3376 "a" (u64Old),
3377 "m" (*pu64));
3378 return (bool)u8Ret;
3379# else
3380 bool fRet;
3381 __asm
3382 {
3383 mov rdx, [pu64]
3384 mov rax, [u64Old]
3385 mov rcx, [u64New]
3386 lock cmpxchg [rdx], rcx
3387 mov rdx, [pu64Old]
3388 mov [rdx], rax
3389 setz al
3390 mov [fRet], al
3391 }
3392 return fRet;
3393# endif
3394# else /* !RT_ARCH_AMD64 */
3395# if RT_INLINE_ASM_GNU_STYLE
3396 uint64_t u64Ret;
3397# if defined(PIC) || defined(__PIC__)
3398 /* NB: this code uses a memory clobber description, because the clean
3399 * solution with an output value for *pu64 makes gcc run out of registers.
3400 * This will cause suboptimal code, and anyone with a better solution is
3401 * welcome to improve this. */
3402 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3403 "lock; cmpxchg8b %3\n\t"
3404 "xchgl %%ebx, %1\n\t"
3405 : "=A" (u64Ret)
3406 : "DS" ((uint32_t)u64New),
3407 "c" ((uint32_t)(u64New >> 32)),
3408 "m" (*pu64),
3409 "0" (u64Old)
3410 : "memory" );
3411# else /* !PIC */
3412 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3413 : "=A" (u64Ret),
3414 "=m" (*pu64)
3415 : "b" ((uint32_t)u64New),
3416 "c" ((uint32_t)(u64New >> 32)),
3417 "m" (*pu64),
3418 "0" (u64Old));
3419# endif
3420 *pu64Old = u64Ret;
3421 return u64Ret == u64Old;
3422# else
3423 uint32_t u32Ret;
3424 __asm
3425 {
3426 mov ebx, dword ptr [u64New]
3427 mov ecx, dword ptr [u64New + 4]
3428 mov edi, [pu64]
3429 mov eax, dword ptr [u64Old]
3430 mov edx, dword ptr [u64Old + 4]
3431 lock cmpxchg8b [edi]
3432 mov ebx, [pu64Old]
3433 mov [ebx], eax
3434 setz al
3435 movzx eax, al
3436 add ebx, 4
3437 mov [ebx], edx
3438 mov dword ptr [u32Ret], eax
3439 }
3440 return !!u32Ret;
3441# endif
3442# endif /* !RT_ARCH_AMD64 */
3443}
3444#endif
3445
3446
3447/**
3448 * Atomically Compare and exchange a signed 64-bit value, additionally
3449 * passing back old value, ordered.
3450 *
3451 * @returns true if xchg was done.
3452 * @returns false if xchg wasn't done.
3453 *
3454 * @param pi64 Pointer to the 64-bit variable to update.
3455 * @param i64 The 64-bit value to assign to *pi64.
3456 * @param i64Old The value to compare with.
3457 * @param pi64Old Pointer to store the old value at.
3458 */
3459DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3460{
3461 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3462}
3463
3464/** @def ASMAtomicCmpXchgExHandle
3465 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3466 *
3467 * @param ph Pointer to the value to update.
3468 * @param hNew The new value to assign to *ph.
3469 * @param hOld The old value to compare *ph with.
3470 * @param fRc Where to store the result.
3471 * @param phOldVal Pointer to where to store the old value.
3472 *
3473 * @remarks This doesn't currently work for all handles (like RTFILE).
3474 */
3475#if ARCH_BITS == 32
3476# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3477 do { \
3478 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3479 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3480 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3481 } while (0)
3482#elif ARCH_BITS == 64
3483# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3484 do { \
3485 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3486 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3487 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3488 } while (0)
3489#endif
3490
3491
3492/** @def ASMAtomicCmpXchgExSize
3493 * Atomically Compare and Exchange a value which size might differ
3494 * between platforms or compilers. Additionally passes back old value.
3495 *
3496 * @param pu Pointer to the value to update.
3497 * @param uNew The new value to assign to *pu.
3498 * @param uOld The old value to compare *pu with.
3499 * @param fRc Where to store the result.
3500 * @param puOldVal Pointer to where to store the old value.
3501 */
3502#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3503 do { \
3504 switch (sizeof(*(pu))) { \
3505 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3506 break; \
3507 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3508 break; \
3509 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3510 (fRc) = false; \
3511 *(puOldVal) = 0; \
3512 break; \
3513 } \
3514 } while (0)
3515
3516
3517/**
3518 * Atomically Compare and Exchange a pointer value, additionally
3519 * passing back old value, ordered.
3520 *
3521 * @returns true if xchg was done.
3522 * @returns false if xchg wasn't done.
3523 *
3524 * @param ppv Pointer to the value to update.
3525 * @param pvNew The new value to assign to *ppv.
3526 * @param pvOld The old value to compare *ppv with.
3527 * @param ppvOld Pointer to store the old value at.
3528 */
3529DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3530{
3531#if ARCH_BITS == 32
3532 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3533#elif ARCH_BITS == 64
3534 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3535#else
3536# error "ARCH_BITS is bogus"
3537#endif
3538}
3539
3540
3541/**
3542 * Atomically exchanges and adds to a 32-bit value, ordered.
3543 *
3544 * @returns The old value.
3545 * @param pu32 Pointer to the value.
3546 * @param u32 Number to add.
3547 */
3548#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3549DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3550#else
3551DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3552{
3553# if RT_INLINE_ASM_USES_INTRIN
3554 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3555 return u32;
3556
3557# elif RT_INLINE_ASM_GNU_STYLE
3558 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3559 : "=r" (u32),
3560 "=m" (*pu32)
3561 : "0" (u32),
3562 "m" (*pu32)
3563 : "memory");
3564 return u32;
3565# else
3566 __asm
3567 {
3568 mov eax, [u32]
3569# ifdef RT_ARCH_AMD64
3570 mov rdx, [pu32]
3571 lock xadd [rdx], eax
3572# else
3573 mov edx, [pu32]
3574 lock xadd [edx], eax
3575# endif
3576 mov [u32], eax
3577 }
3578 return u32;
3579# endif
3580}
3581#endif
3582
3583
3584/**
3585 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3586 *
3587 * @returns The old value.
3588 * @param pi32 Pointer to the value.
3589 * @param i32 Number to add.
3590 */
3591DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3592{
3593 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3594}
3595
3596
3597/**
3598 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3599 *
3600 * @returns The old value.
3601 * @param pu32 Pointer to the value.
3602 * @param u32 Number to subtract.
3603 */
3604 DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
3605 {
3606 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3607}
3608
3609
3610/**
3611 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3612 *
3613 * @returns The old value.
3614 * @param pi32 Pointer to the value.
3615 * @param i32 Number to subtract.
3616 */
3617DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3618{
3619 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3620}
3621
3622
3623/**
3624 * Atomically increment a 32-bit value, ordered.
3625 *
3626 * @returns The new value.
3627 * @param pu32 Pointer to the value to increment.
3628 */
3629#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3630DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3631#else
3632DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3633{
3634 uint32_t u32;
3635# if RT_INLINE_ASM_USES_INTRIN
3636 u32 = _InterlockedIncrement((long *)pu32);
3637 return u32;
3638
3639# elif RT_INLINE_ASM_GNU_STYLE
3640 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3641 : "=r" (u32),
3642 "=m" (*pu32)
3643 : "0" (1),
3644 "m" (*pu32)
3645 : "memory");
3646 return u32+1;
3647# else
3648 __asm
3649 {
3650 mov eax, 1
3651# ifdef RT_ARCH_AMD64
3652 mov rdx, [pu32]
3653 lock xadd [rdx], eax
3654# else
3655 mov edx, [pu32]
3656 lock xadd [edx], eax
3657# endif
3658 mov u32, eax
3659 }
3660 return u32+1;
3661# endif
3662}
3663#endif
3664
3665
3666/**
3667 * Atomically increment a signed 32-bit value, ordered.
3668 *
3669 * @returns The new value.
3670 * @param pi32 Pointer to the value to increment.
3671 */
3672DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3673{
3674 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3675}
3676
3677
3678/**
3679 * Atomically decrement an unsigned 32-bit value, ordered.
3680 *
3681 * @returns The new value.
3682 * @param pu32 Pointer to the value to decrement.
3683 */
3684#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3685DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3686#else
3687DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3688{
3689 uint32_t u32;
3690# if RT_INLINE_ASM_USES_INTRIN
3691 u32 = _InterlockedDecrement((long *)pu32);
3692 return u32;
3693
3694# elif RT_INLINE_ASM_GNU_STYLE
3695 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3696 : "=r" (u32),
3697 "=m" (*pu32)
3698 : "0" (-1),
3699 "m" (*pu32)
3700 : "memory");
3701 return u32-1;
3702# else
3703 __asm
3704 {
3705 mov eax, -1
3706# ifdef RT_ARCH_AMD64
3707 mov rdx, [pu32]
3708 lock xadd [rdx], eax
3709# else
3710 mov edx, [pu32]
3711 lock xadd [edx], eax
3712# endif
3713 mov u32, eax
3714 }
3715 return u32-1;
3716# endif
3717}
3718#endif
3719
3720
3721/**
3722 * Atomically decrement a signed 32-bit value, ordered.
3723 *
3724 * @returns The new value.
3725 * @param pi32 Pointer to the value to decrement.
3726 */
3727DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3728{
3729 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3730}
3731
3732
3733/**
3734 * Atomically Or an unsigned 32-bit value, ordered.
3735 *
3736 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3737 * @param u32 The value to OR *pu32 with.
3738 */
3739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3740DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3741#else
3742DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3743{
3744# if RT_INLINE_ASM_USES_INTRIN
3745 _InterlockedOr((long volatile *)pu32, (long)u32);
3746
3747# elif RT_INLINE_ASM_GNU_STYLE
3748 __asm__ __volatile__("lock; orl %1, %0\n\t"
3749 : "=m" (*pu32)
3750 : "ir" (u32),
3751 "m" (*pu32));
3752# else
3753 __asm
3754 {
3755 mov eax, [u32]
3756# ifdef RT_ARCH_AMD64
3757 mov rdx, [pu32]
3758 lock or [rdx], eax
3759# else
3760 mov edx, [pu32]
3761 lock or [edx], eax
3762# endif
3763 }
3764# endif
3765}
3766#endif
3767
3768
3769/**
3770 * Atomically Or a signed 32-bit value, ordered.
3771 *
3772 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3773 * @param i32 The value to OR *pi32 with.
3774 */
3775DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3776{
3777 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3778}
3779
3780
3781/**
3782 * Atomically And an unsigned 32-bit value, ordered.
3783 *
3784 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3785 * @param u32 The value to AND *pu32 with.
3786 */
3787#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3788DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3789#else
3790DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3791{
3792# if RT_INLINE_ASM_USES_INTRIN
3793 _InterlockedAnd((long volatile *)pu32, u32);
3794
3795# elif RT_INLINE_ASM_GNU_STYLE
3796 __asm__ __volatile__("lock; andl %1, %0\n\t"
3797 : "=m" (*pu32)
3798 : "ir" (u32),
3799 "m" (*pu32));
3800# else
3801 __asm
3802 {
3803 mov eax, [u32]
3804# ifdef RT_ARCH_AMD64
3805 mov rdx, [pu32]
3806 lock and [rdx], eax
3807# else
3808 mov edx, [pu32]
3809 lock and [edx], eax
3810# endif
3811 }
3812# endif
3813}
3814#endif
3815
3816
3817/**
3818 * Atomically And a signed 32-bit value, ordered.
3819 *
3820 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3821 * @param i32 The value to AND *pi32 with.
3822 */
3823DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3824{
3825 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3826}
3827
3828
3829/**
3830 * Memory fence, waits for any pending writes and reads to complete.
3831 */
3832DECLINLINE(void) ASMMemoryFence(void)
3833{
3834 /** @todo use mfence? check if all cpus we care for support it. */
3835 uint32_t volatile u32;
3836 ASMAtomicXchgU32(&u32, 0);
3837}
3838
3839
3840/**
3841 * Write fence, waits for any pending writes to complete.
3842 */
3843DECLINLINE(void) ASMWriteFence(void)
3844{
3845 /** @todo use sfence? check if all cpus we care for support it. */
3846 ASMMemoryFence();
3847}
3848
3849
3850/**
3851 * Read fence, waits for any pending reads to complete.
3852 */
3853DECLINLINE(void) ASMReadFence(void)
3854{
3855 /** @todo use lfence? check if all cpus we care for support it. */
3856 ASMMemoryFence();
3857}
3858
3859
3860/**
3861 * Atomically reads an unsigned 8-bit value, ordered.
3862 *
3863 * @returns Current *pu8 value
3864 * @param pu8 Pointer to the 8-bit variable to read.
3865 */
3866DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3867{
3868 ASMMemoryFence();
3869 return *pu8; /* byte reads are atomic on x86 */
3870}
3871
3872
3873/**
3874 * Atomically reads an unsigned 8-bit value, unordered.
3875 *
3876 * @returns Current *pu8 value
3877 * @param pu8 Pointer to the 8-bit variable to read.
3878 */
3879DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3880{
3881 return *pu8; /* byte reads are atomic on x86 */
3882}
3883
3884
3885/**
3886 * Atomically reads a signed 8-bit value, ordered.
3887 *
3888 * @returns Current *pi8 value
3889 * @param pi8 Pointer to the 8-bit variable to read.
3890 */
3891DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3892{
3893 ASMMemoryFence();
3894 return *pi8; /* byte reads are atomic on x86 */
3895}
3896
3897
3898/**
3899 * Atomically reads a signed 8-bit value, unordered.
3900 *
3901 * @returns Current *pi8 value
3902 * @param pi8 Pointer to the 8-bit variable to read.
3903 */
3904DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3905{
3906 return *pi8; /* byte reads are atomic on x86 */
3907}
3908
3909
3910/**
3911 * Atomically reads an unsigned 16-bit value, ordered.
3912 *
3913 * @returns Current *pu16 value
3914 * @param pu16 Pointer to the 16-bit variable to read.
3915 */
3916DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3917{
3918 ASMMemoryFence();
3919 Assert(!((uintptr_t)pu16 & 1));
3920 return *pu16;
3921}
3922
3923
3924/**
3925 * Atomically reads an unsigned 16-bit value, unordered.
3926 *
3927 * @returns Current *pu16 value
3928 * @param pu16 Pointer to the 16-bit variable to read.
3929 */
3930DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3931{
3932 Assert(!((uintptr_t)pu16 & 1));
3933 return *pu16;
3934}
3935
3936
3937/**
3938 * Atomically reads a signed 16-bit value, ordered.
3939 *
3940 * @returns Current *pi16 value
3941 * @param pi16 Pointer to the 16-bit variable to read.
3942 */
3943DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3944{
3945 ASMMemoryFence();
3946 Assert(!((uintptr_t)pi16 & 1));
3947 return *pi16;
3948}
3949
3950
3951/**
3952 * Atomically reads a signed 16-bit value, unordered.
3953 *
3954 * @returns Current *pi16 value
3955 * @param pi16 Pointer to the 16-bit variable to read.
3956 */
3957DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3958{
3959 Assert(!((uintptr_t)pi16 & 1));
3960 return *pi16;
3961}
3962
3963
3964/**
3965 * Atomically reads an unsigned 32-bit value, ordered.
3966 *
3967 * @returns Current *pu32 value
3968 * @param pu32 Pointer to the 32-bit variable to read.
3969 */
3970DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3971{
3972 ASMMemoryFence();
3973 Assert(!((uintptr_t)pu32 & 3));
3974 return *pu32;
3975}
3976
3977
3978/**
3979 * Atomically reads an unsigned 32-bit value, unordered.
3980 *
3981 * @returns Current *pu32 value
3982 * @param pu32 Pointer to the 32-bit variable to read.
3983 */
3984DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3985{
3986 Assert(!((uintptr_t)pu32 & 3));
3987 return *pu32;
3988}
3989
3990
3991/**
3992 * Atomically reads a signed 32-bit value, ordered.
3993 *
3994 * @returns Current *pi32 value
3995 * @param pi32 Pointer to the 32-bit variable to read.
3996 */
3997DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3998{
3999 ASMMemoryFence();
4000 Assert(!((uintptr_t)pi32 & 3));
4001 return *pi32;
4002}
4003
4004
4005/**
4006 * Atomically reads a signed 32-bit value, unordered.
4007 *
4008 * @returns Current *pi32 value
4009 * @param pi32 Pointer to the 32-bit variable to read.
4010 */
4011DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4012{
4013 Assert(!((uintptr_t)pi32 & 3));
4014 return *pi32;
4015}
4016
4017
4018/**
4019 * Atomically reads an unsigned 64-bit value, ordered.
4020 *
4021 * @returns Current *pu64 value
4022 * @param pu64 Pointer to the 64-bit variable to read.
4023 * The memory pointed to must be writable.
4024 * @remark This will fault if the memory is read-only!
4025 */
4026#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4027 || (!defined(RT_ARCH_AMD64) && RT_INLINE_ASM_GCC_4_3_3_X86 && defined(IN_RING3) && defined(__PIC__))
4028DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4029#else
4030DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4031{
4032 uint64_t u64;
4033# ifdef RT_ARCH_AMD64
4034 Assert(!((uintptr_t)pu64 & 7));
4035/*# if RT_INLINE_ASM_GNU_STYLE
4036 __asm__ __volatile__( "mfence\n\t"
4037 "movq %1, %0\n\t"
4038 : "=r" (u64)
4039 : "m" (*pu64));
4040# else
4041 __asm
4042 {
4043 mfence
4044 mov rdx, [pu64]
4045 mov rax, [rdx]
4046 mov [u64], rax
4047 }
4048# endif*/
4049 ASMMemoryFence();
4050 u64 = *pu64;
4051# else /* !RT_ARCH_AMD64 */
4052# if RT_INLINE_ASM_GNU_STYLE
4053# if defined(PIC) || defined(__PIC__)
4054 uint32_t u32EBX = 0;
4055 Assert(!((uintptr_t)pu64 & 7));
4056 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4057 "lock; cmpxchg8b (%5)\n\t"
4058 "movl %3, %%ebx\n\t"
4059 : "=A" (u64),
4060# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4061 "+m" (*pu64)
4062# else
4063 "=m" (*pu64)
4064# endif
4065 : "0" (0),
4066 "m" (u32EBX),
4067 "c" (0),
4068 "S" (pu64));
4069# else /* !PIC */
4070 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4071 : "=A" (u64),
4072 "+m" (*pu64)
4073 : "0" (0),
4074 "b" (0),
4075 "c" (0));
4076# endif
4077# else
4078 Assert(!((uintptr_t)pu64 & 7));
4079 __asm
4080 {
4081 xor eax, eax
4082 xor edx, edx
4083 mov edi, pu64
4084 xor ecx, ecx
4085 xor ebx, ebx
4086 lock cmpxchg8b [edi]
4087 mov dword ptr [u64], eax
4088 mov dword ptr [u64 + 4], edx
4089 }
4090# endif
4091# endif /* !RT_ARCH_AMD64 */
4092 return u64;
4093}
4094#endif
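
/*
 * Usage sketch (illustrative only): sampling a 64-bit counter that another
 * thread updates atomically. On 32-bit hosts the read above is done with
 * 'lock cmpxchg8b', which is why the counter must live in writable memory even
 * for pure readers. The helper name is an assumption.
 */
DECLINLINE(uint64_t) ASMExampleCounterDelta(uint64_t volatile *pu64Counter, uint64_t u64Previous)
{
    return ASMAtomicReadU64(pu64Counter) - u64Previous;    /* one atomic 64-bit read, then plain math */
}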
4095
4096
4097/**
4098 * Atomically reads an unsigned 64-bit value, unordered.
4099 *
4100 * @returns Current *pu64 value
4101 * @param pu64 Pointer to the 64-bit variable to read.
4102 * The memory pointed to must be writable.
4103 * @remark This will fault if the memory is read-only!
4104 */
4105#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4106DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4107#else
4108DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4109{
4110 uint64_t u64;
4111# ifdef RT_ARCH_AMD64
4112 Assert(!((uintptr_t)pu64 & 7));
4113/*# if RT_INLINE_ASM_GNU_STYLE
4114 Assert(!((uintptr_t)pu64 & 7));
4115 __asm__ __volatile__("movq %1, %0\n\t"
4116 : "=r" (u64)
4117 : "m" (*pu64));
4118# else
4119 __asm
4120 {
4121 mov rdx, [pu64]
4122 mov rax, [rdx]
4123 mov [u64], rax
4124 }
4125# endif */
4126 u64 = *pu64;
4127# else /* !RT_ARCH_AMD64 */
4128# if RT_INLINE_ASM_GNU_STYLE
4129# if defined(PIC) || defined(__PIC__)
4130 uint32_t u32EBX = 0;
4131 uint32_t u32Spill;
4132 Assert(!((uintptr_t)pu64 & 7));
4133 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4134 "xor %%ecx,%%ecx\n\t"
4135 "xor %%edx,%%edx\n\t"
4136 "xchgl %%ebx, %3\n\t"
4137 "lock; cmpxchg8b (%4)\n\t"
4138 "movl %3, %%ebx\n\t"
4139 : "=A" (u64),
4140# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4141 "+m" (*pu64),
4142# else
4143 "=m" (*pu64),
4144# endif
4145 "=c" (u32Spill)
4146 : "m" (u32EBX),
4147 "S" (pu64));
4148# else /* !PIC */
4149 __asm__ __volatile__("cmpxchg8b %1\n\t"
4150 : "=A" (u64),
4151 "+m" (*pu64)
4152 : "0" (0),
4153 "b" (0),
4154 "c" (0));
4155# endif
4156# else
4157 Assert(!((uintptr_t)pu64 & 7));
4158 __asm
4159 {
4160 xor eax, eax
4161 xor edx, edx
4162 mov edi, pu64
4163 xor ecx, ecx
4164 xor ebx, ebx
4165 lock cmpxchg8b [edi]
4166 mov dword ptr [u64], eax
4167 mov dword ptr [u64 + 4], edx
4168 }
4169# endif
4170# endif /* !RT_ARCH_AMD64 */
4171 return u64;
4172}
4173#endif
4174
4175
4176/**
4177 * Atomically reads a signed 64-bit value, ordered.
4178 *
4179 * @returns Current *pi64 value
4180 * @param pi64 Pointer to the 64-bit variable to read.
4181 * The memory pointed to must be writable.
4182 * @remark This will fault if the memory is read-only!
4183 */
4184DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4185{
4186 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4187}
4188
4189
4190/**
4191 * Atomically reads a signed 64-bit value, unordered.
4192 *
4193 * @returns Current *pi64 value
4194 * @param pi64 Pointer to the 64-bit variable to read.
4195 * The memory pointed to must be writable.
4196 * @remark This will fault if the memory is read-only!
4197 */
4198DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4199{
4200 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4201}
4202
4203
4204/**
4205 * Atomically reads a pointer value, ordered.
4206 *
4207 * @returns Current *ppv value
4208 * @param ppv Pointer to the pointer variable to read.
4209 */
4210DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4211{
4212#if ARCH_BITS == 32
4213 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4214#elif ARCH_BITS == 64
4215 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4216#else
4217# error "ARCH_BITS is bogus"
4218#endif
4219}
4220
4221
4222/**
4223 * Atomically reads a pointer value, unordered.
4224 *
4225 * @returns Current *ppv value
4226 * @param ppv Pointer to the pointer variable to read.
4227 */
4228DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4229{
4230#if ARCH_BITS == 32
4231 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4232#elif ARCH_BITS == 64
4233 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4234#else
4235# error "ARCH_BITS is bogus"
4236#endif
4237}
4238
4239
4240/**
4241 * Atomically reads a boolean value, ordered.
4242 *
4243 * @returns Current *pf value
4244 * @param pf Pointer to the boolean variable to read.
4245 */
4246DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4247{
4248 ASMMemoryFence();
4249 return *pf; /* byte reads are atomic on x86 */
4250}
4251
4252
4253/**
4254 * Atomically reads a boolean value, unordered.
4255 *
4256 * @returns Current *pf value
4257 * @param pf Pointer to the boolean variable to read.
4258 */
4259DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4260{
4261 return *pf; /* byte reads are atomic on x86 */
4262}
4263
4264
4265/**
4266 * Atomically read a typical IPRT handle value, ordered.
4267 *
4268 * @param ph Pointer to the handle variable to read.
4269 * @param phRes Where to store the result.
4270 *
4271 * @remarks This doesn't currently work for all handles (like RTFILE).
4272 */
4273#define ASMAtomicReadHandle(ph, phRes) \
4274 do { \
4275 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
4276 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4277 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4278 } while (0)
4279
4280
4281/**
4282 * Atomically read a typical IPRT handle value, unordered.
4283 *
4284 * @param ph Pointer to the handle variable to read.
4285 * @param phRes Where to store the result.
4286 *
4287 * @remarks This doesn't currently work for all handles (like RTFILE).
4288 */
4289#define ASMAtomicUoReadHandle(ph, phRes) \
4290 do { \
4291 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4292 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4293 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4294 } while (0)
4295
4296
4297/**
4298 * Atomically read a value which size might differ
4299 * between platforms or compilers, ordered.
4300 *
4301 * @param pu Pointer to the variable to update.
4302 * @param puRes Where to store the result.
4303 */
4304#define ASMAtomicReadSize(pu, puRes) \
4305 do { \
4306 switch (sizeof(*(pu))) { \
4307 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4308 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4309 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4310 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4311 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4312 } \
4313 } while (0)
4314
4315
4316/**
4317 * Atomically read a value which size might differ
4318 * between platforms or compilers, unordered.
4319 *
4320 * @param pu Pointer to the variable to update.
4321 * @param puRes Where to store the result.
4322 */
4323#define ASMAtomicUoReadSize(pu, puRes) \
4324 do { \
4325 switch (sizeof(*(pu))) { \
4326 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4327 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4328 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4329 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4330 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4331 } \
4332 } while (0)
4333
4334
4335/**
4336 * Atomically writes an unsigned 8-bit value, ordered.
4337 *
4338 * @param pu8 Pointer to the 8-bit variable.
4339 * @param u8 The 8-bit value to assign to *pu8.
4340 */
4341DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4342{
4343 ASMAtomicXchgU8(pu8, u8);
4344}
4345
4346
4347/**
4348 * Atomically writes an unsigned 8-bit value, unordered.
4349 *
4350 * @param pu8 Pointer to the 8-bit variable.
4351 * @param u8 The 8-bit value to assign to *pu8.
4352 */
4353DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4354{
4355 *pu8 = u8; /* byte writes are atomic on x86 */
4356}
4357
4358
4359/**
4360 * Atomically writes a signed 8-bit value, ordered.
4361 *
4362 * @param pi8 Pointer to the 8-bit variable to read.
4363 * @param i8 The 8-bit value to assign to *pi8.
4364 */
4365DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4366{
4367 ASMAtomicXchgS8(pi8, i8);
4368}
4369
4370
4371/**
4372 * Atomically writes a signed 8-bit value, unordered.
4373 *
4374 * @param pi8 Pointer to the 8-bit variable to read.
4375 * @param i8 The 8-bit value to assign to *pi8.
4376 */
4377DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4378{
4379 *pi8 = i8; /* byte writes are atomic on x86 */
4380}
4381
4382
4383/**
4384 * Atomically writes an unsigned 16-bit value, ordered.
4385 *
4386 * @param pu16 Pointer to the 16-bit variable.
4387 * @param u16 The 16-bit value to assign to *pu16.
4388 */
4389DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4390{
4391 ASMAtomicXchgU16(pu16, u16);
4392}
4393
4394
4395/**
4396 * Atomically writes an unsigned 16-bit value, unordered.
4397 *
4398 * @param pu16 Pointer to the 16-bit variable.
4399 * @param u16 The 16-bit value to assign to *pu16.
4400 */
4401DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4402{
4403 Assert(!((uintptr_t)pu16 & 1));
4404 *pu16 = u16;
4405}
4406
4407
4408/**
4409 * Atomically writes a signed 16-bit value, ordered.
4410 *
4411 * @param pi16 Pointer to the 16-bit variable to read.
4412 * @param i16 The 16-bit value to assign to *pi16.
4413 */
4414DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4415{
4416 ASMAtomicXchgS16(pi16, i16);
4417}
4418
4419
4420/**
4421 * Atomically writes a signed 16-bit value, unordered.
4422 *
4423 * @param pi16 Pointer to the 16-bit variable to read.
4424 * @param i16 The 16-bit value to assign to *pi16.
4425 */
4426DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4427{
4428 Assert(!((uintptr_t)pi16 & 1));
4429 *pi16 = i16;
4430}
4431
4432
4433/**
4434 * Atomically writes an unsigned 32-bit value, ordered.
4435 *
4436 * @param pu32 Pointer to the 32-bit variable.
4437 * @param u32 The 32-bit value to assign to *pu32.
4438 */
4439DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4440{
4441 ASMAtomicXchgU32(pu32, u32);
4442}
4443
4444
4445/**
4446 * Atomically writes an unsigned 32-bit value, unordered.
4447 *
4448 * @param pu32 Pointer to the 32-bit variable.
4449 * @param u32 The 32-bit value to assign to *pu32.
4450 */
4451DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4452{
4453 Assert(!((uintptr_t)pu32 & 3));
4454 *pu32 = u32;
4455}
4456
4457
4458/**
4459 * Atomically writes a signed 32-bit value, ordered.
4460 *
4461 * @param   pi32    Pointer to the 32-bit variable.
4462 * @param i32 The 32-bit value to assign to *pi32.
4463 */
4464DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4465{
4466 ASMAtomicXchgS32(pi32, i32);
4467}
4468
4469
4470/**
4471 * Atomically writes a signed 32-bit value, unordered.
4472 *
4473 * @param   pi32    Pointer to the 32-bit variable.
4474 * @param i32 The 32-bit value to assign to *pi32.
4475 */
4476DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4477{
4478 Assert(!((uintptr_t)pi32 & 3));
4479 *pi32 = i32;
4480}
4481
4482
4483/**
4484 * Atomically writes an unsigned 64-bit value, ordered.
4485 *
4486 * @param pu64 Pointer to the 64-bit variable.
4487 * @param u64 The 64-bit value to assign to *pu64.
4488 */
4489DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4490{
4491 ASMAtomicXchgU64(pu64, u64);
4492}
4493
4494
4495/**
4496 * Atomically writes an unsigned 64-bit value, unordered.
4497 *
4498 * @param pu64 Pointer to the 64-bit variable.
4499 * @param u64 The 64-bit value to assign to *pu64.
4500 */
4501DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4502{
4503 Assert(!((uintptr_t)pu64 & 7));
4504#if ARCH_BITS == 64
4505 *pu64 = u64;
4506#else
4507 ASMAtomicXchgU64(pu64, u64);
4508#endif
4509}
4510
4511
4512/**
4513 * Atomically writes a signed 64-bit value, ordered.
4514 *
4515 * @param pi64 Pointer to the 64-bit variable.
4516 * @param i64 The 64-bit value to assign to *pi64.
4517 */
4518DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4519{
4520 ASMAtomicXchgS64(pi64, i64);
4521}
4522
4523
4524/**
4525 * Atomically writes a signed 64-bit value, unordered.
4526 *
4527 * @param pi64 Pointer to the 64-bit variable.
4528 * @param i64 The 64-bit value to assign to *pi64.
4529 */
4530DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4531{
4532 Assert(!((uintptr_t)pi64 & 7));
4533#if ARCH_BITS == 64
4534 *pi64 = i64;
4535#else
4536 ASMAtomicXchgS64(pi64, i64);
4537#endif
4538}
4539
4540
4541/**
4542 * Atomically writes a boolean value, ordered.
4543 *
4544 * @param pf Pointer to the boolean variable.
4545 * @param f The boolean value to assign to *pf.
4546 */
4547DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4548{
4549 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4550}
4551
4552
4553/**
4554 * Atomically writes a boolean value, unordered.
4555 *
4556 * @param pf Pointer to the boolean variable.
4557 * @param f The boolean value to assign to *pf.
4558 */
4559DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4560{
4561 *pf = f; /* byte writes are atomic on x86 */
4562}
4563
4564
4565/**
4566 * Atomically writes a pointer value, ordered.
4567 *
4568 * @param   ppv     Pointer to the pointer variable.
4569 * @param   pv      The pointer value to assign to *ppv.
4571 */
4572DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4573{
4574#if ARCH_BITS == 32
4575 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4576#elif ARCH_BITS == 64
4577 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4578#else
4579# error "ARCH_BITS is bogus"
4580#endif
4581}
4582
4583
4584/**
4585 * Atomically writes a pointer value, unordered.
4586 *
4587 * @param   ppv     Pointer to the pointer variable.
4588 * @param   pv      The pointer value to assign to *ppv.
4590 */
4591DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4592{
4593#if ARCH_BITS == 32
4594 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4595#elif ARCH_BITS == 64
4596 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4597#else
4598# error "ARCH_BITS is bogus"
4599#endif
4600}
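
/*
 * Usage sketch (illustrative only, compiled out): publishing a fully
 * initialised structure through a shared pointer with ASMAtomicWritePtr.
 * The EXAMPLENODE type and g_pExampleNode variable are hypothetical.
 */
#if 0
typedef struct EXAMPLENODE { uint32_t uValue; } EXAMPLENODE;
static EXAMPLENODE *volatile g_pExampleNode = NULL;

static void examplePublishNode(EXAMPLENODE *pNode)
{
    pNode->uValue = 42;     /* initialise before publishing */
    /* The ordered write makes the initialisation above visible to any
       reader that observes the new pointer value. */
    ASMAtomicWritePtr((void * volatile *)&g_pExampleNode, pNode);
}
#endif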
4601
4602
4603/**
4604 * Atomically write a typical IPRT handle value, ordered.
4605 *
4606 * @param ph Pointer to the variable to update.
4607 * @param hNew The value to assign to *ph.
4608 *
4609 * @remarks This doesn't currently work for all handles (like RTFILE).
4610 */
4611#define ASMAtomicWriteHandle(ph, hNew) \
4612 do { \
4613 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)hNew); \
4614 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4615 } while (0)
4616
4617
4618/**
4619 * Atomically write a typical IPRT handle value, unordered.
4620 *
4621 * @param ph Pointer to the variable to update.
4622 * @param hNew The value to assign to *ph.
4623 *
4624 * @remarks This doesn't currently work for all handles (like RTFILE).
4625 */
4626#define ASMAtomicUoWriteHandle(ph, hNew) \
4627 do { \
4628 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)hNew); \
4629 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4630 } while (0)
4631
4632
4633/**
4634 * Atomically writes a value whose size might differ
4635 * between platforms or compilers, ordered.
4636 *
4637 * @param pu Pointer to the variable to update.
4638 * @param uNew The value to assign to *pu.
4639 */
4640#define ASMAtomicWriteSize(pu, uNew) \
4641 do { \
4642 switch (sizeof(*(pu))) { \
4643 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4644 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4645 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4646 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4647 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4648 } \
4649 } while (0)
4650
4651/**
4652 * Atomically writes a value whose size might differ
4653 * between platforms or compilers, unordered.
4654 *
4655 * @param pu Pointer to the variable to update.
4656 * @param uNew The value to assign to *pu.
4657 */
4658#define ASMAtomicUoWriteSize(pu, uNew) \
4659 do { \
4660 switch (sizeof(*(pu))) { \
4661 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4662 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4663 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4664 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4665            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4666 } \
4667 } while (0)
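
/*
 * Usage sketch (illustrative only, compiled out): ASMAtomicWriteSize picks
 * the right fixed-size worker from sizeof(*pu), which is handy for types
 * like size_t whose width differs between 32-bit and 64-bit targets.  The
 * variable below is hypothetical.
 */
#if 0
static volatile size_t g_cbExampleInUse;

static void exampleUpdateInUse(size_t cbNew)
{
    ASMAtomicWriteSize(&g_cbExampleInUse, cbNew); /* expands to the U32 or U64 case */
}
#endif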
4668
4669
4670
4671
4672/**
4673 * Invalidate page.
4674 *
4675 * @param pv Address of the page to invalidate.
4676 */
4677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4678DECLASM(void) ASMInvalidatePage(void *pv);
4679#else
4680DECLINLINE(void) ASMInvalidatePage(void *pv)
4681{
4682# if RT_INLINE_ASM_USES_INTRIN
4683 __invlpg(pv);
4684
4685# elif RT_INLINE_ASM_GNU_STYLE
4686 __asm__ __volatile__("invlpg %0\n\t"
4687 : : "m" (*(uint8_t *)pv));
4688# else
4689 __asm
4690 {
4691# ifdef RT_ARCH_AMD64
4692 mov rax, [pv]
4693 invlpg [rax]
4694# else
4695 mov eax, [pv]
4696 invlpg [eax]
4697# endif
4698 }
4699# endif
4700}
4701#endif
4702
4703
4704/**
4705 * Write back the internal caches and invalidate them.
4706 */
4707#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4708DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4709#else
4710DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4711{
4712# if RT_INLINE_ASM_USES_INTRIN
4713 __wbinvd();
4714
4715# elif RT_INLINE_ASM_GNU_STYLE
4716 __asm__ __volatile__("wbinvd");
4717# else
4718 __asm
4719 {
4720 wbinvd
4721 }
4722# endif
4723}
4724#endif
4725
4726
4727/**
4728 * Invalidate internal and (perhaps) external caches without first
4729 * flushing dirty cache lines. Use with extreme care.
4730 */
4731#if RT_INLINE_ASM_EXTERNAL
4732DECLASM(void) ASMInvalidateInternalCaches(void);
4733#else
4734DECLINLINE(void) ASMInvalidateInternalCaches(void)
4735{
4736# if RT_INLINE_ASM_GNU_STYLE
4737 __asm__ __volatile__("invd");
4738# else
4739 __asm
4740 {
4741 invd
4742 }
4743# endif
4744}
4745#endif
4746
4747
4748#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4749# if PAGE_SIZE != 0x1000
4750# error "PAGE_SIZE is not 0x1000!"
4751# endif
4752#endif
4753
4754/**
4755 * Zeros a 4K memory page.
4756 *
4757 * @param pv Pointer to the memory block. This must be page aligned.
4758 */
4759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4760DECLASM(void) ASMMemZeroPage(volatile void *pv);
4761# else
4762DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4763{
4764# if RT_INLINE_ASM_USES_INTRIN
4765# ifdef RT_ARCH_AMD64
4766 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4767# else
4768 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4769# endif
4770
4771# elif RT_INLINE_ASM_GNU_STYLE
4772 RTCCUINTREG uDummy;
4773# ifdef RT_ARCH_AMD64
4774 __asm__ __volatile__("rep stosq"
4775 : "=D" (pv),
4776 "=c" (uDummy)
4777 : "0" (pv),
4778 "c" (0x1000 >> 3),
4779 "a" (0)
4780 : "memory");
4781# else
4782 __asm__ __volatile__("rep stosl"
4783 : "=D" (pv),
4784 "=c" (uDummy)
4785 : "0" (pv),
4786 "c" (0x1000 >> 2),
4787 "a" (0)
4788 : "memory");
4789# endif
4790# else
4791 __asm
4792 {
4793# ifdef RT_ARCH_AMD64
4794 xor rax, rax
4795 mov ecx, 0200h
4796 mov rdi, [pv]
4797 rep stosq
4798# else
4799 xor eax, eax
4800 mov ecx, 0400h
4801 mov edi, [pv]
4802 rep stosd
4803# endif
4804 }
4805# endif
4806}
4807# endif
4808
4809
4810/**
4811 * Zeros a memory block with a 32-bit aligned size.
4812 *
4813 * @param pv Pointer to the memory block.
4814 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4815 */
4816#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4817DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4818#else
4819DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4820{
4821# if RT_INLINE_ASM_USES_INTRIN
4822# ifdef RT_ARCH_AMD64
4823 if (!(cb & 7))
4824 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4825 else
4826# endif
4827 __stosd((unsigned long *)pv, 0, cb / 4);
4828
4829# elif RT_INLINE_ASM_GNU_STYLE
4830 __asm__ __volatile__("rep stosl"
4831 : "=D" (pv),
4832 "=c" (cb)
4833 : "0" (pv),
4834 "1" (cb >> 2),
4835 "a" (0)
4836 : "memory");
4837# else
4838 __asm
4839 {
4840 xor eax, eax
4841# ifdef RT_ARCH_AMD64
4842 mov rcx, [cb]
4843 shr rcx, 2
4844 mov rdi, [pv]
4845# else
4846 mov ecx, [cb]
4847 shr ecx, 2
4848 mov edi, [pv]
4849# endif
4850 rep stosd
4851 }
4852# endif
4853}
4854#endif
4855
4856
4857/**
4858 * Fills a memory block with a 32-bit aligned size.
4859 *
4860 * @param pv Pointer to the memory block.
4861 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4862 * @param u32 The value to fill with.
4863 */
4864#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4865DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4866#else
4867DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4868{
4869# if RT_INLINE_ASM_USES_INTRIN
4870# ifdef RT_ARCH_AMD64
4871 if (!(cb & 7))
4872 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4873 else
4874# endif
4875 __stosd((unsigned long *)pv, u32, cb / 4);
4876
4877# elif RT_INLINE_ASM_GNU_STYLE
4878 __asm__ __volatile__("rep stosl"
4879 : "=D" (pv),
4880 "=c" (cb)
4881 : "0" (pv),
4882 "1" (cb >> 2),
4883 "a" (u32)
4884 : "memory");
4885# else
4886 __asm
4887 {
4888# ifdef RT_ARCH_AMD64
4889 mov rcx, [cb]
4890 shr rcx, 2
4891 mov rdi, [pv]
4892# else
4893 mov ecx, [cb]
4894 shr ecx, 2
4895 mov edi, [pv]
4896# endif
4897 mov eax, [u32]
4898 rep stosd
4899 }
4900# endif
4901}
4902#endif
4903
4904
4905/**
4906 * Checks if a memory block is filled with the specified byte.
4907 *
4908 * This is a sort of inverted memchr.
4909 *
4910 * @returns Pointer to the byte which doesn't equal u8.
4911 * @returns NULL if all equal to u8.
4912 *
4913 * @param pv Pointer to the memory block.
4914 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4915 * @param u8 The value it's supposed to be filled with.
4916 */
4917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4918DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4919#else
4920DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4921{
4922/** @todo rewrite this in inline assembly? */
4923 uint8_t const *pb = (uint8_t const *)pv;
4924 for (; cb; cb--, pb++)
4925 if (RT_UNLIKELY(*pb != u8))
4926 return (void *)pb;
4927 return NULL;
4928}
4929#endif
4930
4931
4932/**
4933 * Checks if a memory block is filled with the specified 32-bit value.
4934 *
4935 * This is a sort of inverted memchr.
4936 *
4937 * @returns Pointer to the first value which doesn't equal u32.
4938 * @returns NULL if all equal to u32.
4939 *
4940 * @param pv Pointer to the memory block.
4941 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4942 * @param u32 The value it's supposed to be filled with.
4943 */
4944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4945DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4946#else
4947DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4948{
4949/** @todo rewrite this in inline assembly? */
4950 uint32_t const *pu32 = (uint32_t const *)pv;
4951 for (; cb; cb -= 4, pu32++)
4952 if (RT_UNLIKELY(*pu32 != u32))
4953 return (uint32_t *)pu32;
4954 return NULL;
4955}
4956#endif
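
/*
 * Usage sketch (illustrative only, compiled out): zero a 32-bit aligned
 * block and verify it with ASMMemIsAllU32, which returns NULL when every
 * dword matches.  The buffer is hypothetical.
 */
#if 0
static bool exampleZeroAndCheck(void)
{
    static uint32_t s_au32Buf[256];
    ASMMemZero32(s_au32Buf, sizeof(s_au32Buf));     /* cb is a multiple of 4 */
    return ASMMemIsAllU32(s_au32Buf, sizeof(s_au32Buf), 0) == NULL;
}
#endif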
4957
4958
4959/**
4960 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4961 *
4962 * @returns u32F1 * u32F2.
4963 */
4964#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4965DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4966#else
4967DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4968{
4969# ifdef RT_ARCH_AMD64
4970 return (uint64_t)u32F1 * u32F2;
4971# else /* !RT_ARCH_AMD64 */
4972 uint64_t u64;
4973# if RT_INLINE_ASM_GNU_STYLE
4974 __asm__ __volatile__("mull %%edx"
4975 : "=A" (u64)
4976 : "a" (u32F2), "d" (u32F1));
4977# else
4978 __asm
4979 {
4980 mov edx, [u32F1]
4981 mov eax, [u32F2]
4982 mul edx
4983 mov dword ptr [u64], eax
4984 mov dword ptr [u64 + 4], edx
4985 }
4986# endif
4987 return u64;
4988# endif /* !RT_ARCH_AMD64 */
4989}
4990#endif
4991
4992
4993/**
4994 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4995 *
4996 * @returns i32F1 * i32F2.
4997 */
4998#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4999DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5000#else
5001DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5002{
5003# ifdef RT_ARCH_AMD64
5004 return (int64_t)i32F1 * i32F2;
5005# else /* !RT_ARCH_AMD64 */
5006 int64_t i64;
5007# if RT_INLINE_ASM_GNU_STYLE
5008 __asm__ __volatile__("imull %%edx"
5009 : "=A" (i64)
5010 : "a" (i32F2), "d" (i32F1));
5011# else
5012 __asm
5013 {
5014 mov edx, [i32F1]
5015 mov eax, [i32F2]
5016 imul edx
5017 mov dword ptr [i64], eax
5018 mov dword ptr [i64 + 4], edx
5019 }
5020# endif
5021 return i64;
5022# endif /* !RT_ARCH_AMD64 */
5023}
5024#endif
5025
5026
5027/**
5028 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5029 *
5030 * @returns u64 / u32.
5031 */
5032#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5033DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5034#else
5035DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5036{
5037# ifdef RT_ARCH_AMD64
5038 return (uint32_t)(u64 / u32);
5039# else /* !RT_ARCH_AMD64 */
5040# if RT_INLINE_ASM_GNU_STYLE
5041 RTCCUINTREG uDummy;
5042 __asm__ __volatile__("divl %3"
5043 : "=a" (u32), "=d"(uDummy)
5044 : "A" (u64), "r" (u32));
5045# else
5046 __asm
5047 {
5048 mov eax, dword ptr [u64]
5049 mov edx, dword ptr [u64 + 4]
5050 mov ecx, [u32]
5051 div ecx
5052 mov [u32], eax
5053 }
5054# endif
5055 return u32;
5056# endif /* !RT_ARCH_AMD64 */
5057}
5058#endif
5059
5060
5061/**
5062 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5063 *
5064 * @returns i64 / i32.
5065 */
5066#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5067DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5068#else
5069DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5070{
5071# ifdef RT_ARCH_AMD64
5072 return (int32_t)(i64 / i32);
5073# else /* !RT_ARCH_AMD64 */
5074# if RT_INLINE_ASM_GNU_STYLE
5075 RTCCUINTREG iDummy;
5076 __asm__ __volatile__("idivl %3"
5077 : "=a" (i32), "=d"(iDummy)
5078 : "A" (i64), "r" (i32));
5079# else
5080 __asm
5081 {
5082 mov eax, dword ptr [i64]
5083 mov edx, dword ptr [i64 + 4]
5084 mov ecx, [i32]
5085 idiv ecx
5086 mov [i32], eax
5087 }
5088# endif
5089 return i32;
5090# endif /* !RT_ARCH_AMD64 */
5091}
5092#endif
5093
5094
5095/**
5096 * Performs a 64-bit unsigned by 32-bit unsigned division and returns the
5097 * 32-bit unsigned remainder.
5098 *
5099 * @returns u64 % u32.
5100 *
5101 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits, or the division will overflow and crash.
5102 */
5103#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5104DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5105#else
5106DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5107{
5108# ifdef RT_ARCH_AMD64
5109 return (uint32_t)(u64 % u32);
5110# else /* !RT_ARCH_AMD64 */
5111# if RT_INLINE_ASM_GNU_STYLE
5112 RTCCUINTREG uDummy;
5113 __asm__ __volatile__("divl %3"
5114 : "=a" (uDummy), "=d"(u32)
5115 : "A" (u64), "r" (u32));
5116# else
5117 __asm
5118 {
5119 mov eax, dword ptr [u64]
5120 mov edx, dword ptr [u64 + 4]
5121 mov ecx, [u32]
5122 div ecx
5123 mov [u32], edx
5124 }
5125# endif
5126 return u32;
5127# endif /* !RT_ARCH_AMD64 */
5128}
5129#endif
5130
5131
5132/**
5133 * Performs a 64-bit signed by 32-bit signed division and returns the
5134 * 32-bit signed remainder.
5135 *
5136 * @returns i64 % i32.
5137 *
5138 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits, or the division will overflow and crash.
5139 */
5140#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5141DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5142#else
5143DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5144{
5145# ifdef RT_ARCH_AMD64
5146 return (int32_t)(i64 % i32);
5147# else /* !RT_ARCH_AMD64 */
5148# if RT_INLINE_ASM_GNU_STYLE
5149 RTCCUINTREG iDummy;
5150 __asm__ __volatile__("idivl %3"
5151 : "=a" (iDummy), "=d"(i32)
5152 : "A" (i64), "r" (i32));
5153# else
5154 __asm
5155 {
5156 mov eax, dword ptr [i64]
5157 mov edx, dword ptr [i64 + 4]
5158 mov ecx, [i32]
5159 idiv ecx
5160 mov [i32], edx
5161 }
5162# endif
5163 return i32;
5164# endif /* !RT_ARCH_AMD64 */
5165}
5166#endif
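
/*
 * Usage sketch (illustrative only, compiled out): the 64-by-32 division and
 * remainder helpers are only safe when the quotient fits in 32 bits; on x86
 * the DIV instruction faults otherwise.  The numbers below respect that.
 */
#if 0
static void exampleDivMod(void)
{
    uint64_t u64  = UINT64_C(0x200000005);          /* 8589934597 */
    uint32_t uDiv = ASMDivU64ByU32RetU32(u64, 3);   /* 2863311532, fits in 32 bits */
    uint32_t uMod = ASMModU64ByU32RetU32(u64, 3);   /* 1 */
    NOREF(uDiv); NOREF(uMod);
}
#endif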
5167
5168
5169/**
5170 * Multiplies a 64-bit value by a 32-bit value and divides the result by a
5171 * 32-bit value, using a 96-bit intermediate result.
5172 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5173 * __udivdi3 and __umoddi3 even if this inline function is not used.
5174 *
5175 * @returns (u64A * u32B) / u32C.
5176 * @param u64A The 64-bit value.
5177 * @param   u32B    The 32-bit value to multiply u64A by.
5178 * @param   u32C    The 32-bit value to divide u64A * u32B by.
5179 */
5180#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5181DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5182#else
5183DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5184{
5185# if RT_INLINE_ASM_GNU_STYLE
5186# ifdef RT_ARCH_AMD64
5187 uint64_t u64Result, u64Spill;
5188 __asm__ __volatile__("mulq %2\n\t"
5189 "divq %3\n\t"
5190 : "=a" (u64Result),
5191 "=d" (u64Spill)
5192 : "r" ((uint64_t)u32B),
5193 "r" ((uint64_t)u32C),
5194 "0" (u64A),
5195 "1" (0));
5196 return u64Result;
5197# else
5198 uint32_t u32Dummy;
5199 uint64_t u64Result;
5200 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5201 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5202 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5203 eax = u64A.hi */
5204 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5205 edx = u32C */
5206 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5207 edx = u32B */
5208 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5209 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5210 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5211 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5212 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5213 edx = u64Hi % u32C */
5214 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5215 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5216 "divl %%ecx \n\t" /* u64Result.lo */
5217 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5218 : "=A"(u64Result), "=c"(u32Dummy),
5219 "=S"(u32Dummy), "=D"(u32Dummy)
5220 : "a"((uint32_t)u64A),
5221 "S"((uint32_t)(u64A >> 32)),
5222 "c"(u32B),
5223 "D"(u32C));
5224 return u64Result;
5225# endif
5226# else
5227 RTUINT64U u;
5228 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5229 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5230 u64Hi += (u64Lo >> 32);
5231 u.s.Hi = (uint32_t)(u64Hi / u32C);
5232 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5233 return u.u;
5234# endif
5235}
5236#endif
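
/*
 * Usage sketch (illustrative only, compiled out): scaling a tick count by a
 * ratio is a typical use of ASMMultU64ByU32DivByU32; the 96-bit intermediate
 * keeps u64Ticks * 10^9 from overflowing 64 bits before the division.  The
 * function and parameter names are hypothetical.
 */
#if 0
static uint64_t exampleTicksToNano(uint64_t u64Ticks, uint32_t u32TicksPerSec)
{
    return ASMMultU64ByU32DivByU32(u64Ticks, UINT32_C(1000000000), u32TicksPerSec);
}
#endif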
5237
5238
5239/**
5240 * Probes a byte pointer for read access.
5241 *
5242 * While the function will fault if the byte is not read accessible,
5243 * the idea is to do this in a safe place like before acquiring locks
5244 * and such like.
5245 *
5246 * Also, this function guarantees that an eager compiler is not going
5247 * to optimize the probing away.
5248 *
5249 * @param pvByte Pointer to the byte.
5250 */
5251#if RT_INLINE_ASM_EXTERNAL
5252DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5253#else
5254DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5255{
5256 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5257 uint8_t u8;
5258# if RT_INLINE_ASM_GNU_STYLE
5259 __asm__ __volatile__("movb (%1), %0\n\t"
5260 : "=r" (u8)
5261 : "r" (pvByte));
5262# else
5263 __asm
5264 {
5265# ifdef RT_ARCH_AMD64
5266 mov rax, [pvByte]
5267 mov al, [rax]
5268# else
5269 mov eax, [pvByte]
5270 mov al, [eax]
5271# endif
5272 mov [u8], al
5273 }
5274# endif
5275 return u8;
5276}
5277#endif
5278
5279/**
5280 * Probes a buffer for read access page by page.
5281 *
5282 * While the function will fault if the buffer is not fully read
5283 * accessible, the idea is to do this in a safe place like before
5284 * acquiring locks and such like.
5285 *
5286 * Also, this function guarantees that an eager compiler is not going
5287 * to optimize the probing away.
5288 *
5289 * @param pvBuf Pointer to the buffer.
5290 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5291 */
5292DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5293{
5294 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5295 /* the first byte */
5296 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5297 ASMProbeReadByte(pu8);
5298
5299    /* the whole pages in between. */
5300 while (cbBuf > /*PAGE_SIZE*/0x1000)
5301 {
5302 ASMProbeReadByte(pu8);
5303 cbBuf -= /*PAGE_SIZE*/0x1000;
5304 pu8 += /*PAGE_SIZE*/0x1000;
5305 }
5306
5307 /* the last byte */
5308 ASMProbeReadByte(pu8 + cbBuf - 1);
5309}
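
/*
 * Usage sketch (illustrative only, compiled out): probe a caller supplied
 * buffer for read access before entering a critical section, so a bad
 * pointer faults here rather than while a lock is held.  The function and
 * parameters are hypothetical.
 */
#if 0
static void exampleProbeThenCopy(void *pvDst, const void *pvSrc, size_t cb)
{
    ASMProbeReadBuffer(pvSrc, cb);  /* cb must be >= 1; faults if pvSrc isn't readable */
    /* ... acquire the lock, copy cb bytes from pvSrc to pvDst, release the lock ... */
    NOREF(pvDst);
}
#endif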
5310
5311
5312/** @def ASMBreakpoint
5313 * Debugger Breakpoint.
5314 * @remark In the gnu world we add a nop instruction after the int3 to
5315 * force gdb to remain at the int3 source line.
5316 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5317 * @internal
5318 */
5319#if RT_INLINE_ASM_GNU_STYLE
5320# ifndef __L4ENV__
5321# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5322# else
5323# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5324# endif
5325#else
5326# define ASMBreakpoint() __debugbreak()
5327#endif
5328
5329
5330
5331/** @defgroup grp_inline_bits Bit Operations
5332 * @{
5333 */
5334
5335
5336/**
5337 * Sets a bit in a bitmap.
5338 *
5339 * @param pvBitmap Pointer to the bitmap.
5340 * @param iBit The bit to set.
5341 */
5342#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5343DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5344#else
5345DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5346{
5347# if RT_INLINE_ASM_USES_INTRIN
5348 _bittestandset((long *)pvBitmap, iBit);
5349
5350# elif RT_INLINE_ASM_GNU_STYLE
5351 __asm__ __volatile__("btsl %1, %0"
5352 : "=m" (*(volatile long *)pvBitmap)
5353 : "Ir" (iBit),
5354 "m" (*(volatile long *)pvBitmap)
5355 : "memory");
5356# else
5357 __asm
5358 {
5359# ifdef RT_ARCH_AMD64
5360 mov rax, [pvBitmap]
5361 mov edx, [iBit]
5362 bts [rax], edx
5363# else
5364 mov eax, [pvBitmap]
5365 mov edx, [iBit]
5366 bts [eax], edx
5367# endif
5368 }
5369# endif
5370}
5371#endif
5372
5373
5374/**
5375 * Atomically sets a bit in a bitmap, ordered.
5376 *
5377 * @param pvBitmap Pointer to the bitmap.
5378 * @param iBit The bit to set.
5379 */
5380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5381DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5382#else
5383DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5384{
5385# if RT_INLINE_ASM_USES_INTRIN
5386 _interlockedbittestandset((long *)pvBitmap, iBit);
5387# elif RT_INLINE_ASM_GNU_STYLE
5388 __asm__ __volatile__("lock; btsl %1, %0"
5389 : "=m" (*(volatile long *)pvBitmap)
5390 : "Ir" (iBit),
5391 "m" (*(volatile long *)pvBitmap)
5392 : "memory");
5393# else
5394 __asm
5395 {
5396# ifdef RT_ARCH_AMD64
5397 mov rax, [pvBitmap]
5398 mov edx, [iBit]
5399 lock bts [rax], edx
5400# else
5401 mov eax, [pvBitmap]
5402 mov edx, [iBit]
5403 lock bts [eax], edx
5404# endif
5405 }
5406# endif
5407}
5408#endif
5409
5410
5411/**
5412 * Clears a bit in a bitmap.
5413 *
5414 * @param pvBitmap Pointer to the bitmap.
5415 * @param iBit The bit to clear.
5416 */
5417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5418DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5419#else
5420DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5421{
5422# if RT_INLINE_ASM_USES_INTRIN
5423 _bittestandreset((long *)pvBitmap, iBit);
5424
5425# elif RT_INLINE_ASM_GNU_STYLE
5426 __asm__ __volatile__("btrl %1, %0"
5427 : "=m" (*(volatile long *)pvBitmap)
5428 : "Ir" (iBit),
5429 "m" (*(volatile long *)pvBitmap)
5430 : "memory");
5431# else
5432 __asm
5433 {
5434# ifdef RT_ARCH_AMD64
5435 mov rax, [pvBitmap]
5436 mov edx, [iBit]
5437 btr [rax], edx
5438# else
5439 mov eax, [pvBitmap]
5440 mov edx, [iBit]
5441 btr [eax], edx
5442# endif
5443 }
5444# endif
5445}
5446#endif
5447
5448
5449/**
5450 * Atomically clears a bit in a bitmap, ordered.
5451 *
5452 * @param pvBitmap Pointer to the bitmap.
5453 * @param   iBit        The bit to clear.
5454 * @remark No memory barrier, take care on smp.
5455 */
5456#if RT_INLINE_ASM_EXTERNAL
5457DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5458#else
5459DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5460{
5461# if RT_INLINE_ASM_GNU_STYLE
5462 __asm__ __volatile__("lock; btrl %1, %0"
5463 : "=m" (*(volatile long *)pvBitmap)
5464 : "Ir" (iBit),
5465 "m" (*(volatile long *)pvBitmap)
5466 : "memory");
5467# else
5468 __asm
5469 {
5470# ifdef RT_ARCH_AMD64
5471 mov rax, [pvBitmap]
5472 mov edx, [iBit]
5473 lock btr [rax], edx
5474# else
5475 mov eax, [pvBitmap]
5476 mov edx, [iBit]
5477 lock btr [eax], edx
5478# endif
5479 }
5480# endif
5481}
5482#endif
5483
5484
5485/**
5486 * Toggles a bit in a bitmap.
5487 *
5488 * @param pvBitmap Pointer to the bitmap.
5489 * @param iBit The bit to toggle.
5490 */
5491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5492DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5493#else
5494DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5495{
5496# if RT_INLINE_ASM_USES_INTRIN
5497 _bittestandcomplement((long *)pvBitmap, iBit);
5498# elif RT_INLINE_ASM_GNU_STYLE
5499 __asm__ __volatile__("btcl %1, %0"
5500 : "=m" (*(volatile long *)pvBitmap)
5501 : "Ir" (iBit),
5502 "m" (*(volatile long *)pvBitmap)
5503 : "memory");
5504# else
5505 __asm
5506 {
5507# ifdef RT_ARCH_AMD64
5508 mov rax, [pvBitmap]
5509 mov edx, [iBit]
5510 btc [rax], edx
5511# else
5512 mov eax, [pvBitmap]
5513 mov edx, [iBit]
5514 btc [eax], edx
5515# endif
5516 }
5517# endif
5518}
5519#endif
5520
5521
5522/**
5523 * Atomically toggles a bit in a bitmap, ordered.
5524 *
5525 * @param pvBitmap Pointer to the bitmap.
5526 * @param   iBit        The bit to toggle.
5527 */
5528#if RT_INLINE_ASM_EXTERNAL
5529DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5530#else
5531DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5532{
5533# if RT_INLINE_ASM_GNU_STYLE
5534 __asm__ __volatile__("lock; btcl %1, %0"
5535 : "=m" (*(volatile long *)pvBitmap)
5536 : "Ir" (iBit),
5537 "m" (*(volatile long *)pvBitmap)
5538 : "memory");
5539# else
5540 __asm
5541 {
5542# ifdef RT_ARCH_AMD64
5543 mov rax, [pvBitmap]
5544 mov edx, [iBit]
5545 lock btc [rax], edx
5546# else
5547 mov eax, [pvBitmap]
5548 mov edx, [iBit]
5549 lock btc [eax], edx
5550# endif
5551 }
5552# endif
5553}
5554#endif
5555
5556
5557/**
5558 * Tests and sets a bit in a bitmap.
5559 *
5560 * @returns true if the bit was set.
5561 * @returns false if the bit was clear.
5562 * @param pvBitmap Pointer to the bitmap.
5563 * @param iBit The bit to test and set.
5564 */
5565#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5566DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5567#else
5568DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5569{
5570 union { bool f; uint32_t u32; uint8_t u8; } rc;
5571# if RT_INLINE_ASM_USES_INTRIN
5572 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5573
5574# elif RT_INLINE_ASM_GNU_STYLE
5575 __asm__ __volatile__("btsl %2, %1\n\t"
5576 "setc %b0\n\t"
5577 "andl $1, %0\n\t"
5578 : "=q" (rc.u32),
5579 "=m" (*(volatile long *)pvBitmap)
5580 : "Ir" (iBit),
5581 "m" (*(volatile long *)pvBitmap)
5582 : "memory");
5583# else
5584 __asm
5585 {
5586 mov edx, [iBit]
5587# ifdef RT_ARCH_AMD64
5588 mov rax, [pvBitmap]
5589 bts [rax], edx
5590# else
5591 mov eax, [pvBitmap]
5592 bts [eax], edx
5593# endif
5594 setc al
5595 and eax, 1
5596 mov [rc.u32], eax
5597 }
5598# endif
5599 return rc.f;
5600}
5601#endif
5602
5603
5604/**
5605 * Atomically tests and sets a bit in a bitmap, ordered.
5606 *
5607 * @returns true if the bit was set.
5608 * @returns false if the bit was clear.
5609 * @param pvBitmap Pointer to the bitmap.
5610 * @param   iBit        The bit to test and set.
5611 */
5612#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5613DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5614#else
5615DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5616{
5617 union { bool f; uint32_t u32; uint8_t u8; } rc;
5618# if RT_INLINE_ASM_USES_INTRIN
5619 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5620# elif RT_INLINE_ASM_GNU_STYLE
5621 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5622 "setc %b0\n\t"
5623 "andl $1, %0\n\t"
5624 : "=q" (rc.u32),
5625 "=m" (*(volatile long *)pvBitmap)
5626 : "Ir" (iBit),
5627 "m" (*(volatile long *)pvBitmap)
5628 : "memory");
5629# else
5630 __asm
5631 {
5632 mov edx, [iBit]
5633# ifdef RT_ARCH_AMD64
5634 mov rax, [pvBitmap]
5635 lock bts [rax], edx
5636# else
5637 mov eax, [pvBitmap]
5638 lock bts [eax], edx
5639# endif
5640 setc al
5641 and eax, 1
5642 mov [rc.u32], eax
5643 }
5644# endif
5645 return rc.f;
5646}
5647#endif
5648
5649
5650/**
5651 * Tests and clears a bit in a bitmap.
5652 *
5653 * @returns true if the bit was set.
5654 * @returns false if the bit was clear.
5655 * @param pvBitmap Pointer to the bitmap.
5656 * @param iBit The bit to test and clear.
5657 */
5658#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5659DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5660#else
5661DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5662{
5663 union { bool f; uint32_t u32; uint8_t u8; } rc;
5664# if RT_INLINE_ASM_USES_INTRIN
5665 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5666
5667# elif RT_INLINE_ASM_GNU_STYLE
5668 __asm__ __volatile__("btrl %2, %1\n\t"
5669 "setc %b0\n\t"
5670 "andl $1, %0\n\t"
5671 : "=q" (rc.u32),
5672 "=m" (*(volatile long *)pvBitmap)
5673 : "Ir" (iBit),
5674 "m" (*(volatile long *)pvBitmap)
5675 : "memory");
5676# else
5677 __asm
5678 {
5679 mov edx, [iBit]
5680# ifdef RT_ARCH_AMD64
5681 mov rax, [pvBitmap]
5682 btr [rax], edx
5683# else
5684 mov eax, [pvBitmap]
5685 btr [eax], edx
5686# endif
5687 setc al
5688 and eax, 1
5689 mov [rc.u32], eax
5690 }
5691# endif
5692 return rc.f;
5693}
5694#endif
5695
5696
5697/**
5698 * Atomically tests and clears a bit in a bitmap, ordered.
5699 *
5700 * @returns true if the bit was set.
5701 * @returns false if the bit was clear.
5702 * @param pvBitmap Pointer to the bitmap.
5703 * @param iBit The bit to test and clear.
5704 * @remark No memory barrier, take care on smp.
5705 */
5706#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5707DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5708#else
5709DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5710{
5711 union { bool f; uint32_t u32; uint8_t u8; } rc;
5712# if RT_INLINE_ASM_USES_INTRIN
5713 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5714
5715# elif RT_INLINE_ASM_GNU_STYLE
5716 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5717 "setc %b0\n\t"
5718 "andl $1, %0\n\t"
5719 : "=q" (rc.u32),
5720 "=m" (*(volatile long *)pvBitmap)
5721 : "Ir" (iBit),
5722 "m" (*(volatile long *)pvBitmap)
5723 : "memory");
5724# else
5725 __asm
5726 {
5727 mov edx, [iBit]
5728# ifdef RT_ARCH_AMD64
5729 mov rax, [pvBitmap]
5730 lock btr [rax], edx
5731# else
5732 mov eax, [pvBitmap]
5733 lock btr [eax], edx
5734# endif
5735 setc al
5736 and eax, 1
5737 mov [rc.u32], eax
5738 }
5739# endif
5740 return rc.f;
5741}
5742#endif
5743
5744
5745/**
5746 * Tests and toggles a bit in a bitmap.
5747 *
5748 * @returns true if the bit was set.
5749 * @returns false if the bit was clear.
5750 * @param pvBitmap Pointer to the bitmap.
5751 * @param iBit The bit to test and toggle.
5752 */
5753#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5754DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5755#else
5756DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5757{
5758 union { bool f; uint32_t u32; uint8_t u8; } rc;
5759# if RT_INLINE_ASM_USES_INTRIN
5760 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5761
5762# elif RT_INLINE_ASM_GNU_STYLE
5763 __asm__ __volatile__("btcl %2, %1\n\t"
5764 "setc %b0\n\t"
5765 "andl $1, %0\n\t"
5766 : "=q" (rc.u32),
5767 "=m" (*(volatile long *)pvBitmap)
5768 : "Ir" (iBit),
5769 "m" (*(volatile long *)pvBitmap)
5770 : "memory");
5771# else
5772 __asm
5773 {
5774 mov edx, [iBit]
5775# ifdef RT_ARCH_AMD64
5776 mov rax, [pvBitmap]
5777 btc [rax], edx
5778# else
5779 mov eax, [pvBitmap]
5780 btc [eax], edx
5781# endif
5782 setc al
5783 and eax, 1
5784 mov [rc.u32], eax
5785 }
5786# endif
5787 return rc.f;
5788}
5789#endif
5790
5791
5792/**
5793 * Atomically tests and toggles a bit in a bitmap, ordered.
5794 *
5795 * @returns true if the bit was set.
5796 * @returns false if the bit was clear.
5797 * @param pvBitmap Pointer to the bitmap.
5798 * @param iBit The bit to test and toggle.
5799 */
5800#if RT_INLINE_ASM_EXTERNAL
5801DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5802#else
5803DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5804{
5805 union { bool f; uint32_t u32; uint8_t u8; } rc;
5806# if RT_INLINE_ASM_GNU_STYLE
5807 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5808 "setc %b0\n\t"
5809 "andl $1, %0\n\t"
5810 : "=q" (rc.u32),
5811 "=m" (*(volatile long *)pvBitmap)
5812 : "Ir" (iBit),
5813 "m" (*(volatile long *)pvBitmap)
5814 : "memory");
5815# else
5816 __asm
5817 {
5818 mov edx, [iBit]
5819# ifdef RT_ARCH_AMD64
5820 mov rax, [pvBitmap]
5821 lock btc [rax], edx
5822# else
5823 mov eax, [pvBitmap]
5824 lock btc [eax], edx
5825# endif
5826 setc al
5827 and eax, 1
5828 mov [rc.u32], eax
5829 }
5830# endif
5831 return rc.f;
5832}
5833#endif
5834
5835
5836/**
5837 * Tests if a bit in a bitmap is set.
5838 *
5839 * @returns true if the bit is set.
5840 * @returns false if the bit is clear.
5841 * @param pvBitmap Pointer to the bitmap.
5842 * @param iBit The bit to test.
5843 */
5844#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5845DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5846#else
5847DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5848{
5849 union { bool f; uint32_t u32; uint8_t u8; } rc;
5850# if RT_INLINE_ASM_USES_INTRIN
5851 rc.u32 = _bittest((long *)pvBitmap, iBit);
5852# elif RT_INLINE_ASM_GNU_STYLE
5853
5854 __asm__ __volatile__("btl %2, %1\n\t"
5855 "setc %b0\n\t"
5856 "andl $1, %0\n\t"
5857 : "=q" (rc.u32)
5858 : "m" (*(const volatile long *)pvBitmap),
5859 "Ir" (iBit)
5860 : "memory");
5861# else
5862 __asm
5863 {
5864 mov edx, [iBit]
5865# ifdef RT_ARCH_AMD64
5866 mov rax, [pvBitmap]
5867 bt [rax], edx
5868# else
5869 mov eax, [pvBitmap]
5870 bt [eax], edx
5871# endif
5872 setc al
5873 and eax, 1
5874 mov [rc.u32], eax
5875 }
5876# endif
5877 return rc.f;
5878}
5879#endif
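
/*
 * Usage sketch (illustrative only, compiled out): a bitmap is simply an
 * array of 32-bit words; the bit operations take the array and a 0-based
 * bit index.  The array below is hypothetical.
 */
#if 0
static uint32_t g_au32ExampleBitmap[4];             /* 128 bits, initially all clear */

static void exampleBitmapBasics(void)
{
    ASMBitSet(g_au32ExampleBitmap, 5);              /* set bit 5 */
    ASMAtomicBitSet(g_au32ExampleBitmap, 77);       /* set bit 77 with a locked op */
    if (ASMBitTest(g_au32ExampleBitmap, 5))         /* -> true */
        ASMBitClear(g_au32ExampleBitmap, 5);
}
#endif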
5880
5881
5882/**
5883 * Clears a bit range within a bitmap.
5884 *
5885 * @param pvBitmap Pointer to the bitmap.
5886 * @param   iBitStart   The first bit to clear.
5887 * @param iBitEnd The first bit not to clear.
5888 */
5889DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5890{
5891 if (iBitStart < iBitEnd)
5892 {
5893 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5894 int iStart = iBitStart & ~31;
5895 int iEnd = iBitEnd & ~31;
5896 if (iStart == iEnd)
5897 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5898 else
5899 {
5900 /* bits in first dword. */
5901 if (iBitStart & 31)
5902 {
5903 *pu32 &= (1 << (iBitStart & 31)) - 1;
5904 pu32++;
5905 iBitStart = iStart + 32;
5906 }
5907
5908 /* whole dword. */
5909 if (iBitStart != iEnd)
5910 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5911
5912 /* bits in last dword. */
5913 if (iBitEnd & 31)
5914 {
5915 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5916 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5917 }
5918 }
5919 }
5920}
5921
5922
5923/**
5924 * Sets a bit range within a bitmap.
5925 *
5926 * @param pvBitmap Pointer to the bitmap.
5927 * @param   iBitStart   The first bit to set.
5928 * @param iBitEnd The first bit not to set.
5929 */
5930DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5931{
5932 if (iBitStart < iBitEnd)
5933 {
5934 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5935 int iStart = iBitStart & ~31;
5936 int iEnd = iBitEnd & ~31;
5937 if (iStart == iEnd)
5938            *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5939 else
5940 {
5941 /* bits in first dword. */
5942 if (iBitStart & 31)
5943 {
5944 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5945 pu32++;
5946 iBitStart = iStart + 32;
5947 }
5948
5949 /* whole dword. */
5950 if (iBitStart != iEnd)
5951 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5952
5953 /* bits in last dword. */
5954 if (iBitEnd & 31)
5955 {
5956 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5957 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5958 }
5959 }
5960 }
5961}
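
/*
 * Usage sketch (illustrative only, compiled out): the range functions treat
 * iBitEnd as exclusive, so [8, 16) touches bits 8 thru 15 only.  The bitmap
 * is hypothetical.
 */
#if 0
static void exampleBitRanges(void)
{
    static uint32_t s_au32Bitmap[2];                /* 64 bits */
    ASMBitSetRange(s_au32Bitmap, 8, 16);            /* sets bits 8..15 */
    ASMBitClearRange(s_au32Bitmap, 12, 14);         /* clears bits 12 and 13 */
    /* s_au32Bitmap[0] is now 0x0000cf00. */
}
#endif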
5962
5963
5964/**
5965 * Finds the first clear bit in a bitmap.
5966 *
5967 * @returns Index of the first zero bit.
5968 * @returns -1 if no clear bit was found.
5969 * @param pvBitmap Pointer to the bitmap.
5970 * @param cBits The number of bits in the bitmap. Multiple of 32.
5971 */
5972#if RT_INLINE_ASM_EXTERNAL
5973DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5974#else
5975DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5976{
5977 if (cBits)
5978 {
5979 int32_t iBit;
5980# if RT_INLINE_ASM_GNU_STYLE
5981 RTCCUINTREG uEAX, uECX, uEDI;
5982 cBits = RT_ALIGN_32(cBits, 32);
5983 __asm__ __volatile__("repe; scasl\n\t"
5984 "je 1f\n\t"
5985# ifdef RT_ARCH_AMD64
5986 "lea -4(%%rdi), %%rdi\n\t"
5987 "xorl (%%rdi), %%eax\n\t"
5988 "subq %5, %%rdi\n\t"
5989# else
5990 "lea -4(%%edi), %%edi\n\t"
5991 "xorl (%%edi), %%eax\n\t"
5992 "subl %5, %%edi\n\t"
5993# endif
5994 "shll $3, %%edi\n\t"
5995 "bsfl %%eax, %%edx\n\t"
5996 "addl %%edi, %%edx\n\t"
5997 "1:\t\n"
5998 : "=d" (iBit),
5999 "=&c" (uECX),
6000 "=&D" (uEDI),
6001 "=&a" (uEAX)
6002 : "0" (0xffffffff),
6003 "mr" (pvBitmap),
6004 "1" (cBits >> 5),
6005 "2" (pvBitmap),
6006 "3" (0xffffffff));
6007# else
6008 cBits = RT_ALIGN_32(cBits, 32);
6009 __asm
6010 {
6011# ifdef RT_ARCH_AMD64
6012 mov rdi, [pvBitmap]
6013 mov rbx, rdi
6014# else
6015 mov edi, [pvBitmap]
6016 mov ebx, edi
6017# endif
6018 mov edx, 0ffffffffh
6019 mov eax, edx
6020 mov ecx, [cBits]
6021 shr ecx, 5
6022 repe scasd
6023 je done
6024
6025# ifdef RT_ARCH_AMD64
6026 lea rdi, [rdi - 4]
6027 xor eax, [rdi]
6028 sub rdi, rbx
6029# else
6030 lea edi, [edi - 4]
6031 xor eax, [edi]
6032 sub edi, ebx
6033# endif
6034 shl edi, 3
6035 bsf edx, eax
6036 add edx, edi
6037 done:
6038 mov [iBit], edx
6039 }
6040# endif
6041 return iBit;
6042 }
6043 return -1;
6044}
6045#endif
6046
6047
6048/**
6049 * Finds the next clear bit in a bitmap.
6050 *
6051 * @returns Index of the next clear bit.
6052 * @returns -1 if no clear bit was found.
6053 * @param pvBitmap Pointer to the bitmap.
6054 * @param cBits The number of bits in the bitmap. Multiple of 32.
6055 * @param iBitPrev The bit returned from the last search.
6056 * The search will start at iBitPrev + 1.
6057 */
6058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6059DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6060#else
6061DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6062{
6063 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6064 int iBit = ++iBitPrev & 31;
6065 if (iBit)
6066 {
6067 /*
6068 * Inspect the 32-bit word containing the unaligned bit.
6069 */
6070 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6071
6072# if RT_INLINE_ASM_USES_INTRIN
6073 unsigned long ulBit = 0;
6074 if (_BitScanForward(&ulBit, u32))
6075 return ulBit + iBitPrev;
6076# else
6077# if RT_INLINE_ASM_GNU_STYLE
6078 __asm__ __volatile__("bsf %1, %0\n\t"
6079 "jnz 1f\n\t"
6080 "movl $-1, %0\n\t"
6081 "1:\n\t"
6082 : "=r" (iBit)
6083 : "r" (u32));
6084# else
6085 __asm
6086 {
6087 mov edx, [u32]
6088 bsf eax, edx
6089 jnz done
6090 mov eax, 0ffffffffh
6091 done:
6092 mov [iBit], eax
6093 }
6094# endif
6095 if (iBit >= 0)
6096 return iBit + iBitPrev;
6097# endif
6098
6099 /*
6100 * Skip ahead and see if there is anything left to search.
6101 */
6102 iBitPrev |= 31;
6103 iBitPrev++;
6104 if (cBits <= (uint32_t)iBitPrev)
6105 return -1;
6106 }
6107
6108 /*
6109 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6110 */
6111 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6112 if (iBit >= 0)
6113 iBit += iBitPrev;
6114 return iBit;
6115}
6116#endif
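
/*
 * Usage sketch (illustrative only, compiled out): a simple slot allocator
 * can use ASMBitFirstClear to find a free slot and ASMAtomicBitTestAndSet to
 * claim it without racing other threads.  The bitmap size is hypothetical.
 */
#if 0
static uint32_t g_au32ExampleAllocMap[8];           /* 256 slots */

static int32_t exampleAllocSlot(void)
{
    for (;;)
    {
        int32_t iBit = ASMBitFirstClear(g_au32ExampleAllocMap, 256);
        if (iBit < 0)
            return -1;                              /* all slots taken */
        if (!ASMAtomicBitTestAndSet(g_au32ExampleAllocMap, iBit))
            return iBit;                            /* it was clear and is now ours */
        /* another thread grabbed it first - retry */
    }
}
#endif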
6117
6118
6119/**
6120 * Finds the first set bit in a bitmap.
6121 *
6122 * @returns Index of the first set bit.
6123 * @returns -1 if no set bit was found.
6124 * @param pvBitmap Pointer to the bitmap.
6125 * @param cBits The number of bits in the bitmap. Multiple of 32.
6126 */
6127#if RT_INLINE_ASM_EXTERNAL
6128DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6129#else
6130DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6131{
6132 if (cBits)
6133 {
6134 int32_t iBit;
6135# if RT_INLINE_ASM_GNU_STYLE
6136 RTCCUINTREG uEAX, uECX, uEDI;
6137 cBits = RT_ALIGN_32(cBits, 32);
6138 __asm__ __volatile__("repe; scasl\n\t"
6139 "je 1f\n\t"
6140# ifdef RT_ARCH_AMD64
6141 "lea -4(%%rdi), %%rdi\n\t"
6142 "movl (%%rdi), %%eax\n\t"
6143 "subq %5, %%rdi\n\t"
6144# else
6145 "lea -4(%%edi), %%edi\n\t"
6146 "movl (%%edi), %%eax\n\t"
6147 "subl %5, %%edi\n\t"
6148# endif
6149 "shll $3, %%edi\n\t"
6150 "bsfl %%eax, %%edx\n\t"
6151 "addl %%edi, %%edx\n\t"
6152 "1:\t\n"
6153 : "=d" (iBit),
6154 "=&c" (uECX),
6155 "=&D" (uEDI),
6156 "=&a" (uEAX)
6157 : "0" (0xffffffff),
6158 "mr" (pvBitmap),
6159 "1" (cBits >> 5),
6160 "2" (pvBitmap),
6161 "3" (0));
6162# else
6163 cBits = RT_ALIGN_32(cBits, 32);
6164 __asm
6165 {
6166# ifdef RT_ARCH_AMD64
6167 mov rdi, [pvBitmap]
6168 mov rbx, rdi
6169# else
6170 mov edi, [pvBitmap]
6171 mov ebx, edi
6172# endif
6173 mov edx, 0ffffffffh
6174 xor eax, eax
6175 mov ecx, [cBits]
6176 shr ecx, 5
6177 repe scasd
6178 je done
6179# ifdef RT_ARCH_AMD64
6180 lea rdi, [rdi - 4]
6181 mov eax, [rdi]
6182 sub rdi, rbx
6183# else
6184 lea edi, [edi - 4]
6185 mov eax, [edi]
6186 sub edi, ebx
6187# endif
6188 shl edi, 3
6189 bsf edx, eax
6190 add edx, edi
6191 done:
6192 mov [iBit], edx
6193 }
6194# endif
6195 return iBit;
6196 }
6197 return -1;
6198}
6199#endif
6200
6201
6202/**
6203 * Finds the next set bit in a bitmap.
6204 *
6205 * @returns Index of the next set bit.
6206 * @returns -1 if no set bit was found.
6207 * @param pvBitmap Pointer to the bitmap.
6208 * @param cBits The number of bits in the bitmap. Multiple of 32.
6209 * @param iBitPrev The bit returned from the last search.
6210 * The search will start at iBitPrev + 1.
6211 */
6212#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6213DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6214#else
6215DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6216{
6217 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6218 int iBit = ++iBitPrev & 31;
6219 if (iBit)
6220 {
6221 /*
6222 * Inspect the 32-bit word containing the unaligned bit.
6223 */
6224 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6225
6226# if RT_INLINE_ASM_USES_INTRIN
6227 unsigned long ulBit = 0;
6228 if (_BitScanForward(&ulBit, u32))
6229 return ulBit + iBitPrev;
6230# else
6231# if RT_INLINE_ASM_GNU_STYLE
6232 __asm__ __volatile__("bsf %1, %0\n\t"
6233 "jnz 1f\n\t"
6234 "movl $-1, %0\n\t"
6235 "1:\n\t"
6236 : "=r" (iBit)
6237 : "r" (u32));
6238# else
6239 __asm
6240 {
6241 mov edx, [u32]
6242 bsf eax, edx
6243 jnz done
6244 mov eax, 0ffffffffh
6245 done:
6246 mov [iBit], eax
6247 }
6248# endif
6249 if (iBit >= 0)
6250 return iBit + iBitPrev;
6251# endif
6252
6253 /*
6254 * Skip ahead and see if there is anything left to search.
6255 */
6256 iBitPrev |= 31;
6257 iBitPrev++;
6258 if (cBits <= (uint32_t)iBitPrev)
6259 return -1;
6260 }
6261
6262 /*
6263     * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6264 */
6265 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6266 if (iBit >= 0)
6267 iBit += iBitPrev;
6268 return iBit;
6269}
6270#endif
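
/*
 * Usage sketch (illustrative only, compiled out): walking every set bit in a
 * bitmap by chaining ASMBitFirstSet and ASMBitNextSet.  The callback type is
 * hypothetical.
 */
#if 0
static void exampleForEachSetBit(const uint32_t *pau32Bitmap, uint32_t cBits,
                                 void (*pfnCallback)(int32_t iBit))
{
    int32_t iBit = ASMBitFirstSet(pau32Bitmap, cBits);
    while (iBit >= 0)
    {
        pfnCallback(iBit);
        iBit = ASMBitNextSet(pau32Bitmap, cBits, iBit);
    }
}
#endif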
6271
6272
6273/**
6274 * Finds the first bit which is set in the given 32-bit integer.
6275 * Bits are numbered from 1 (least significant) to 32.
6276 *
6277 * @returns index [1..32] of the first set bit.
6278 * @returns 0 if all bits are cleared.
6279 * @param u32 Integer to search for set bits.
6280 * @remark Similar to ffs() in BSD.
6281 */
6282DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6283{
6284# if RT_INLINE_ASM_USES_INTRIN
6285 unsigned long iBit;
6286 if (_BitScanForward(&iBit, u32))
6287 iBit++;
6288 else
6289 iBit = 0;
6290# elif RT_INLINE_ASM_GNU_STYLE
6291 uint32_t iBit;
6292 __asm__ __volatile__("bsf %1, %0\n\t"
6293 "jnz 1f\n\t"
6294 "xorl %0, %0\n\t"
6295 "jmp 2f\n"
6296 "1:\n\t"
6297 "incl %0\n"
6298 "2:\n\t"
6299 : "=r" (iBit)
6300 : "rm" (u32));
6301# else
6302 uint32_t iBit;
6303 _asm
6304 {
6305 bsf eax, [u32]
6306 jnz found
6307 xor eax, eax
6308 jmp done
6309 found:
6310 inc eax
6311 done:
6312 mov [iBit], eax
6313 }
6314# endif
6315 return iBit;
6316}
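
/*
 * Worked example (illustrative only, compiled out): the scan functions use
 * 1-based bit numbers, with 0 meaning that no bit is set.
 */
#if 0
static void exampleBitScan(void)
{
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000000)) ==  0);  /* no bits set */
    Assert(ASMBitFirstSetU32(UINT32_C(0x00008000)) == 16);  /* lowest set bit is bit 15 */
    Assert(ASMBitLastSetU32( UINT32_C(0x00008001)) == 16);  /* highest set bit is bit 15 */
}
#endif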
6317
6318
6319/**
6320 * Finds the first bit which is set in the given 32-bit integer.
6321 * Bits are numbered from 1 (least significant) to 32.
6322 *
6323 * @returns index [1..32] of the first set bit.
6324 * @returns 0 if all bits are cleared.
6325 * @param i32 Integer to search for set bits.
6326 * @remark Similar to ffs() in BSD.
6327 */
6328DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6329{
6330 return ASMBitFirstSetU32((uint32_t)i32);
6331}
6332
6333
6334/**
6335 * Finds the last bit which is set in the given 32-bit integer.
6336 * Bits are numbered from 1 (least significant) to 32.
6337 *
6338 * @returns index [1..32] of the last set bit.
6339 * @returns 0 if all bits are cleared.
6340 * @param u32 Integer to search for set bits.
6341 * @remark Similar to fls() in BSD.
6342 */
6343DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6344{
6345# if RT_INLINE_ASM_USES_INTRIN
6346 unsigned long iBit;
6347 if (_BitScanReverse(&iBit, u32))
6348 iBit++;
6349 else
6350 iBit = 0;
6351# elif RT_INLINE_ASM_GNU_STYLE
6352 uint32_t iBit;
6353 __asm__ __volatile__("bsrl %1, %0\n\t"
6354 "jnz 1f\n\t"
6355 "xorl %0, %0\n\t"
6356 "jmp 2f\n"
6357 "1:\n\t"
6358 "incl %0\n"
6359 "2:\n\t"
6360 : "=r" (iBit)
6361 : "rm" (u32));
6362# else
6363 uint32_t iBit;
6364 _asm
6365 {
6366 bsr eax, [u32]
6367 jnz found
6368 xor eax, eax
6369 jmp done
6370 found:
6371 inc eax
6372 done:
6373 mov [iBit], eax
6374 }
6375# endif
6376 return iBit;
6377}
6378
6379
6380/**
6381 * Finds the last bit which is set in the given 32-bit integer.
6382 * Bits are numbered from 1 (least significant) to 32.
6383 *
6384 * @returns index [1..32] of the last set bit.
6385 * @returns 0 if all bits are cleared.
6386 * @param i32 Integer to search for set bits.
6387 * @remark Similar to fls() in BSD.
6388 */
6389DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6390{
6391    return ASMBitLastSetU32((uint32_t)i32);
6392}
6393
6394/**
6395 * Reverse the byte order of the given 16-bit integer.
6396 *
6397 * @returns The byte swapped value.
6398 * @param u16 16-bit integer value.
6399 */
6400DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6401{
6402#if RT_INLINE_ASM_USES_INTRIN
6403 u16 = _byteswap_ushort(u16);
6404#elif RT_INLINE_ASM_GNU_STYLE
6405 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6406#else
6407 _asm
6408 {
6409 mov ax, [u16]
6410 ror ax, 8
6411 mov [u16], ax
6412 }
6413#endif
6414 return u16;
6415}
6416
6417/**
6418 * Reverse the byte order of the given 32-bit integer.
6419 *
6420 * @returns The byte swapped value.
6421 * @param u32 32-bit integer value.
6422 */
6423DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6424{
6425#if RT_INLINE_ASM_USES_INTRIN
6426 u32 = _byteswap_ulong(u32);
6427#elif RT_INLINE_ASM_GNU_STYLE
6428 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6429#else
6430 _asm
6431 {
6432 mov eax, [u32]
6433 bswap eax
6434 mov [u32], eax
6435 }
6436#endif
6437 return u32;
6438}
6439
6440
6441/**
6442 * Reverse the byte order of the given 64-bit integer.
6443 *
6444 * @returns The byte swapped value.
6445 * @param u64 64-bit integer value.
6446 */
6447DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6448{
6449#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6450 u64 = _byteswap_uint64(u64);
6451#else
6452 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6453 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6454#endif
6455 return u64;
6456}
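
/*
 * Worked example (illustrative only, compiled out): byte swapping is the
 * usual way to convert between little and big endian representations.
 */
#if 0
static void exampleByteSwap(void)
{
    Assert(ASMByteSwapU16(UINT16_C(0x1234)) == UINT16_C(0x3412));
    Assert(ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
    Assert(ASMByteSwapU64(UINT64_C(0x0123456789abcdef)) == UINT64_C(0xefcdab8967452301));
}
#endif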
6457
6458
6459/** @} */
6460
6461
6462/** @} */
6463#endif
6464