VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 9741

Last change on this file since 9741 was 9581, checked in by vboxsync, 17 years ago

const + small optimization

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 138.1 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 when building with a Microsoft compiler whose _MSC_VER is
 * 1400 or higher; <intrin.h> is then included and the intrinsics listed
 * below are enabled.  Defined as 0 in every other case.
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
  /* Barriers, CPU identification, interrupt flag, TSC and MSRs. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
  /* Port I/O. */
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
  /* TLB invalidation and string stores. */
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
  /* Control registers. */
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
  /* Bit scanning / testing and byte swapping. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
  /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
  /* Intrinsics only enabled for AMD64 builds. */
# ifdef RT_ARCH_AMD64
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing
109 * while the latter doesn't make any promises about the order. Ordered
110 * operations don't, it seems, make any 100% promise with regard to whether
111 * the operation will complete before any subsequent memory access.
112 * (please, correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint64_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
127 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly, in which
 * case the ASM* functions are implemented in an external .asm file.
 * Defined as 0 otherwise.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
155
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly, i.e.
 * for every compiler that does not define _MSC_VER.  Defined as 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
164
165
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - byte-packed image of the interrupt descriptor table register as
 * read and written by ASMGetIDTR() and ASMSetIDTR().
 */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
#pragma pack()
/** IDTR structure type. */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR structure. */
typedef struct RTIDTR *PRTIDTR;
177
#pragma pack(1)
/**
 * GDTR - byte-packed image of the global descriptor table register as
 * read by ASMGetGDTR().
 */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
#pragma pack()
/** GDTR structure type. */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR structure. */
typedef struct RTGDTR *PRTGDTR;
188
189
/** @def ASMReturnAddress
 * Yields the return address of the function (or method) in which the macro
 * is expanded, i.e. the address execution continues at in the caller.
 *
 * Maps to the _ReturnAddress intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GNU C; any other compiler is rejected.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380# endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX ecx index
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /* ??? another intrinsic ??? */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
 *
 * The three registers together hold the 12 character vendor string; all of
 * them must match "GenuineIntel" for the CPU to be considered an Intel one.
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    return uEBX == 0x756e6547   /* 'Genu' */
        && uECX == 0x6c65746e   /* 'ntel' */
        && uEDX == 0x49656e69;  /* 'ineI' */
}
959
960
961/**
962 * Tests if this is an genuin Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family is taken from bits 11:8.  When it is 0xf, the 8-bit
 * extended family field in bits 27:20 is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    /* The extended family field is 8 bits wide, so mask with 0xff. */
    return ((uEAX >> 20) & 0xff) + 0xf;
}
986
987
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The base model is taken from bits 7:4.  For family 0xf and family 0x6
 * (bits 11:8) the extended model bits 19:16 are placed above it.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf || uBaseFamily == 0x6)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1001
1002
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The base model is taken from bits 7:4.  Only for family 0xf (bits 11:8)
 * are the extended model bits 19:16 placed above it.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1016
1017
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base model is taken from bits 7:4.  The extended model bits 19:16 are
 * placed above it for family 0xf, and for family 0x6 when fIntel is set.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf || (uBaseFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1031
1032
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The stepping is the lowest four bits of the value.
 *
 * @returns Stepping.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const uStepping = uEAX & 0xf;
    return uStepping;
}
1043
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR0 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
/**
 * Sets the CR4 register.
 *
 * Uses the __writecr4 intrinsic when intrinsics are enabled, otherwise GNU
 * style or MSC style inline assembly.
 *
 * @param   uCR4    New CR4 value.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
#else
DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
{
# if RT_INLINE_ASM_USES_INTRIN
    __writecr4(uCR4);

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
#  else
    __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [uCR4]
        mov     cr4, rax
#  else
        mov     eax, [uCR4]
        /* The instruction is emitted as raw bytes; see the trailing comment. */
        _emit   0x0F
        _emit   0x22
        _emit   0xE0 /* mov cr4, eax */
#  endif
    }
# endif
}
#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
/**
 * Enables interrupts (EFLAGS.IF).
 *
 * Executes the STI instruction, either through inline assembly or through
 * the _enable() intrinsic.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMIntEnable(void);
#else
DECLINLINE(void) ASMIntEnable(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm("sti\n");
# elif RT_INLINE_ASM_USES_INTRIN
    _enable();
# else
    __asm sti
# endif
}
#endif
1418
1419
/**
 * Disables interrupts (!EFLAGS.IF).
 *
 * Executes the CLI instruction, either through inline assembly or through
 * the _disable() intrinsic.  The previous interrupt state is not returned;
 * see ASMIntDisableFlags for that.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMIntDisable(void);
#else
DECLINLINE(void) ASMIntDisable(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm("cli\n");
# elif RT_INLINE_ASM_USES_INTRIN
    _disable();
# else
    __asm cli
# endif
}
#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 * @returns Register content.
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731
/**
 * Compiler memory barrier.
 *
 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 * values or any outstanding writes when returning from this function.
 *
 * This function must be used if non-volatile data is modified by a
 * device or the VMM. Typical cases are port access, MMIO access,
 * trapping instruction, etc.
 *
 * Three implementations are selected at compile time: an empty GNU asm
 * statement with a "memory" clobber, the _ReadWriteBarrier() intrinsic, or
 * an inline function containing an empty MSC __asm block.
 */
#if RT_INLINE_ASM_GNU_STYLE
# define ASMCompilerBarrier()   do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
#elif RT_INLINE_ASM_USES_INTRIN
# define ASMCompilerBarrier()   do { _ReadWriteBarrier(); } while (0)
#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
DECLINLINE(void) ASMCompilerBarrier(void)
{
    /* Deliberately empty asm block. */
    __asm
    {
    }
}
#endif
1754
1755
/**
 * Writes a 8-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u8      8-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
#else
DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* %w0 / %b1 select the 16-bit and 8-bit forms of the operands. */
    __asm__ __volatile__("outb %b1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u8));

# elif RT_INLINE_ASM_USES_INTRIN
    __outbyte(Port, u8);

# else
    __asm
    {
        mov     dx, [Port]
        mov     al, [u8]
        out     dx, al
    }
# endif
}
#endif
1785
1786
1787/**
1788 * Gets a 8-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 8-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1795#else
1796DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1797{
1798 uint8_t u8;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inb %w1, %b0\n\t"
1801 : "=a" (u8)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u8 = __inbyte(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in al, dx
1812 mov [u8], al
1813 }
1814# endif
1815 return u8;
1816}
1817#endif
1818
1819
/**
 * Writes a 16-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u16     16-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
#else
DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* %w selects the 16-bit form of both operands. */
    __asm__ __volatile__("outw %w1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u16));

# elif RT_INLINE_ASM_USES_INTRIN
    __outword(Port, u16);

# else
    __asm
    {
        mov     dx, [Port]
        mov     ax, [u16]
        out     dx, ax
    }
# endif
}
#endif
1849
1850
1851/**
1852 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1853 *
1854 * @returns 16-bit integer.
1855 * @param Port I/O port to read from.
1856 */
1857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1858DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1859#else
1860DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1861{
1862 uint16_t u16;
1863# if RT_INLINE_ASM_GNU_STYLE
1864 __asm__ __volatile__("inw %w1, %w0\n\t"
1865 : "=a" (u16)
1866 : "Nd" (Port));
1867
1868# elif RT_INLINE_ASM_USES_INTRIN
1869 u16 = __inword(Port);
1870
1871# else
1872 __asm
1873 {
1874 mov dx, [Port]
1875 in ax, dx
1876 mov [u16], ax
1877 }
1878# endif
1879 return u16;
1880}
1881#endif
1882
1883
/**
 * Writes a 32-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u32     32-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
#else
DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* %w0 selects the 16-bit form of the port operand. */
    __asm__ __volatile__("outl %1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u32));

# elif RT_INLINE_ASM_USES_INTRIN
    __outdword(Port, u32);

# else
    __asm
    {
        mov     dx, [Port]
        mov     eax, [u32]
        out     dx, eax
    }
# endif
}
#endif
1913
1914
1915/**
1916 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1917 *
1918 * @returns 32-bit integer.
1919 * @param Port I/O port to read from.
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1923#else
1924DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1925{
1926 uint32_t u32;
1927# if RT_INLINE_ASM_GNU_STYLE
1928 __asm__ __volatile__("inl %w1, %0\n\t"
1929 : "=a" (u32)
1930 : "Nd" (Port));
1931
1932# elif RT_INLINE_ASM_USES_INTRIN
1933 u32 = __indword(Port);
1934
1935# else
1936 __asm
1937 {
1938 mov dx, [Port]
1939 in eax, dx
1940 mov [u32], eax
1941 }
1942# endif
1943 return u32;
1944}
1945#endif
1946
1947/** @todo string i/o */
1948
1949
1950/**
1951 * Atomically Exchange an unsigned 8-bit value, ordered.
1952 *
1953 * @returns Current *pu8 value
1954 * @param pu8 Pointer to the 8-bit variable to update.
1955 * @param u8 The 8-bit value to assign to *pu8.
1956 */
1957#if RT_INLINE_ASM_EXTERNAL
1958DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1959#else
1960DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1961{
1962# if RT_INLINE_ASM_GNU_STYLE
1963 __asm__ __volatile__("xchgb %0, %1\n\t"
1964 : "=m" (*pu8),
1965 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1966 : "1" (u8));
1967# else
1968 __asm
1969 {
1970# ifdef RT_ARCH_AMD64
1971 mov rdx, [pu8]
1972 mov al, [u8]
1973 xchg [rdx], al
1974 mov [u8], al
1975# else
1976 mov edx, [pu8]
1977 mov al, [u8]
1978 xchg [edx], al
1979 mov [u8], al
1980# endif
1981 }
1982# endif
1983 return u8;
1984}
1985#endif
1986
1987
1988/**
1989 * Atomically Exchange a signed 8-bit value, ordered.
1990 *
1991 * @returns Current *pu8 value
1992 * @param pi8 Pointer to the 8-bit variable to update.
1993 * @param i8 The 8-bit value to assign to *pi8.
1994 */
1995DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1996{
1997 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1998}
1999
2000
2001/**
2002 * Atomically Exchange a bool value, ordered.
2003 *
2004 * @returns Current *pf value
2005 * @param pf Pointer to the 8-bit variable to update.
2006 * @param f The 8-bit value to assign to *pi8.
2007 */
2008DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2009{
2010#ifdef _MSC_VER
2011 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2012#else
2013 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2014#endif
2015}
2016
2017
2018/**
2019 * Atomically Exchange an unsigned 16-bit value, ordered.
2020 *
2021 * @returns Current *pu16 value
2022 * @param pu16 Pointer to the 16-bit variable to update.
2023 * @param u16 The 16-bit value to assign to *pu16.
2024 */
2025#if RT_INLINE_ASM_EXTERNAL
2026DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2027#else
2028DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2029{
2030# if RT_INLINE_ASM_GNU_STYLE
2031 __asm__ __volatile__("xchgw %0, %1\n\t"
2032 : "=m" (*pu16),
2033 "=r" (u16)
2034 : "1" (u16));
2035# else
2036 __asm
2037 {
2038# ifdef RT_ARCH_AMD64
2039 mov rdx, [pu16]
2040 mov ax, [u16]
2041 xchg [rdx], ax
2042 mov [u16], ax
2043# else
2044 mov edx, [pu16]
2045 mov ax, [u16]
2046 xchg [edx], ax
2047 mov [u16], ax
2048# endif
2049 }
2050# endif
2051 return u16;
2052}
2053#endif
2054
2055
2056/**
2057 * Atomically Exchange a signed 16-bit value, ordered.
2058 *
2059 * @returns Current *pu16 value
2060 * @param pi16 Pointer to the 16-bit variable to update.
2061 * @param i16 The 16-bit value to assign to *pi16.
2062 */
2063DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2064{
2065 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2066}
2067
2068
2069/**
2070 * Atomically Exchange an unsigned 32-bit value, ordered.
2071 *
2072 * @returns Current *pu32 value
2073 * @param pu32 Pointer to the 32-bit variable to update.
2074 * @param u32 The 32-bit value to assign to *pu32.
2075 */
2076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2077DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2078#else
2079DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2080{
2081# if RT_INLINE_ASM_GNU_STYLE
2082 __asm__ __volatile__("xchgl %0, %1\n\t"
2083 : "=m" (*pu32),
2084 "=r" (u32)
2085 : "1" (u32));
2086
2087# elif RT_INLINE_ASM_USES_INTRIN
2088 u32 = _InterlockedExchange((long *)pu32, u32);
2089
2090# else
2091 __asm
2092 {
2093# ifdef RT_ARCH_AMD64
2094 mov rdx, [pu32]
2095 mov eax, u32
2096 xchg [rdx], eax
2097 mov [u32], eax
2098# else
2099 mov edx, [pu32]
2100 mov eax, u32
2101 xchg [edx], eax
2102 mov [u32], eax
2103# endif
2104 }
2105# endif
2106 return u32;
2107}
2108#endif
2109
2110
2111/**
2112 * Atomically Exchange a signed 32-bit value, ordered.
2113 *
2114 * @returns Current *pu32 value
2115 * @param pi32 Pointer to the 32-bit variable to update.
2116 * @param i32 The 32-bit value to assign to *pi32.
2117 */
2118DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2119{
2120 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2121}
2122
2123
2124/**
2125 * Atomically Exchange an unsigned 64-bit value, ordered.
2126 *
2127 * @returns Current *pu64 value
2128 * @param pu64 Pointer to the 64-bit variable to update.
2129 * @param u64 The 64-bit value to assign to *pu64.
2130 */
2131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2132DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2133#else
2134DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2135{
2136# if defined(RT_ARCH_AMD64)
2137# if RT_INLINE_ASM_USES_INTRIN
2138 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2139
2140# elif RT_INLINE_ASM_GNU_STYLE
2141 __asm__ __volatile__("xchgq %0, %1\n\t"
2142 : "=m" (*pu64),
2143 "=r" (u64)
2144 : "1" (u64));
2145# else
2146 __asm
2147 {
2148 mov rdx, [pu64]
2149 mov rax, [u64]
2150 xchg [rdx], rax
2151 mov [u64], rax
2152 }
2153# endif
2154# else /* !RT_ARCH_AMD64 */
2155# if RT_INLINE_ASM_GNU_STYLE
2156# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2157 uint32_t u32EBX = (uint32_t)u64;
2158 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2159 "xchgl %%ebx, %3\n\t"
2160 "1:\n\t"
2161 "lock; cmpxchg8b (%5)\n\t"
2162 "jnz 1b\n\t"
2163 "movl %3, %%ebx\n\t"
2164 /*"xchgl %%esi, %5\n\t"*/
2165 : "=A" (u64),
2166 "=m" (*pu64)
2167 : "0" (*pu64),
2168 "m" ( u32EBX ),
2169 "c" ( (uint32_t)(u64 >> 32) ),
2170 "S" (pu64) );
2171# else /* !PIC */
2172 __asm__ __volatile__("1:\n\t"
2173 "lock; cmpxchg8b %1\n\t"
2174 "jnz 1b\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (*pu64),
2178 "b" ( (uint32_t)u64 ),
2179 "c" ( (uint32_t)(u64 >> 32) ));
2180# endif
2181# else
2182 __asm
2183 {
2184 mov ebx, dword ptr [u64]
2185 mov ecx, dword ptr [u64 + 4]
2186 mov edi, pu64
2187 mov eax, dword ptr [edi]
2188 mov edx, dword ptr [edi + 4]
2189 retry:
2190 lock cmpxchg8b [edi]
2191 jnz retry
2192 mov dword ptr [u64], eax
2193 mov dword ptr [u64 + 4], edx
2194 }
2195# endif
2196# endif /* !RT_ARCH_AMD64 */
2197 return u64;
2198}
2199#endif
2200
2201
2202/**
2203 * Atomically Exchange an signed 64-bit value, ordered.
2204 *
2205 * @returns Current *pi64 value
2206 * @param pi64 Pointer to the 64-bit variable to update.
2207 * @param i64 The 64-bit value to assign to *pi64.
2208 */
2209DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2210{
2211 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2212}
2213
2214
#ifdef RT_ARCH_AMD64
/**
 * Atomically Exchange an unsigned 128-bit value, ordered.
 *
 * @returns Current *pu128.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead.
 *
 * @note    As implemented, the low and the high 64-bit halves are exchanged
 *          by two separate ASMAtomicXchgU64 calls, so the 128-bit operation
 *          as a whole is NOT atomic.  The single-instruction variant below
 *          is compiled out (#if 0).
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /* The CPU feature test is commented out, so this branch is always taken. */
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        RTUINT128U u128Ret;
        u128Ret.u = u128;
        /* Two independent 64-bit exchanges: low half first, then high half. */
        u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
        u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
        return u128Ret.u;
    }
#if 0 /* later? */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov     rbx, dword ptr [u128]
            mov     rcx, dword ptr [u128 + 8]
            mov     rdi, pu128
            mov     rax, dword ptr [rdi]
            mov     rdx, dword ptr [rdi + 8]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov     dword ptr [u128], rax
            mov     dword ptr [u128 + 8], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2274
2275
/** @def ASMAtomicXchgSize
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * The previous value is discarded.  Any other size triggers an assertion
 * and leaves *pu untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2293
2294
2295/**
2296 * Atomically Exchange a pointer value, ordered.
2297 *
2298 * @returns Current *ppv value
2299 * @param ppv Pointer to the pointer variable to update.
2300 * @param pv The pointer value to assign to *ppv.
2301 */
2302DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2303{
2304#if ARCH_BITS == 32
2305 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2306#elif ARCH_BITS == 64
2307 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2308#else
2309# error "ARCH_BITS is bogus"
2310#endif
2311}
2312
2313
2314/**
2315 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2316 *
2317 * @returns true if xchg was done.
2318 * @returns false if xchg wasn't done.
2319 *
2320 * @param pu32 Pointer to the value to update.
2321 * @param u32New The new value to assigned to *pu32.
2322 * @param u32Old The old value to *pu32 compare with.
2323 */
2324#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2325DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2326#else
2327DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2328{
2329# if RT_INLINE_ASM_GNU_STYLE
2330 uint8_t u8Ret;
2331 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2332 "setz %1\n\t"
2333 : "=m" (*pu32),
2334 "=qm" (u8Ret),
2335 "=a" (u32Old)
2336 : "r" (u32New),
2337 "2" (u32Old));
2338 return (bool)u8Ret;
2339
2340# elif RT_INLINE_ASM_USES_INTRIN
2341 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2342
2343# else
2344 uint32_t u32Ret;
2345 __asm
2346 {
2347# ifdef RT_ARCH_AMD64
2348 mov rdx, [pu32]
2349# else
2350 mov edx, [pu32]
2351# endif
2352 mov eax, [u32Old]
2353 mov ecx, [u32New]
2354# ifdef RT_ARCH_AMD64
2355 lock cmpxchg [rdx], ecx
2356# else
2357 lock cmpxchg [edx], ecx
2358# endif
2359 setz al
2360 movzx eax, al
2361 mov [u32Ret], eax
2362 }
2363 return !!u32Ret;
2364# endif
2365}
2366#endif
2367
2368
2369/**
2370 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param pi32 Pointer to the value to update.
2376 * @param i32New The new value to assigned to *pi32.
2377 * @param i32Old The old value to *pi32 compare with.
2378 */
2379DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2380{
2381 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2382}
2383
2384
2385/**
2386 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2387 *
2388 * @returns true if xchg was done.
2389 * @returns false if xchg wasn't done.
2390 *
2391 * @param pu64 Pointer to the 64-bit variable to update.
2392 * @param u64New The 64-bit value to assign to *pu64.
2393 * @param u64Old The value to compare with.
2394 */
2395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2396DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2397#else
2398DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2399{
2400# if RT_INLINE_ASM_USES_INTRIN
2401 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2402
2403# elif defined(RT_ARCH_AMD64)
2404# if RT_INLINE_ASM_GNU_STYLE
2405 uint8_t u8Ret;
2406 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2407 "setz %1\n\t"
2408 : "=m" (*pu64),
2409 "=qm" (u8Ret),
2410 "=a" (u64Old)
2411 : "r" (u64New),
2412 "2" (u64Old));
2413 return (bool)u8Ret;
2414# else
2415 bool fRet;
2416 __asm
2417 {
2418 mov rdx, [pu32]
2419 mov rax, [u64Old]
2420 mov rcx, [u64New]
2421 lock cmpxchg [rdx], rcx
2422 setz al
2423 mov [fRet], al
2424 }
2425 return fRet;
2426# endif
2427# else /* !RT_ARCH_AMD64 */
2428 uint32_t u32Ret;
2429# if RT_INLINE_ASM_GNU_STYLE
2430# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2431 uint32_t u32EBX = (uint32_t)u64New;
2432 uint32_t u32Spill;
2433 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2434 "lock; cmpxchg8b (%6)\n\t"
2435 "setz %%al\n\t"
2436 "movl %4, %%ebx\n\t"
2437 "movzbl %%al, %%eax\n\t"
2438 : "=a" (u32Ret),
2439 "=d" (u32Spill),
2440 "=m" (*pu64)
2441 : "A" (u64Old),
2442 "m" ( u32EBX ),
2443 "c" ( (uint32_t)(u64New >> 32) ),
2444 "S" (pu64) );
2445# else /* !PIC */
2446 uint32_t u32Spill;
2447 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2448 "setz %%al\n\t"
2449 "movzbl %%al, %%eax\n\t"
2450 : "=a" (u32Ret),
2451 "=d" (u32Spill),
2452 "=m" (*pu64)
2453 : "A" (u64Old),
2454 "b" ( (uint32_t)u64New ),
2455 "c" ( (uint32_t)(u64New >> 32) ));
2456# endif
2457 return (bool)u32Ret;
2458# else
2459 __asm
2460 {
2461 mov ebx, dword ptr [u64New]
2462 mov ecx, dword ptr [u64New + 4]
2463 mov edi, [pu64]
2464 mov eax, dword ptr [u64Old]
2465 mov edx, dword ptr [u64Old + 4]
2466 lock cmpxchg8b [edi]
2467 setz al
2468 movzx eax, al
2469 mov dword ptr [u32Ret], eax
2470 }
2471 return !!u32Ret;
2472# endif
2473# endif /* !RT_ARCH_AMD64 */
2474}
2475#endif
2476
2477
2478/**
2479 * Atomically Compare and exchange a signed 64-bit value, ordered.
2480 *
2481 * @returns true if xchg was done.
2482 * @returns false if xchg wasn't done.
2483 *
2484 * @param pi64 Pointer to the 64-bit variable to update.
2485 * @param i64 The 64-bit value to assign to *pu64.
2486 * @param i64Old The value to compare with.
2487 */
2488DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2489{
2490 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2491}
2492
2493
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit compare-and-exchange
 * function.  Any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if exchanged).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2515
2516
2517/**
2518 * Atomically Compare and Exchange a pointer value, ordered.
2519 *
2520 * @returns true if xchg was done.
2521 * @returns false if xchg wasn't done.
2522 *
2523 * @param ppv Pointer to the value to update.
2524 * @param pvNew The new value to assigned to *ppv.
2525 * @param pvOld The old value to *ppv compare with.
2526 */
2527DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2528{
2529#if ARCH_BITS == 32
2530 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2531#elif ARCH_BITS == 64
2532 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2533#else
2534# error "ARCH_BITS is bogus"
2535#endif
2536}
2537
2538
2539/**
2540 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2541 * passes back old value, ordered.
2542 *
2543 * @returns true if xchg was done.
2544 * @returns false if xchg wasn't done.
2545 *
2546 * @param pu32 Pointer to the value to update.
2547 * @param u32New The new value to assigned to *pu32.
2548 * @param u32Old The old value to *pu32 compare with.
2549 * @param pu32Old Pointer store the old value at.
2550 */
2551#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2552DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2553#else
2554DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2555{
2556# if RT_INLINE_ASM_GNU_STYLE
2557 uint8_t u8Ret;
2558 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2559 "setz %1\n\t"
2560 : "=m" (*pu32),
2561 "=qm" (u8Ret),
2562 "=a" (*pu32Old)
2563 : "r" (u32New),
2564 "a" (u32Old));
2565 return (bool)u8Ret;
2566
2567# elif RT_INLINE_ASM_USES_INTRIN
2568 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2569
2570# else
2571 uint32_t u32Ret;
2572 __asm
2573 {
2574# ifdef RT_ARCH_AMD64
2575 mov rdx, [pu32]
2576# else
2577 mov edx, [pu32]
2578# endif
2579 mov eax, [u32Old]
2580 mov ecx, [u32New]
2581# ifdef RT_ARCH_AMD64
2582 lock cmpxchg [rdx], ecx
2583 mov rdx, [pu32Old]
2584 mov [rdx], eax
2585# else
2586 lock cmpxchg [edx], ecx
2587 mov edx, [pu32Old]
2588 mov [edx], eax
2589# endif
2590 setz al
2591 movzx eax, al
2592 mov [u32Ret], eax
2593 }
2594 return !!u32Ret;
2595# endif
2596}
2597#endif
2598
2599
2600/**
2601 * Atomically Compare and Exchange a signed 32-bit value, additionally
2602 * passes back old value, ordered.
2603 *
2604 * @returns true if xchg was done.
2605 * @returns false if xchg wasn't done.
2606 *
2607 * @param pi32 Pointer to the value to update.
2608 * @param i32New The new value to assigned to *pi32.
2609 * @param i32Old The old value to *pi32 compare with.
2610 * @param pi32Old Pointer store the old value at.
2611 */
2612DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2613{
2614 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2615}
2616
2617
2618/**
2619 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2620 * passing back old value, ordered.
2621 *
2622 * @returns true if xchg was done.
2623 * @returns false if xchg wasn't done.
2624 *
2625 * @param pu64 Pointer to the 64-bit variable to update.
2626 * @param u64New The 64-bit value to assign to *pu64.
2627 * @param u64Old The value to compare with.
2628 * @param pu64Old Pointer store the old value at.
2629 */
2630#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2631DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2632#else
2633DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2634{
2635# if RT_INLINE_ASM_USES_INTRIN
2636 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2637
2638# elif defined(RT_ARCH_AMD64)
2639# if RT_INLINE_ASM_GNU_STYLE
2640 uint8_t u8Ret;
2641 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2642 "setz %1\n\t"
2643 : "=m" (*pu64),
2644 "=qm" (u8Ret),
2645 "=a" (*pu64Old)
2646 : "r" (u64New),
2647 "a" (u64Old));
2648 return (bool)u8Ret;
2649# else
2650 bool fRet;
2651 __asm
2652 {
2653 mov rdx, [pu32]
2654 mov rax, [u64Old]
2655 mov rcx, [u64New]
2656 lock cmpxchg [rdx], rcx
2657 mov rdx, [pu64Old]
2658 mov [rdx], rax
2659 setz al
2660 mov [fRet], al
2661 }
2662 return fRet;
2663# endif
2664# else /* !RT_ARCH_AMD64 */
2665# if RT_INLINE_ASM_GNU_STYLE
2666 uint64_t u64Ret;
2667# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2668 /* NB: this code uses a memory clobber description, because the clean
2669 * solution with an output value for *pu64 makes gcc run out of registers.
2670 * This will cause suboptimal code, and anyone with a better solution is
2671 * welcome to improve this. */
2672 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2673 "lock; cmpxchg8b %3\n\t"
2674 "xchgl %%ebx, %1\n\t"
2675 : "=A" (u64Ret)
2676 : "DS" ((uint32_t)u64New),
2677 "c" ((uint32_t)(u64New >> 32)),
2678 "m" (*pu64),
2679 "0" (u64Old)
2680 : "memory" );
2681# else /* !PIC */
2682 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2683 : "=A" (u64Ret),
2684 "=m" (*pu64)
2685 : "b" ((uint32_t)u64New),
2686 "c" ((uint32_t)(u64New >> 32)),
2687 "m" (*pu64),
2688 "0" (u64Old));
2689# endif
2690 *pu64Old = u64Ret;
2691 return u64Ret == u64Old;
2692# else
2693 uint32_t u32Ret;
2694 __asm
2695 {
2696 mov ebx, dword ptr [u64New]
2697 mov ecx, dword ptr [u64New + 4]
2698 mov edi, [pu64]
2699 mov eax, dword ptr [u64Old]
2700 mov edx, dword ptr [u64Old + 4]
2701 lock cmpxchg8b [edi]
2702 mov ebx, [pu64Old]
2703 mov [ebx], eax
2704 setz al
2705 movzx eax, al
2706 add ebx, 4
2707 mov [ebx], edx
2708 mov dword ptr [u32Ret], eax
2709 }
2710 return !!u32Ret;
2711# endif
2712# endif /* !RT_ARCH_AMD64 */
2713}
2714#endif
2715
2716
2717/**
2718 * Atomically Compare and exchange a signed 64-bit value, additionally
2719 * passing back old value, ordered.
2720 *
2721 * @returns true if xchg was done.
2722 * @returns false if xchg wasn't done.
2723 *
2724 * @param pi64 Pointer to the 64-bit variable to update.
2725 * @param i64 The 64-bit value to assign to *pu64.
2726 * @param i64Old The value to compare with.
2727 * @param pi64Old Pointer store the old value at.
2728 */
2729DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2730{
2731 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2732}
2733
2734
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers. Additionally passes back the old value.
 *
 * Dispatches on sizeof(*(pu)): 4 and 8 byte objects are supported, any
 * other size triggers an assertion, sets fRc to false and uOldVal to 0.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (lvalue).
 * @param   uOldVal     Where to store the old value (lvalue; its address is
 *                      taken and reinterpreted as a 32-bit or 64-bit integer).
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                (uOldVal) = 0; \
                break; \
        } \
    } while (0)
2758
2759
2760/**
2761 * Atomically Compare and Exchange a pointer value, additionally
2762 * passing back old value, ordered.
2763 *
2764 * @returns true if xchg was done.
2765 * @returns false if xchg wasn't done.
2766 *
2767 * @param ppv Pointer to the value to update.
2768 * @param pvNew The new value to assigned to *ppv.
2769 * @param pvOld The old value to *ppv compare with.
2770 * @param ppvOld Pointer store the old value at.
2771 */
2772DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2773{
2774#if ARCH_BITS == 32
2775 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2776#elif ARCH_BITS == 64
2777 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2778#else
2779# error "ARCH_BITS is bogus"
2780#endif
2781}
2782
2783
2784/**
2785 * Atomically exchanges and adds to a 32-bit value, ordered.
2786 *
2787 * @returns The old value.
2788 * @param pu32 Pointer to the value.
2789 * @param u32 Number to add.
2790 */
2791#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2792DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2793#else
2794DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2795{
2796# if RT_INLINE_ASM_USES_INTRIN
2797 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2798 return u32;
2799
2800# elif RT_INLINE_ASM_GNU_STYLE
2801 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2802 : "=r" (u32),
2803 "=m" (*pu32)
2804 : "0" (u32)
2805 : "memory");
2806 return u32;
2807# else
2808 __asm
2809 {
2810 mov eax, [u32]
2811# ifdef RT_ARCH_AMD64
2812 mov rdx, [pu32]
2813 lock xadd [rdx], eax
2814# else
2815 mov edx, [pu32]
2816 lock xadd [edx], eax
2817# endif
2818 mov [u32], eax
2819 }
2820 return u32;
2821# endif
2822}
2823#endif
2824
2825
2826/**
2827 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2828 *
2829 * @returns The old value.
2830 * @param pi32 Pointer to the value.
2831 * @param i32 Number to add.
2832 */
2833DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2834{
2835 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2836}
2837
2838
2839/**
2840 * Atomically increment a 32-bit value, ordered.
2841 *
2842 * @returns The new value.
2843 * @param pu32 Pointer to the value to increment.
2844 */
2845#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2846DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2847#else
2848DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2849{
2850 uint32_t u32;
2851# if RT_INLINE_ASM_USES_INTRIN
2852 u32 = _InterlockedIncrement((long *)pu32);
2853 return u32;
2854
2855# elif RT_INLINE_ASM_GNU_STYLE
2856 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2857 : "=r" (u32),
2858 "=m" (*pu32)
2859 : "0" (1)
2860 : "memory");
2861 return u32+1;
2862# else
2863 __asm
2864 {
2865 mov eax, 1
2866# ifdef RT_ARCH_AMD64
2867 mov rdx, [pu32]
2868 lock xadd [rdx], eax
2869# else
2870 mov edx, [pu32]
2871 lock xadd [edx], eax
2872# endif
2873 mov u32, eax
2874 }
2875 return u32+1;
2876# endif
2877}
2878#endif
2879
2880
2881/**
2882 * Atomically increment a signed 32-bit value, ordered.
2883 *
2884 * @returns The new value.
2885 * @param pi32 Pointer to the value to increment.
2886 */
2887DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2888{
2889 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2890}
2891
2892
2893/**
2894 * Atomically decrement an unsigned 32-bit value, ordered.
2895 *
2896 * @returns The new value.
2897 * @param pu32 Pointer to the value to decrement.
2898 */
2899#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2900DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2901#else
2902DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2903{
2904 uint32_t u32;
2905# if RT_INLINE_ASM_USES_INTRIN
2906 u32 = _InterlockedDecrement((long *)pu32);
2907 return u32;
2908
2909# elif RT_INLINE_ASM_GNU_STYLE
2910 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2911 : "=r" (u32),
2912 "=m" (*pu32)
2913 : "0" (-1)
2914 : "memory");
2915 return u32-1;
2916# else
2917 __asm
2918 {
2919 mov eax, -1
2920# ifdef RT_ARCH_AMD64
2921 mov rdx, [pu32]
2922 lock xadd [rdx], eax
2923# else
2924 mov edx, [pu32]
2925 lock xadd [edx], eax
2926# endif
2927 mov u32, eax
2928 }
2929 return u32-1;
2930# endif
2931}
2932#endif
2933
2934
2935/**
2936 * Atomically decrement a signed 32-bit value, ordered.
2937 *
2938 * @returns The new value.
2939 * @param pi32 Pointer to the value to decrement.
2940 */
2941DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2942{
2943 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2944}
2945
2946
2947/**
2948 * Atomically Or an unsigned 32-bit value, ordered.
2949 *
2950 * @param pu32 Pointer to the pointer variable to OR u32 with.
2951 * @param u32 The value to OR *pu32 with.
2952 */
2953#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2954DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2955#else
2956DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2957{
2958# if RT_INLINE_ASM_USES_INTRIN
2959 _InterlockedOr((long volatile *)pu32, (long)u32);
2960
2961# elif RT_INLINE_ASM_GNU_STYLE
2962 __asm__ __volatile__("lock; orl %1, %0\n\t"
2963 : "=m" (*pu32)
2964 : "ir" (u32));
2965# else
2966 __asm
2967 {
2968 mov eax, [u32]
2969# ifdef RT_ARCH_AMD64
2970 mov rdx, [pu32]
2971 lock or [rdx], eax
2972# else
2973 mov edx, [pu32]
2974 lock or [edx], eax
2975# endif
2976 }
2977# endif
2978}
2979#endif
2980
2981
2982/**
2983 * Atomically Or a signed 32-bit value, ordered.
2984 *
2985 * @param pi32 Pointer to the pointer variable to OR u32 with.
2986 * @param i32 The value to OR *pu32 with.
2987 */
2988DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2989{
2990 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2991}
2992
2993
2994/**
2995 * Atomically And an unsigned 32-bit value, ordered.
2996 *
2997 * @param pu32 Pointer to the pointer variable to AND u32 with.
2998 * @param u32 The value to AND *pu32 with.
2999 */
3000#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3001DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3002#else
3003DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3004{
3005# if RT_INLINE_ASM_USES_INTRIN
3006 _InterlockedAnd((long volatile *)pu32, u32);
3007
3008# elif RT_INLINE_ASM_GNU_STYLE
3009 __asm__ __volatile__("lock; andl %1, %0\n\t"
3010 : "=m" (*pu32)
3011 : "ir" (u32));
3012# else
3013 __asm
3014 {
3015 mov eax, [u32]
3016# ifdef RT_ARCH_AMD64
3017 mov rdx, [pu32]
3018 lock and [rdx], eax
3019# else
3020 mov edx, [pu32]
3021 lock and [edx], eax
3022# endif
3023 }
3024# endif
3025}
3026#endif
3027
3028
3029/**
3030 * Atomically And a signed 32-bit value, ordered.
3031 *
3032 * @param pi32 Pointer to the pointer variable to AND i32 with.
3033 * @param i32 The value to AND *pi32 with.
3034 */
3035DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3036{
3037 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3038}
3039
3040
3041/**
3042 * Memory fence, waits for any pending writes and reads to complete.
3043 */
3044DECLINLINE(void) ASMMemoryFence(void)
3045{
3046 /** @todo use mfence? check if all cpus we care for support it. */
3047 uint32_t volatile u32;
3048 ASMAtomicXchgU32(&u32, 0);
3049}
3050
3051
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently just a full memory fence (forwards to ASMMemoryFence).
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
3060
3061
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently just a full memory fence (forwards to ASMMemoryFence).
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
3070
3071
3072/**
3073 * Atomically reads an unsigned 8-bit value, ordered.
3074 *
3075 * @returns Current *pu8 value
3076 * @param pu8 Pointer to the 8-bit variable to read.
3077 */
3078DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3079{
3080 ASMMemoryFence();
3081 return *pu8; /* byte reads are atomic on x86 */
3082}
3083
3084
3085/**
3086 * Atomically reads an unsigned 8-bit value, unordered.
3087 *
3088 * @returns Current *pu8 value
3089 * @param pu8 Pointer to the 8-bit variable to read.
3090 */
3091DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3092{
3093 return *pu8; /* byte reads are atomic on x86 */
3094}
3095
3096
3097/**
3098 * Atomically reads a signed 8-bit value, ordered.
3099 *
3100 * @returns Current *pi8 value
3101 * @param pi8 Pointer to the 8-bit variable to read.
3102 */
3103DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3104{
3105 ASMMemoryFence();
3106 return *pi8; /* byte reads are atomic on x86 */
3107}
3108
3109
3110/**
3111 * Atomically reads a signed 8-bit value, unordered.
3112 *
3113 * @returns Current *pi8 value
3114 * @param pi8 Pointer to the 8-bit variable to read.
3115 */
3116DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3117{
3118 return *pi8; /* byte reads are atomic on x86 */
3119}
3120
3121
3122/**
3123 * Atomically reads an unsigned 16-bit value, ordered.
3124 *
3125 * @returns Current *pu16 value
3126 * @param pu16 Pointer to the 16-bit variable to read.
3127 */
3128DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3129{
3130 ASMMemoryFence();
3131 Assert(!((uintptr_t)pu16 & 1));
3132 return *pu16;
3133}
3134
3135
3136/**
3137 * Atomically reads an unsigned 16-bit value, unordered.
3138 *
3139 * @returns Current *pu16 value
3140 * @param pu16 Pointer to the 16-bit variable to read.
3141 */
3142DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3143{
3144 Assert(!((uintptr_t)pu16 & 1));
3145 return *pu16;
3146}
3147
3148
3149/**
3150 * Atomically reads a signed 16-bit value, ordered.
3151 *
3152 * @returns Current *pi16 value
3153 * @param pi16 Pointer to the 16-bit variable to read.
3154 */
3155DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3156{
3157 ASMMemoryFence();
3158 Assert(!((uintptr_t)pi16 & 1));
3159 return *pi16;
3160}
3161
3162
3163/**
3164 * Atomically reads a signed 16-bit value, unordered.
3165 *
3166 * @returns Current *pi16 value
3167 * @param pi16 Pointer to the 16-bit variable to read.
3168 */
3169DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3170{
3171 Assert(!((uintptr_t)pi16 & 1));
3172 return *pi16;
3173}
3174
3175
3176/**
3177 * Atomically reads an unsigned 32-bit value, ordered.
3178 *
3179 * @returns Current *pu32 value
3180 * @param pu32 Pointer to the 32-bit variable to read.
3181 */
3182DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3183{
3184 ASMMemoryFence();
3185 Assert(!((uintptr_t)pu32 & 3));
3186 return *pu32;
3187}
3188
3189
3190/**
3191 * Atomically reads an unsigned 32-bit value, unordered.
3192 *
3193 * @returns Current *pu32 value
3194 * @param pu32 Pointer to the 32-bit variable to read.
3195 */
3196DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3197{
3198 Assert(!((uintptr_t)pu32 & 3));
3199 return *pu32;
3200}
3201
3202
3203/**
3204 * Atomically reads a signed 32-bit value, ordered.
3205 *
3206 * @returns Current *pi32 value
3207 * @param pi32 Pointer to the 32-bit variable to read.
3208 */
3209DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3210{
3211 ASMMemoryFence();
3212 Assert(!((uintptr_t)pi32 & 3));
3213 return *pi32;
3214}
3215
3216
3217/**
3218 * Atomically reads a signed 32-bit value, unordered.
3219 *
3220 * @returns Current *pi32 value
3221 * @param pi32 Pointer to the 32-bit variable to read.
3222 */
3223DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3224{
3225 Assert(!((uintptr_t)pi32 & 3));
3226 return *pi32;
3227}
3228
3229
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is an mfence followed by a plain 64-bit load.  On 32-bit
 * targets the read is done with a locked cmpxchg8b using zero for both the
 * comparand (EDX:EAX) and the replacement (ECX:EBX); that instruction
 * accesses the destination as a read-modify-write operand, hence the
 * writability requirement below.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__( "mfence\n\t"
                          "movq %1, %0\n\t"
                          : "=r" (u64)
                          : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov rdx, [pu64]
        mov rax, [rdx]
        mov [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* EBX is swapped with a zeroed stack slot for the duration of the
     * cmpxchg8b and restored afterwards (presumably because EBX cannot be
     * handed to the asm as an operand in PIC builds - TODO confirm). */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX = 0 (comparand), ECX:EBX = 0 (replacement); the current value
     * comes back in EDX:EAX. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor eax, eax
        xor edx, edx
        mov edi, pu64
        xor ecx, ecx
        xor ebx, ebx
        lock cmpxchg8b [edi]
        mov dword ptr [u64], eax
        mov dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3300
3301
3302/**
3303 * Atomically reads an unsigned 64-bit value, unordered.
3304 *
3305 * @returns Current *pu64 value
3306 * @param pu64 Pointer to the 64-bit variable to read.
3307 * The memory pointed to must be writable.
3308 * @remark This will fault if the memory is read-only!
3309 */
3310#if RT_INLINE_ASM_EXTERNAL
3311DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3312#else
3313DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3314{
3315 uint64_t u64;
3316# ifdef RT_ARCH_AMD64
3317# if RT_INLINE_ASM_GNU_STYLE
3318 Assert(!((uintptr_t)pu64 & 7));
3319 __asm__ __volatile__("movq %1, %0\n\t"
3320 : "=r" (u64)
3321 : "m" (*pu64));
3322# else
3323 __asm
3324 {
3325 mov rdx, [pu64]
3326 mov rax, [rdx]
3327 mov [u64], rax
3328 }
3329# endif
3330# else /* !RT_ARCH_AMD64 */
3331# if RT_INLINE_ASM_GNU_STYLE
3332# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3333 uint32_t u32EBX = 0;
3334 Assert(!((uintptr_t)pu64 & 7));
3335 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3336 "lock; cmpxchg8b (%5)\n\t"
3337 "movl %3, %%ebx\n\t"
3338 : "=A" (u64),
3339 "=m" (*pu64)
3340 : "0" (0),
3341 "m" (u32EBX),
3342 "c" (0),
3343 "S" (pu64));
3344# else /* !PIC */
3345 __asm__ __volatile__("cmpxchg8b %1\n\t"
3346 : "=A" (u64),
3347 "=m" (*pu64)
3348 : "0" (0),
3349 "b" (0),
3350 "c" (0));
3351# endif
3352# else
3353 Assert(!((uintptr_t)pu64 & 7));
3354 __asm
3355 {
3356 xor eax, eax
3357 xor edx, edx
3358 mov edi, pu64
3359 xor ecx, ecx
3360 xor ebx, ebx
3361 lock cmpxchg8b [edi]
3362 mov dword ptr [u64], eax
3363 mov dword ptr [u64 + 4], edx
3364 }
3365# endif
3366# endif /* !RT_ARCH_AMD64 */
3367 return u64;
3368}
3369#endif
3370
3371
3372/**
3373 * Atomically reads a signed 64-bit value, ordered.
3374 *
3375 * @returns Current *pi64 value
3376 * @param pi64 Pointer to the 64-bit variable to read.
3377 * The memory pointed to must be writable.
3378 * @remark This will fault if the memory is read-only!
3379 */
3380DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3381{
3382 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3383}
3384
3385
3386/**
3387 * Atomically reads a signed 64-bit value, unordered.
3388 *
3389 * @returns Current *pi64 value
3390 * @param pi64 Pointer to the 64-bit variable to read.
3391 * The memory pointed to must be writable.
3392 * @remark This will fault if the memory is read-only!
3393 */
3394DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3395{
3396 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3397}
3398
3399
3400/**
3401 * Atomically reads a pointer value, ordered.
3402 *
3403 * @returns Current *pv value
3404 * @param ppv Pointer to the pointer variable to read.
3405 */
3406DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3407{
3408#if ARCH_BITS == 32
3409 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3410#elif ARCH_BITS == 64
3411 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3412#else
3413# error "ARCH_BITS is bogus"
3414#endif
3415}
3416
3417
3418/**
3419 * Atomically reads a pointer value, unordered.
3420 *
3421 * @returns Current *pv value
3422 * @param ppv Pointer to the pointer variable to read.
3423 */
3424DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3425{
3426#if ARCH_BITS == 32
3427 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3428#elif ARCH_BITS == 64
3429 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3430#else
3431# error "ARCH_BITS is bogus"
3432#endif
3433}
3434
3435
3436/**
3437 * Atomically reads a boolean value, ordered.
3438 *
3439 * @returns Current *pf value
3440 * @param pf Pointer to the boolean variable to read.
3441 */
3442DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3443{
3444 ASMMemoryFence();
3445 return *pf; /* byte reads are atomic on x86 */
3446}
3447
3448
3449/**
3450 * Atomically reads a boolean value, unordered.
3451 *
3452 * @returns Current *pf value
3453 * @param pf Pointer to the boolean variable to read.
3454 */
3455DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3456{
3457 return *pf; /* byte reads are atomic on x86 */
3458}
3459
3460
/**
 * Atomically reads a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)): 1, 2, 4 and 8 byte objects are supported,
 * any other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3478
3479
/**
 * Atomically reads a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)): 1, 2, 4 and 8 byte objects are supported,
 * any other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3497
3498
3499/**
3500 * Atomically writes an unsigned 8-bit value, ordered.
3501 *
3502 * @param pu8 Pointer to the 8-bit variable.
3503 * @param u8 The 8-bit value to assign to *pu8.
3504 */
3505DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3506{
3507 ASMAtomicXchgU8(pu8, u8);
3508}
3509
3510
/**
 * Atomically writes an unsigned 8-bit value, unordered.
 *
 * A plain volatile store without any fence.
 *
 * @param   pu8         Pointer to the 8-bit variable.
 * @param   u8          The 8-bit value to assign to *pu8.
 */
DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
{
    *pu8 = u8; /* byte writes are atomic on x86 */
}
3521
3522
3523/**
3524 * Atomically writes a signed 8-bit value, ordered.
3525 *
3526 * @param pi8 Pointer to the 8-bit variable to read.
3527 * @param i8 The 8-bit value to assign to *pi8.
3528 */
3529DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3530{
3531 ASMAtomicXchgS8(pi8, i8);
3532}
3533
3534
/**
 * Atomically writes a signed 8-bit value, unordered.
 *
 * A plain volatile store without any fence.
 *
 * @param   pi8         Pointer to the 8-bit variable to write.
 * @param   i8          The 8-bit value to assign to *pi8.
 */
DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
{
    *pi8 = i8; /* byte writes are atomic on x86 */
}
3545
3546
3547/**
3548 * Atomically writes an unsigned 16-bit value, ordered.
3549 *
3550 * @param pu16 Pointer to the 16-bit variable.
3551 * @param u16 The 16-bit value to assign to *pu16.
3552 */
3553DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3554{
3555 ASMAtomicXchgU16(pu16, u16);
3556}
3557
3558
/**
 * Atomically writes an unsigned 16-bit value, unordered.
 *
 * Asserts that the pointer is 2-byte aligned and performs a plain volatile
 * store without any fence.
 *
 * @param   pu16        Pointer to the 16-bit variable.
 * @param   u16         The 16-bit value to assign to *pu16.
 */
DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
{
    Assert(!((uintptr_t)pu16 & 1));
    *pu16 = u16;
}
3570
3571
3572/**
3573 * Atomically writes a signed 16-bit value, ordered.
3574 *
3575 * @param pi16 Pointer to the 16-bit variable to read.
3576 * @param i16 The 16-bit value to assign to *pi16.
3577 */
3578DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3579{
3580 ASMAtomicXchgS16(pi16, i16);
3581}
3582
3583
/**
 * Atomically writes a signed 16-bit value, unordered.
 *
 * Asserts that the pointer is 2-byte aligned and performs a plain volatile
 * store without any fence.
 *
 * @param   pi16        Pointer to the 16-bit variable to write.
 * @param   i16         The 16-bit value to assign to *pi16.
 */
DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
{
    Assert(!((uintptr_t)pi16 & 1));
    *pi16 = i16;
}
3595
3596
3597/**
3598 * Atomically writes an unsigned 32-bit value, ordered.
3599 *
3600 * @param pu32 Pointer to the 32-bit variable.
3601 * @param u32 The 32-bit value to assign to *pu32.
3602 */
3603DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3604{
3605 ASMAtomicXchgU32(pu32, u32);
3606}
3607
3608
/**
 * Atomically writes an unsigned 32-bit value, unordered.
 *
 * Asserts that the pointer is 4-byte aligned and performs a plain volatile
 * store without any fence.
 *
 * @param   pu32        Pointer to the 32-bit variable.
 * @param   u32         The 32-bit value to assign to *pu32.
 */
DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
{
    Assert(!((uintptr_t)pu32 & 3));
    *pu32 = u32;
}
3620
3621
3622/**
3623 * Atomically writes a signed 32-bit value, ordered.
3624 *
3625 * @param pi32 Pointer to the 32-bit variable to read.
3626 * @param i32 The 32-bit value to assign to *pi32.
3627 */
3628DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3629{
3630 ASMAtomicXchgS32(pi32, i32);
3631}
3632
3633
/**
 * Atomically writes a signed 32-bit value, unordered.
 *
 * Asserts that the pointer is 4-byte aligned and performs a plain volatile
 * store without any fence.
 *
 * @param   pi32        Pointer to the 32-bit variable to write.
 * @param   i32         The 32-bit value to assign to *pi32.
 */
DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
{
    Assert(!((uintptr_t)pi32 & 3));
    *pi32 = i32;
}
3645
3646
3647/**
3648 * Atomically writes an unsigned 64-bit value, ordered.
3649 *
3650 * @param pu64 Pointer to the 64-bit variable.
3651 * @param u64 The 64-bit value to assign to *pu64.
3652 */
3653DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3654{
3655 ASMAtomicXchgU64(pu64, u64);
3656}
3657
3658
3659/**
3660 * Atomically writes an unsigned 64-bit value, unordered.
3661 *
3662 * @param pu64 Pointer to the 64-bit variable.
3663 * @param u64 The 64-bit value to assign to *pu64.
3664 */
3665DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3666{
3667 Assert(!((uintptr_t)pu64 & 7));
3668#if ARCH_BITS == 64
3669 *pu64 = u64;
3670#else
3671 ASMAtomicXchgU64(pu64, u64);
3672#endif
3673}
3674
3675
3676/**
3677 * Atomically writes a signed 64-bit value, ordered.
3678 *
3679 * @param pi64 Pointer to the 64-bit variable.
3680 * @param i64 The 64-bit value to assign to *pi64.
3681 */
3682DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3683{
3684 ASMAtomicXchgS64(pi64, i64);
3685}
3686
3687
3688/**
3689 * Atomically writes a signed 64-bit value, unordered.
3690 *
3691 * @param pi64 Pointer to the 64-bit variable.
3692 * @param i64 The 64-bit value to assign to *pi64.
3693 */
3694DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3695{
3696 Assert(!((uintptr_t)pi64 & 7));
3697#if ARCH_BITS == 64
3698 *pi64 = i64;
3699#else
3700 ASMAtomicXchgS64(pi64, i64);
3701#endif
3702}
3703
3704
3705/**
3706 * Atomically writes a boolean value, unordered.
3707 *
3708 * @param pf Pointer to the boolean variable.
3709 * @param f The boolean value to assign to *pf.
3710 */
3711DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3712{
3713 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3714}
3715
3716
/**
 * Atomically writes a boolean value, unordered.
 *
 * A plain volatile store without any fence.
 *
 * @param   pf          Pointer to the boolean variable.
 * @param   f           The boolean value to assign to *pf.
 */
DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
{
    *pf = f; /* byte writes are atomic on x86 */
}
3727
3728
3729/**
3730 * Atomically writes a pointer value, ordered.
3731 *
3732 * @returns Current *pv value
3733 * @param ppv Pointer to the pointer variable.
3734 * @param pv The pointer value to assigne to *ppv.
3735 */
3736DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3737{
3738#if ARCH_BITS == 32
3739 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3740#elif ARCH_BITS == 64
3741 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3742#else
3743# error "ARCH_BITS is bogus"
3744#endif
3745}
3746
3747
3748/**
3749 * Atomically writes a pointer value, unordered.
3750 *
3751 * @returns Current *pv value
3752 * @param ppv Pointer to the pointer variable.
3753 * @param pv The pointer value to assigne to *ppv.
3754 */
3755DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3756{
3757#if ARCH_BITS == 32
3758 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3759#elif ARCH_BITS == 64
3760 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3761#else
3762# error "ARCH_BITS is bogus"
3763#endif
3764}
3765
3766
/**
 * Atomically writes a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)): 1, 2, 4 and 8 byte objects are supported,
 * any other size triggers an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3784
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicUoWriteU8, ASMAtomicUoWriteU16,
 * ASMAtomicUoWriteU32 or ASMAtomicUoWriteU64.  Any other size ends up in
 * AssertMsgFailed and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3802
3803
3804
3805
3806/**
3807 * Invalidate page.
3808 *
3809 * @param pv Address of the page to invalidate.
3810 */
3811#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3812DECLASM(void) ASMInvalidatePage(void *pv);
3813#else
3814DECLINLINE(void) ASMInvalidatePage(void *pv)
3815{
3816# if RT_INLINE_ASM_USES_INTRIN
3817 __invlpg(pv);
3818
3819# elif RT_INLINE_ASM_GNU_STYLE
3820 __asm__ __volatile__("invlpg %0\n\t"
3821 : : "m" (*(uint8_t *)pv));
3822# else
3823 __asm
3824 {
3825# ifdef RT_ARCH_AMD64
3826 mov rax, [pv]
3827 invlpg [rax]
3828# else
3829 mov eax, [pv]
3830 invlpg [eax]
3831# endif
3832 }
3833# endif
3834}
3835#endif
3836
3837
/* The page helpers in this file hard-code a 4K page size; refuse to build if a
   visible PAGE_SIZE disagrees (not checked when NT_INCLUDED is defined). */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
3843
3844/**
3845 * Zeros a 4K memory page.
3846 *
3847 * @param pv Pointer to the memory block. This must be page aligned.
3848 */
3849#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3850DECLASM(void) ASMMemZeroPage(volatile void *pv);
3851# else
3852DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3853{
3854# if RT_INLINE_ASM_USES_INTRIN
3855# ifdef RT_ARCH_AMD64
3856 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3857# else
3858 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3859# endif
3860
3861# elif RT_INLINE_ASM_GNU_STYLE
3862 RTCCUINTREG uDummy;
3863# ifdef RT_ARCH_AMD64
3864 __asm__ __volatile__ ("rep stosq"
3865 : "=D" (pv),
3866 "=c" (uDummy)
3867 : "0" (pv),
3868 "c" (0x1000 >> 3),
3869 "a" (0)
3870 : "memory");
3871# else
3872 __asm__ __volatile__ ("rep stosl"
3873 : "=D" (pv),
3874 "=c" (uDummy)
3875 : "0" (pv),
3876 "c" (0x1000 >> 2),
3877 "a" (0)
3878 : "memory");
3879# endif
3880# else
3881 __asm
3882 {
3883# ifdef RT_ARCH_AMD64
3884 xor rax, rax
3885 mov ecx, 0200h
3886 mov rdi, [pv]
3887 rep stosq
3888# else
3889 xor eax, eax
3890 mov ecx, 0400h
3891 mov edi, [pv]
3892 rep stosd
3893# endif
3894 }
3895# endif
3896}
3897# endif
3898
3899
3900/**
3901 * Zeros a memory block with a 32-bit aligned size.
3902 *
3903 * @param pv Pointer to the memory block.
3904 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3905 */
3906#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3907DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3908#else
3909DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3910{
3911# if RT_INLINE_ASM_USES_INTRIN
3912# ifdef RT_ARCH_AMD64
3913 if (!(cb & 7))
3914 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3915 else
3916# endif
3917 __stosd((unsigned long *)pv, 0, cb / 4);
3918
3919# elif RT_INLINE_ASM_GNU_STYLE
3920 __asm__ __volatile__ ("rep stosl"
3921 : "=D" (pv),
3922 "=c" (cb)
3923 : "0" (pv),
3924 "1" (cb >> 2),
3925 "a" (0)
3926 : "memory");
3927# else
3928 __asm
3929 {
3930 xor eax, eax
3931# ifdef RT_ARCH_AMD64
3932 mov rcx, [cb]
3933 shr rcx, 2
3934 mov rdi, [pv]
3935# else
3936 mov ecx, [cb]
3937 shr ecx, 2
3938 mov edi, [pv]
3939# endif
3940 rep stosd
3941 }
3942# endif
3943}
3944#endif
3945
3946
3947/**
3948 * Fills a memory block with a 32-bit aligned size.
3949 *
3950 * @param pv Pointer to the memory block.
3951 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3952 * @param u32 The value to fill with.
3953 */
3954#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3955DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3956#else
3957DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3958{
3959# if RT_INLINE_ASM_USES_INTRIN
3960# ifdef RT_ARCH_AMD64
3961 if (!(cb & 7))
3962 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3963 else
3964# endif
3965 __stosd((unsigned long *)pv, u32, cb / 4);
3966
3967# elif RT_INLINE_ASM_GNU_STYLE
3968 __asm__ __volatile__ ("rep stosl"
3969 : "=D" (pv),
3970 "=c" (cb)
3971 : "0" (pv),
3972 "1" (cb >> 2),
3973 "a" (u32)
3974 : "memory");
3975# else
3976 __asm
3977 {
3978# ifdef RT_ARCH_AMD64
3979 mov rcx, [cb]
3980 shr rcx, 2
3981 mov rdi, [pv]
3982# else
3983 mov ecx, [cb]
3984 shr ecx, 2
3985 mov edi, [pv]
3986# endif
3987 mov eax, [u32]
3988 rep stosd
3989 }
3990# endif
3991}
3992#endif
3993
3994
3995/**
3996 * Checks if a memory block is filled with the specified byte.
3997 *
3998 * This is a sort of inverted memchr.
3999 *
4000 * @returns Pointer to the byte which doesn't equal u8.
4001 * @returns NULL if all equal to u8.
4002 *
4003 * @param pv Pointer to the memory block.
4004 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4005 * @param u8 The value it's supposed to be filled with.
4006 */
4007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4008DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4009#else
4010DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4011{
4012/** @todo rewrite this in inline assembly? */
4013 uint8_t const *pb = (uint8_t const *)pv;
4014 for (; cb; cb--, pb++)
4015 if (RT_UNLIKELY(*pb != u8))
4016 return (void *)pb;
4017 return NULL;
4018}
4019#endif
4020
4021
4022/**
4023 * Checks if a memory block is filled with the specified 32-bit value.
4024 *
4025 * This is a sort of inverted memchr.
4026 *
4027 * @returns Pointer to the first value which doesn't equal u32.
4028 * @returns NULL if all equal to u32.
4029 *
4030 * @param pv Pointer to the memory block.
4031 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4032 * @param u32 The value it's supposed to be filled with.
4033 */
4034#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4035DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4036#else
4037DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4038{
4039/** @todo rewrite this in inline assembly? */
4040 uint32_t const *pu32 = (uint32_t const *)pv;
4041 for (; cb; cb -= 4, pu32++)
4042 if (RT_UNLIKELY(*pu32 != u32))
4043 return (uint32_t *)pu32;
4044 return NULL;
4045}
4046#endif
4047
4048
4049/**
4050 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4051 *
4052 * @returns u32F1 * u32F2.
4053 */
4054#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4055DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4056#else
4057DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4058{
4059# ifdef RT_ARCH_AMD64
4060 return (uint64_t)u32F1 * u32F2;
4061# else /* !RT_ARCH_AMD64 */
4062 uint64_t u64;
4063# if RT_INLINE_ASM_GNU_STYLE
4064 __asm__ __volatile__("mull %%edx"
4065 : "=A" (u64)
4066 : "a" (u32F2), "d" (u32F1));
4067# else
4068 __asm
4069 {
4070 mov edx, [u32F1]
4071 mov eax, [u32F2]
4072 mul edx
4073 mov dword ptr [u64], eax
4074 mov dword ptr [u64 + 4], edx
4075 }
4076# endif
4077 return u64;
4078# endif /* !RT_ARCH_AMD64 */
4079}
4080#endif
4081
4082
4083/**
4084 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4085 *
4086 * @returns u32F1 * u32F2.
4087 */
4088#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4089DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4090#else
4091DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4092{
4093# ifdef RT_ARCH_AMD64
4094 return (int64_t)i32F1 * i32F2;
4095# else /* !RT_ARCH_AMD64 */
4096 int64_t i64;
4097# if RT_INLINE_ASM_GNU_STYLE
4098 __asm__ __volatile__("imull %%edx"
4099 : "=A" (i64)
4100 : "a" (i32F2), "d" (i32F1));
4101# else
4102 __asm
4103 {
4104 mov edx, [i32F1]
4105 mov eax, [i32F2]
4106 imul edx
4107 mov dword ptr [i64], eax
4108 mov dword ptr [i64 + 4], edx
4109 }
4110# endif
4111 return i64;
4112# endif /* !RT_ARCH_AMD64 */
4113}
4114#endif
4115
4116
4117/**
4118 * Devides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4119 *
4120 * @returns u64 / u32.
4121 */
4122#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4123DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4124#else
4125DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4126{
4127# ifdef RT_ARCH_AMD64
4128 return (uint32_t)(u64 / u32);
4129# else /* !RT_ARCH_AMD64 */
4130# if RT_INLINE_ASM_GNU_STYLE
4131 RTCCUINTREG uDummy;
4132 __asm__ __volatile__("divl %3"
4133 : "=a" (u32), "=d"(uDummy)
4134 : "A" (u64), "r" (u32));
4135# else
4136 __asm
4137 {
4138 mov eax, dword ptr [u64]
4139 mov edx, dword ptr [u64 + 4]
4140 mov ecx, [u32]
4141 div ecx
4142 mov [u32], eax
4143 }
4144# endif
4145 return u32;
4146# endif /* !RT_ARCH_AMD64 */
4147}
4148#endif
4149
4150
4151/**
4152 * Devides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4153 *
4154 * @returns u64 / u32.
4155 */
4156#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4157DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4158#else
4159DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4160{
4161# ifdef RT_ARCH_AMD64
4162 return (int32_t)(i64 / i32);
4163# else /* !RT_ARCH_AMD64 */
4164# if RT_INLINE_ASM_GNU_STYLE
4165 RTCCUINTREG iDummy;
4166 __asm__ __volatile__("idivl %3"
4167 : "=a" (i32), "=d"(iDummy)
4168 : "A" (i64), "r" (i32));
4169# else
4170 __asm
4171 {
4172 mov eax, dword ptr [i64]
4173 mov edx, dword ptr [i64 + 4]
4174 mov ecx, [i32]
4175 idiv ecx
4176 mov [i32], eax
4177 }
4178# endif
4179 return i32;
4180# endif /* !RT_ARCH_AMD64 */
4181}
4182#endif
4183
4184
4185/**
4186 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
4187 * using a 96 bit intermediate result.
4188 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4189 * __udivdi3 and __umoddi3 even if this inline function is not used.
4190 *
4191 * @returns (u64A * u32B) / u32C.
4192 * @param u64A The 64-bit value.
4193 * @param u32B The 32-bit value to multiple by A.
4194 * @param u32C The 32-bit value to divide A*B by.
4195 */
4196#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4197DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4198#else
4199DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4200{
4201# if RT_INLINE_ASM_GNU_STYLE
4202# ifdef RT_ARCH_AMD64
4203 uint64_t u64Result, u64Spill;
4204 __asm__ __volatile__("mulq %2\n\t"
4205 "divq %3\n\t"
4206 : "=a" (u64Result),
4207 "=d" (u64Spill)
4208 : "r" ((uint64_t)u32B),
4209 "r" ((uint64_t)u32C),
4210 "0" (u64A),
4211 "1" (0));
4212 return u64Result;
4213# else
4214 uint32_t u32Dummy;
4215 uint64_t u64Result;
4216 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4217 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4218 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4219 eax = u64A.hi */
4220 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4221 edx = u32C */
4222 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4223 edx = u32B */
4224 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4225 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4226 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4227 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4228 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4229 edx = u64Hi % u32C */
4230 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4231 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4232 "divl %%ecx \n\t" /* u64Result.lo */
4233 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4234 : "=A"(u64Result), "=c"(u32Dummy),
4235 "=S"(u32Dummy), "=D"(u32Dummy)
4236 : "a"((uint32_t)u64A),
4237 "S"((uint32_t)(u64A >> 32)),
4238 "c"(u32B),
4239 "D"(u32C));
4240 return u64Result;
4241# endif
4242# else
4243 RTUINT64U u;
4244 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4245 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4246 u64Hi += (u64Lo >> 32);
4247 u.s.Hi = (uint32_t)(u64Hi / u32C);
4248 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4249 return u.u;
4250# endif
4251}
4252#endif
4253
4254
4255/**
4256 * Probes a byte pointer for read access.
4257 *
4258 * While the function will not fault if the byte is not read accessible,
4259 * the idea is to do this in a safe place like before acquiring locks
4260 * and such like.
4261 *
4262 * Also, this functions guarantees that an eager compiler is not going
4263 * to optimize the probing away.
4264 *
4265 * @param pvByte Pointer to the byte.
4266 */
4267#if RT_INLINE_ASM_EXTERNAL
4268DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4269#else
4270DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4271{
4272 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4273 uint8_t u8;
4274# if RT_INLINE_ASM_GNU_STYLE
4275 __asm__ __volatile__("movb (%1), %0\n\t"
4276 : "=r" (u8)
4277 : "r" (pvByte));
4278# else
4279 __asm
4280 {
4281# ifdef RT_ARCH_AMD64
4282 mov rax, [pvByte]
4283 mov al, [rax]
4284# else
4285 mov eax, [pvByte]
4286 mov al, [eax]
4287# endif
4288 mov [u8], al
4289 }
4290# endif
4291 return u8;
4292}
4293#endif
4294
4295/**
4296 * Probes a buffer for read access page by page.
4297 *
4298 * While the function will fault if the buffer is not fully read
4299 * accessible, the idea is to do this in a safe place like before
4300 * acquiring locks and such like.
4301 *
4302 * Also, this functions guarantees that an eager compiler is not going
4303 * to optimize the probing away.
4304 *
4305 * @param pvBuf Pointer to the buffer.
4306 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4307 */
4308DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4309{
4310 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4311 /* the first byte */
4312 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4313 ASMProbeReadByte(pu8);
4314
4315 /* the pages in between pages. */
4316 while (cbBuf > /*PAGE_SIZE*/0x1000)
4317 {
4318 ASMProbeReadByte(pu8);
4319 cbBuf -= /*PAGE_SIZE*/0x1000;
4320 pu8 += /*PAGE_SIZE*/0x1000;
4321 }
4322
4323 /* the last byte */
4324 ASMProbeReadByte(pu8 + cbBuf - 1);
4325}
4326
4327
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
4344
4345
4346
4347/** @defgroup grp_inline_bits Bit Operations
4348 * @{
4349 */
4350
4351
4352/**
4353 * Sets a bit in a bitmap.
4354 *
4355 * @param pvBitmap Pointer to the bitmap.
4356 * @param iBit The bit to set.
4357 */
4358#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4359DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4360#else
4361DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4362{
4363# if RT_INLINE_ASM_USES_INTRIN
4364 _bittestandset((long *)pvBitmap, iBit);
4365
4366# elif RT_INLINE_ASM_GNU_STYLE
4367 __asm__ __volatile__ ("btsl %1, %0"
4368 : "=m" (*(volatile long *)pvBitmap)
4369 : "Ir" (iBit)
4370 : "memory");
4371# else
4372 __asm
4373 {
4374# ifdef RT_ARCH_AMD64
4375 mov rax, [pvBitmap]
4376 mov edx, [iBit]
4377 bts [rax], edx
4378# else
4379 mov eax, [pvBitmap]
4380 mov edx, [iBit]
4381 bts [eax], edx
4382# endif
4383 }
4384# endif
4385}
4386#endif
4387
4388
4389/**
4390 * Atomically sets a bit in a bitmap, ordered.
4391 *
4392 * @param pvBitmap Pointer to the bitmap.
4393 * @param iBit The bit to set.
4394 */
4395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4396DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4397#else
4398DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4399{
4400# if RT_INLINE_ASM_USES_INTRIN
4401 _interlockedbittestandset((long *)pvBitmap, iBit);
4402# elif RT_INLINE_ASM_GNU_STYLE
4403 __asm__ __volatile__ ("lock; btsl %1, %0"
4404 : "=m" (*(volatile long *)pvBitmap)
4405 : "Ir" (iBit)
4406 : "memory");
4407# else
4408 __asm
4409 {
4410# ifdef RT_ARCH_AMD64
4411 mov rax, [pvBitmap]
4412 mov edx, [iBit]
4413 lock bts [rax], edx
4414# else
4415 mov eax, [pvBitmap]
4416 mov edx, [iBit]
4417 lock bts [eax], edx
4418# endif
4419 }
4420# endif
4421}
4422#endif
4423
4424
4425/**
4426 * Clears a bit in a bitmap.
4427 *
4428 * @param pvBitmap Pointer to the bitmap.
4429 * @param iBit The bit to clear.
4430 */
4431#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4432DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4433#else
4434DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4435{
4436# if RT_INLINE_ASM_USES_INTRIN
4437 _bittestandreset((long *)pvBitmap, iBit);
4438
4439# elif RT_INLINE_ASM_GNU_STYLE
4440 __asm__ __volatile__ ("btrl %1, %0"
4441 : "=m" (*(volatile long *)pvBitmap)
4442 : "Ir" (iBit)
4443 : "memory");
4444# else
4445 __asm
4446 {
4447# ifdef RT_ARCH_AMD64
4448 mov rax, [pvBitmap]
4449 mov edx, [iBit]
4450 btr [rax], edx
4451# else
4452 mov eax, [pvBitmap]
4453 mov edx, [iBit]
4454 btr [eax], edx
4455# endif
4456 }
4457# endif
4458}
4459#endif
4460
4461
4462/**
4463 * Atomically clears a bit in a bitmap, ordered.
4464 *
4465 * @param pvBitmap Pointer to the bitmap.
4466 * @param iBit The bit to toggle set.
4467 * @remark No memory barrier, take care on smp.
4468 */
4469#if RT_INLINE_ASM_EXTERNAL
4470DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4471#else
4472DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4473{
4474# if RT_INLINE_ASM_GNU_STYLE
4475 __asm__ __volatile__ ("lock; btrl %1, %0"
4476 : "=m" (*(volatile long *)pvBitmap)
4477 : "Ir" (iBit)
4478 : "memory");
4479# else
4480 __asm
4481 {
4482# ifdef RT_ARCH_AMD64
4483 mov rax, [pvBitmap]
4484 mov edx, [iBit]
4485 lock btr [rax], edx
4486# else
4487 mov eax, [pvBitmap]
4488 mov edx, [iBit]
4489 lock btr [eax], edx
4490# endif
4491 }
4492# endif
4493}
4494#endif
4495
4496
4497/**
4498 * Toggles a bit in a bitmap.
4499 *
4500 * @param pvBitmap Pointer to the bitmap.
4501 * @param iBit The bit to toggle.
4502 */
4503#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4504DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4505#else
4506DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4507{
4508# if RT_INLINE_ASM_USES_INTRIN
4509 _bittestandcomplement((long *)pvBitmap, iBit);
4510# elif RT_INLINE_ASM_GNU_STYLE
4511 __asm__ __volatile__ ("btcl %1, %0"
4512 : "=m" (*(volatile long *)pvBitmap)
4513 : "Ir" (iBit)
4514 : "memory");
4515# else
4516 __asm
4517 {
4518# ifdef RT_ARCH_AMD64
4519 mov rax, [pvBitmap]
4520 mov edx, [iBit]
4521 btc [rax], edx
4522# else
4523 mov eax, [pvBitmap]
4524 mov edx, [iBit]
4525 btc [eax], edx
4526# endif
4527 }
4528# endif
4529}
4530#endif
4531
4532
4533/**
4534 * Atomically toggles a bit in a bitmap, ordered.
4535 *
4536 * @param pvBitmap Pointer to the bitmap.
4537 * @param iBit The bit to test and set.
4538 */
4539#if RT_INLINE_ASM_EXTERNAL
4540DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4541#else
4542DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4543{
4544# if RT_INLINE_ASM_GNU_STYLE
4545 __asm__ __volatile__ ("lock; btcl %1, %0"
4546 : "=m" (*(volatile long *)pvBitmap)
4547 : "Ir" (iBit)
4548 : "memory");
4549# else
4550 __asm
4551 {
4552# ifdef RT_ARCH_AMD64
4553 mov rax, [pvBitmap]
4554 mov edx, [iBit]
4555 lock btc [rax], edx
4556# else
4557 mov eax, [pvBitmap]
4558 mov edx, [iBit]
4559 lock btc [eax], edx
4560# endif
4561 }
4562# endif
4563}
4564#endif
4565
4566
4567/**
4568 * Tests and sets a bit in a bitmap.
4569 *
4570 * @returns true if the bit was set.
4571 * @returns false if the bit was clear.
4572 * @param pvBitmap Pointer to the bitmap.
4573 * @param iBit The bit to test and set.
4574 */
4575#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4576DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4577#else
4578DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4579{
4580 union { bool f; uint32_t u32; uint8_t u8; } rc;
4581# if RT_INLINE_ASM_USES_INTRIN
4582 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4583
4584# elif RT_INLINE_ASM_GNU_STYLE
4585 __asm__ __volatile__ ("btsl %2, %1\n\t"
4586 "setc %b0\n\t"
4587 "andl $1, %0\n\t"
4588 : "=q" (rc.u32),
4589 "=m" (*(volatile long *)pvBitmap)
4590 : "Ir" (iBit)
4591 : "memory");
4592# else
4593 __asm
4594 {
4595 mov edx, [iBit]
4596# ifdef RT_ARCH_AMD64
4597 mov rax, [pvBitmap]
4598 bts [rax], edx
4599# else
4600 mov eax, [pvBitmap]
4601 bts [eax], edx
4602# endif
4603 setc al
4604 and eax, 1
4605 mov [rc.u32], eax
4606 }
4607# endif
4608 return rc.f;
4609}
4610#endif
4611
4612
4613/**
4614 * Atomically tests and sets a bit in a bitmap, ordered.
4615 *
4616 * @returns true if the bit was set.
4617 * @returns false if the bit was clear.
4618 * @param pvBitmap Pointer to the bitmap.
4619 * @param iBit The bit to set.
4620 */
4621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4622DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4623#else
4624DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4625{
4626 union { bool f; uint32_t u32; uint8_t u8; } rc;
4627# if RT_INLINE_ASM_USES_INTRIN
4628 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4629# elif RT_INLINE_ASM_GNU_STYLE
4630 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4631 "setc %b0\n\t"
4632 "andl $1, %0\n\t"
4633 : "=q" (rc.u32),
4634 "=m" (*(volatile long *)pvBitmap)
4635 : "Ir" (iBit)
4636 : "memory");
4637# else
4638 __asm
4639 {
4640 mov edx, [iBit]
4641# ifdef RT_ARCH_AMD64
4642 mov rax, [pvBitmap]
4643 lock bts [rax], edx
4644# else
4645 mov eax, [pvBitmap]
4646 lock bts [eax], edx
4647# endif
4648 setc al
4649 and eax, 1
4650 mov [rc.u32], eax
4651 }
4652# endif
4653 return rc.f;
4654}
4655#endif
4656
4657
4658/**
4659 * Tests and clears a bit in a bitmap.
4660 *
4661 * @returns true if the bit was set.
4662 * @returns false if the bit was clear.
4663 * @param pvBitmap Pointer to the bitmap.
4664 * @param iBit The bit to test and clear.
4665 */
4666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4667DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4668#else
4669DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4670{
4671 union { bool f; uint32_t u32; uint8_t u8; } rc;
4672# if RT_INLINE_ASM_USES_INTRIN
4673 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4674
4675# elif RT_INLINE_ASM_GNU_STYLE
4676 __asm__ __volatile__ ("btrl %2, %1\n\t"
4677 "setc %b0\n\t"
4678 "andl $1, %0\n\t"
4679 : "=q" (rc.u32),
4680 "=m" (*(volatile long *)pvBitmap)
4681 : "Ir" (iBit)
4682 : "memory");
4683# else
4684 __asm
4685 {
4686 mov edx, [iBit]
4687# ifdef RT_ARCH_AMD64
4688 mov rax, [pvBitmap]
4689 btr [rax], edx
4690# else
4691 mov eax, [pvBitmap]
4692 btr [eax], edx
4693# endif
4694 setc al
4695 and eax, 1
4696 mov [rc.u32], eax
4697 }
4698# endif
4699 return rc.f;
4700}
4701#endif
4702
4703
4704/**
4705 * Atomically tests and clears a bit in a bitmap, ordered.
4706 *
4707 * @returns true if the bit was set.
4708 * @returns false if the bit was clear.
4709 * @param pvBitmap Pointer to the bitmap.
4710 * @param iBit The bit to test and clear.
4711 * @remark No memory barrier, take care on smp.
4712 */
4713#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4714DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4715#else
4716DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4717{
4718 union { bool f; uint32_t u32; uint8_t u8; } rc;
4719# if RT_INLINE_ASM_USES_INTRIN
4720 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4721
4722# elif RT_INLINE_ASM_GNU_STYLE
4723 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4724 "setc %b0\n\t"
4725 "andl $1, %0\n\t"
4726 : "=q" (rc.u32),
4727 "=m" (*(volatile long *)pvBitmap)
4728 : "Ir" (iBit)
4729 : "memory");
4730# else
4731 __asm
4732 {
4733 mov edx, [iBit]
4734# ifdef RT_ARCH_AMD64
4735 mov rax, [pvBitmap]
4736 lock btr [rax], edx
4737# else
4738 mov eax, [pvBitmap]
4739 lock btr [eax], edx
4740# endif
4741 setc al
4742 and eax, 1
4743 mov [rc.u32], eax
4744 }
4745# endif
4746 return rc.f;
4747}
4748#endif
4749
4750
4751/**
4752 * Tests and toggles a bit in a bitmap.
4753 *
4754 * @returns true if the bit was set.
4755 * @returns false if the bit was clear.
4756 * @param pvBitmap Pointer to the bitmap.
4757 * @param iBit The bit to test and toggle.
4758 */
4759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4760DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4761#else
4762DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4763{
4764 union { bool f; uint32_t u32; uint8_t u8; } rc;
4765# if RT_INLINE_ASM_USES_INTRIN
4766 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4767
4768# elif RT_INLINE_ASM_GNU_STYLE
4769 __asm__ __volatile__ ("btcl %2, %1\n\t"
4770 "setc %b0\n\t"
4771 "andl $1, %0\n\t"
4772 : "=q" (rc.u32),
4773 "=m" (*(volatile long *)pvBitmap)
4774 : "Ir" (iBit)
4775 : "memory");
4776# else
4777 __asm
4778 {
4779 mov edx, [iBit]
4780# ifdef RT_ARCH_AMD64
4781 mov rax, [pvBitmap]
4782 btc [rax], edx
4783# else
4784 mov eax, [pvBitmap]
4785 btc [eax], edx
4786# endif
4787 setc al
4788 and eax, 1
4789 mov [rc.u32], eax
4790 }
4791# endif
4792 return rc.f;
4793}
4794#endif
4795
4796
4797/**
4798 * Atomically tests and toggles a bit in a bitmap, ordered.
4799 *
4800 * @returns true if the bit was set.
4801 * @returns false if the bit was clear.
4802 * @param pvBitmap Pointer to the bitmap.
4803 * @param iBit The bit to test and toggle.
4804 */
4805#if RT_INLINE_ASM_EXTERNAL
4806DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4807#else
4808DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4809{
4810 union { bool f; uint32_t u32; uint8_t u8; } rc;
4811# if RT_INLINE_ASM_GNU_STYLE
4812 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4813 "setc %b0\n\t"
4814 "andl $1, %0\n\t"
4815 : "=q" (rc.u32),
4816 "=m" (*(volatile long *)pvBitmap)
4817 : "Ir" (iBit)
4818 : "memory");
4819# else
4820 __asm
4821 {
4822 mov edx, [iBit]
4823# ifdef RT_ARCH_AMD64
4824 mov rax, [pvBitmap]
4825 lock btc [rax], edx
4826# else
4827 mov eax, [pvBitmap]
4828 lock btc [eax], edx
4829# endif
4830 setc al
4831 and eax, 1
4832 mov [rc.u32], eax
4833 }
4834# endif
4835 return rc.f;
4836}
4837#endif
4838
4839
4840/**
4841 * Tests if a bit in a bitmap is set.
4842 *
4843 * @returns true if the bit is set.
4844 * @returns false if the bit is clear.
4845 * @param pvBitmap Pointer to the bitmap.
4846 * @param iBit The bit to test.
4847 */
4848#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4849DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4850#else
4851DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4852{
4853 union { bool f; uint32_t u32; uint8_t u8; } rc;
4854# if RT_INLINE_ASM_USES_INTRIN
4855 rc.u32 = _bittest((long *)pvBitmap, iBit);
4856# elif RT_INLINE_ASM_GNU_STYLE
4857
4858 __asm__ __volatile__ ("btl %2, %1\n\t"
4859 "setc %b0\n\t"
4860 "andl $1, %0\n\t"
4861 : "=q" (rc.u32)
4862 : "m" (*(const volatile long *)pvBitmap),
4863 "Ir" (iBit)
4864 : "memory");
4865# else
4866 __asm
4867 {
4868 mov edx, [iBit]
4869# ifdef RT_ARCH_AMD64
4870 mov rax, [pvBitmap]
4871 bt [rax], edx
4872# else
4873 mov eax, [pvBitmap]
4874 bt [eax], edx
4875# endif
4876 setc al
4877 and eax, 1
4878 mov [rc.u32], eax
4879 }
4880# endif
4881 return rc.f;
4882}
4883#endif
4884
4885
4886/**
4887 * Clears a bit range within a bitmap.
4888 *
4889 * @param pvBitmap Pointer to the bitmap.
4890 * @param iBitStart The First bit to clear.
4891 * @param iBitEnd The first bit not to clear.
4892 */
4893DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4894{
4895 if (iBitStart < iBitEnd)
4896 {
4897 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4898 int iStart = iBitStart & ~31;
4899 int iEnd = iBitEnd & ~31;
4900 if (iStart == iEnd)
4901 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4902 else
4903 {
4904 /* bits in first dword. */
4905 if (iBitStart & 31)
4906 {
4907 *pu32 &= (1 << (iBitStart & 31)) - 1;
4908 pu32++;
4909 iBitStart = iStart + 32;
4910 }
4911
4912 /* whole dword. */
4913 if (iBitStart != iEnd)
4914 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4915
4916 /* bits in last dword. */
4917 if (iBitEnd & 31)
4918 {
4919 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4920 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4921 }
4922 }
4923 }
4924}
4925
4926
4927/**
4928 * Sets a bit range within a bitmap.
4929 *
4930 * @param pvBitmap Pointer to the bitmap.
4931 * @param iBitStart The First bit to set.
4932 * @param iBitEnd The first bit not to set.
4933 */
4934DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4935{
4936 if (iBitStart < iBitEnd)
4937 {
4938 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4939 int iStart = iBitStart & ~31;
4940 int iEnd = iBitEnd & ~31;
4941 if (iStart == iEnd)
4942 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4943 else
4944 {
4945 /* bits in first dword. */
4946 if (iBitStart & 31)
4947 {
4948 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4949 pu32++;
4950 iBitStart = iStart + 32;
4951 }
4952
4953 /* whole dword. */
4954 if (iBitStart != iEnd)
4955 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
4956
4957 /* bits in last dword. */
4958 if (iBitEnd & 31)
4959 {
4960 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4961 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4962 }
4963 }
4964 }
4965}
4966
4967
/**
 * Finds the first clear bit in a bitmap.
 *
 * The inline variants scan the bitmap one 32-bit word at a time for a word
 * that is not all ones, then locate the lowest clear bit inside that word.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (or if cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      (The inline variants pass it through
 *                      RT_ALIGN_32(cBits, 32) before converting it to a
 *                      dword count.)
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        /* NOTE(review): RT_ALIGN_32 is defined elsewhere; presumably it rounds cBits up to a multiple of 32. */
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the operand list):
         *   edx = 0xffffffff  - preloaded result, i.e. -1 when nothing is found.
         *   ecx = cBits >> 5  - number of dwords to scan.
         *   edi = pvBitmap    - scan pointer.
         *   eax = 0xffffffff  - the "all bits set" pattern scasl compares against.
         *   %5  = pvBitmap    - untouched copy of the base address.
         * 'repe scasl' advances while the dwords equal eax.  If the scan ends
         * on a match (je) every dword was all ones and edx stays -1.
         * Otherwise edi is stepped back to the mismatching dword, the xor
         * turns its clear bits into set bits, the byte offset from the base
         * is converted to a bit offset (shl 3) and bsf adds the index of the
         * lowest such bit.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
# ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "xorl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
# else
                             "lea -4(%%edi), %%edi\n\t"
                             "xorl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
# endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0xffffffff));
# else
        /* NOTE(review): RT_ALIGN_32 is defined elsewhere; presumably it rounds cBits up to a multiple of 32. */
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Same algorithm as the GNU variant above: ebx/rbx keeps the base
         * address, edx is preloaded with -1, eax holds the all-ones pattern
         * and ecx the dword count.
         */
        __asm
        {
# ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
# else
            mov     edi, [pvBitmap]
            mov     ebx, edi
# endif
            mov     edx, 0ffffffffh
            mov     eax, edx
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done

# ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            xor     eax, [rdi]
            sub     rdi, rbx
# else
            lea     edi, [edi - 4]
            xor     eax, [edi]
            sub     edi, ebx
# endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    /* Empty bitmap: nothing to find. */
    return -1;
}
#endif
5050
5051
/**
 * Finds the next clear bit in a bitmap.
 *
 * The search starts at iBitPrev + 1.  If that position is not dword aligned
 * the remainder of its dword is inspected directly; everything after that is
 * handed to ASMBitFirstClear.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* iBitPrev becomes the first bit to examine; iBit is its position inside its dword. */
    int iBit = ++iBitPrev & 31;
    /* Advance to the dword containing that bit (dword index times 4 bytes)... */
    pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    /* ...and drop the bits of the skipped dwords from the count. */
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword: invert it so clear bits read as 1, then
           shift out the bits below the starting position. */
        uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev; /* ulBit is relative to the starting bit. */
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        /* bsf yields the lowest set bit of u32; iBit is forced to -1 when u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        /* bsf yields the lowest set bit of u32; iBit is forced to -1 when u32 is zero. */
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev; /* iBit is relative to the starting bit. */
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                /* Rebase: skipped dwords plus the partially inspected one. */
                return iBit + (iBitPrev & ~31) + 32;
        }
    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstClear(pvBitmap, cBits);
        if (iBit >= 0)
            /* Rebase by the bits of the skipped dwords. */
            return iBit + (iBitPrev & ~31);
    }
    /* Nothing found: iBit is -1 or the negative result of ASMBitFirstClear. */
    return iBit;
}
#endif
5119
5120
/**
 * Finds the first set bit in a bitmap.
 *
 * The inline variants scan the bitmap one 32-bit word at a time for a word
 * that is not zero, then locate the lowest set bit inside that word.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (or if cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      (The inline variants pass it through
 *                      RT_ALIGN_32(cBits, 32) before converting it to a
 *                      dword count.)
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        /* NOTE(review): RT_ALIGN_32 is defined elsewhere; presumably it rounds cBits up to a multiple of 32. */
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the operand list):
         *   edx = 0xffffffff  - preloaded result, i.e. -1 when nothing is found.
         *   ecx = cBits >> 5  - number of dwords to scan.
         *   edi = pvBitmap    - scan pointer.
         *   eax = 0           - the "no bits set" pattern scasl compares against.
         *   %5  = pvBitmap    - untouched copy of the base address.
         * 'repe scasl' advances while the dwords equal eax (zero).  If the
         * scan ends on a match (je) every dword was zero and edx stays -1.
         * Otherwise edi is stepped back to the non-zero dword, which is
         * loaded into eax; the byte offset from the base is converted to a
         * bit offset (shl 3) and bsf adds the index of the lowest set bit.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
# ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "movl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
# else
                             "lea -4(%%edi), %%edi\n\t"
                             "movl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
# endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0));
# else
        /* NOTE(review): RT_ALIGN_32 is defined elsewhere; presumably it rounds cBits up to a multiple of 32. */
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Same algorithm as the GNU variant above: ebx/rbx keeps the base
         * address, edx is preloaded with -1, eax is zeroed as the compare
         * pattern and ecx holds the dword count.
         */
        __asm
        {
# ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
# else
            mov     edi, [pvBitmap]
            mov     ebx, edi
# endif
            mov     edx, 0ffffffffh
            xor     eax, eax
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done
# ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            mov     eax, [rdi]
            sub     rdi, rbx
# else
            lea     edi, [edi - 4]
            mov     eax, [edi]
            sub     edi, ebx
# endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    /* Empty bitmap: nothing to find. */
    return -1;
}
#endif
5202
5203
/**
 * Finds the next set bit in a bitmap.
 *
 * The search starts at iBitPrev + 1.  If that position is not dword aligned
 * the remainder of its dword is inspected directly; everything after that is
 * handed to ASMBitFirstSet.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* iBitPrev becomes the first bit to examine; iBit is its position inside its dword. */
    int iBit = ++iBitPrev & 31;
    /* Advance to the dword containing that bit (dword index times 4 bytes)... */
    pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    /* ...and drop the bits of the skipped dwords from the count. */
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword: shift out the bits below the starting position. */
        uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev; /* ulBit is relative to the starting bit. */
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        /* bsf yields the lowest set bit of u32; iBit is forced to -1 when u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        /* bsf yields the lowest set bit of u32; iBit is forced to -1 when u32 is zero. */
        __asm
        {
            mov     edx, u32
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev; /* iBit is relative to the starting bit. */
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                /* Rebase: skipped dwords plus the partially inspected one. */
                return iBit + (iBitPrev & ~31) + 32;
        }

    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstSet(pvBitmap, cBits);
        if (iBit >= 0)
            /* Rebase by the bits of the skipped dwords. */
            return iBit + (iBitPrev & ~31);
    }
    /* Nothing found: iBit is -1 or the negative result of ASMBitFirstSet. */
    return iBit;
}
#endif
5272
5273
/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * All three compiler variants do a forward bit scan (zero based) and add one
 * to the result, or yield 0 when the input has no bits set.
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to ffs() in BSD.
 */
DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    if (_BitScanForward(&iBit, u32))
        iBit++;     /* convert the zero based index to the 1 based one. */
    else
        iBit = 0;   /* no bit set. */
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* bsf; when the source is non-zero (jnz) increment the index, otherwise zero the result. */
    __asm__ __volatile__("bsf %1, %0\n\t"
                         "jnz 1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp 2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    /* bsf; when the source is non-zero (jnz) increment the index, otherwise zero the result. */
    _asm
    {
        bsf     eax, [u32]
        jnz     found
        xor     eax, eax
        jmp     done
    found:
        inc     eax
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
5318
5319
5320/**
5321 * Finds the first bit which is set in the given 32-bit integer.
5322 * Bits are numbered from 1 (least significant) to 32.
5323 *
5324 * @returns index [1..32] of the first set bit.
5325 * @returns 0 if all bits are cleared.
5326 * @param i32 Integer to search for set bits.
5327 * @remark Similar to ffs() in BSD.
5328 */
5329DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5330{
5331 return ASMBitFirstSetU32((uint32_t)i32);
5332}
5333
5334
/**
 * Finds the last bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * All three compiler variants do a reverse bit scan (zero based) and add one
 * to the result, or yield 0 when the input has no bits set.
 *
 * @returns index [1..32] of the last set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to fls() in BSD.
 */
DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    if (_BitScanReverse(&iBit, u32))
        iBit++;     /* convert the zero based index to the 1 based one. */
    else
        iBit = 0;   /* no bit set. */
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* bsr; when the source is non-zero (jnz) increment the index, otherwise zero the result. */
    __asm__ __volatile__("bsrl %1, %0\n\t"
                         "jnz 1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp 2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    /* bsr; when the source is non-zero (jnz) increment the index, otherwise zero the result. */
    _asm
    {
        bsr     eax, [u32]
        jnz     found
        xor     eax, eax
        jmp     done
    found:
        inc     eax
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
5379
5380
5381/**
5382 * Finds the last bit which is set in the given 32-bit integer.
5383 * Bits are numbered from 1 (least significant) to 32.
5384 *
5385 * @returns index [1..32] of the last set bit.
5386 * @returns 0 if all bits are cleared.
5387 * @param i32 Integer to search for set bits.
5388 * @remark Similar to fls() in BSD.
5389 */
5390DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5391{
5392 return ASMBitLastSetS32((uint32_t)i32);
5393}
5394
5395
/**
 * Reverse the byte order of the given 32-bit integer.
 *
 * @returns The value of u32 with its four bytes in reversed order.
 * @param   u32     Integer
 */
DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
{
#if RT_INLINE_ASM_USES_INTRIN
    /* Compiler intrinsic. */
    u32 = _byteswap_ulong(u32);
#elif RT_INLINE_ASM_GNU_STYLE
    /* bswap works in place: the "0" constraint puts the input in the output register. */
    __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
#else
    /* Load, swap in eax, store back into the parameter. */
    _asm
    {
        mov     eax, [u32]
        bswap   eax
        mov     [u32], eax
    }
#endif
    return u32;
}
5416
5417/** @} */
5418
5419
5420/** @} */
5421#endif
5422
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette