VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 8746

Last change on this file since 8746 was 8277, checked in by vboxsync, 17 years ago

indent. put the remarks on the group definition.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 134.0 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when compiling with a Microsoft compiler reporting _MSC_VER 1400
 * or later, in which case <intrin.h> is included and the intrinsics used by
 * this header are enabled.  Set to 0 in every other case (unless already
 * defined by the includer).
 */
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>

  /* Make sure the intrinsics are emitted at every optimization level. */

  /* Barriers, CPUID, interrupt flag, TSC and MSRs. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)

  /* Port I/O. */
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)

  /* TLB invalidation and string stores. */
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)

  /* Control registers. */
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)

  /* Bit scanning, bit testing and byte swapping. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)

  /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)

  /* Intrinsics only requested for AMD64 builds. */
# if defined(RT_ARCH_AMD64)
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif /* _MSC_VER >= 1400 */

#if !defined(RT_INLINE_ASM_USES_INTRIN)
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter don't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
 * static inline uint64_t rdmsr_low(int idx)
 * {
 *     uint32_t low;
 *     __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly, in which
 * case the ASM* functions are implemented in an external .asm file.
 * Defined as 0 otherwise.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft
 *          skipped inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
155
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly,
 * which is assumed for everything that does not define _MSC_VER.
 * Defined as 0 for Microsoft compilers.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
164
165
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - byte-packed image used by ASMGetIDTR and ASMSetIDTR.
 */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
/** IDTR image type. */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR image. */
typedef struct RTIDTR *PRTIDTR;
#pragma pack()
177
#pragma pack(1)
/**
 * GDTR - byte-packed image used by ASMGetGDTR.
 */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
/** GDTR image type. */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR image. */
typedef struct RTGDTR *PRTGDTR;
#pragma pack()
188
189
/** @def ASMReturnAddress
 * Yields the return address of the function (or method) it is used in.
 *
 * Maps to the _ReturnAddress intrinsic on Microsoft compilers and to
 * __builtin_return_address(0) on GCC (and when doxygen is running).
 * Any other compiler is rejected at compile time.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C" void * _ReturnAddress(void);
# else
void * _ReturnAddress(void);
# endif
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380# endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX ecx index
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /* ??? another intrinsic ??? */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944/**
945 * Get cr0.
946 * @returns cr0.
947 */
948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
949DECLASM(RTCCUINTREG) ASMGetCR0(void);
950#else
951DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
952{
953 RTCCUINTREG uCR0;
954# if RT_INLINE_ASM_USES_INTRIN
955 uCR0 = __readcr0();
956
957# elif RT_INLINE_ASM_GNU_STYLE
958# ifdef RT_ARCH_AMD64
959 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
960# else
961 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
962# endif
963# else
964 __asm
965 {
966# ifdef RT_ARCH_AMD64
967 mov rax, cr0
968 mov [uCR0], rax
969# else
970 mov eax, cr0
971 mov [uCR0], eax
972# endif
973 }
974# endif
975 return uCR0;
976}
977#endif
978
979
980/**
981 * Sets the CR0 register.
982 * @param uCR0 The new CR0 value.
983 */
984#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
985DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
986#else
987DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
988{
989# if RT_INLINE_ASM_USES_INTRIN
990 __writecr0(uCR0);
991
992# elif RT_INLINE_ASM_GNU_STYLE
993# ifdef RT_ARCH_AMD64
994 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
995# else
996 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
997# endif
998# else
999 __asm
1000 {
1001# ifdef RT_ARCH_AMD64
1002 mov rax, [uCR0]
1003 mov cr0, rax
1004# else
1005 mov eax, [uCR0]
1006 mov cr0, eax
1007# endif
1008 }
1009# endif
1010}
1011#endif
1012
1013
1014/**
1015 * Get cr2.
1016 * @returns cr2.
1017 */
1018#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1019DECLASM(RTCCUINTREG) ASMGetCR2(void);
1020#else
1021DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1022{
1023 RTCCUINTREG uCR2;
1024# if RT_INLINE_ASM_USES_INTRIN
1025 uCR2 = __readcr2();
1026
1027# elif RT_INLINE_ASM_GNU_STYLE
1028# ifdef RT_ARCH_AMD64
1029 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1030# else
1031 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1032# endif
1033# else
1034 __asm
1035 {
1036# ifdef RT_ARCH_AMD64
1037 mov rax, cr2
1038 mov [uCR2], rax
1039# else
1040 mov eax, cr2
1041 mov [uCR2], eax
1042# endif
1043 }
1044# endif
1045 return uCR2;
1046}
1047#endif
1048
1049
1050/**
1051 * Sets the CR2 register.
1052 * @param uCR2 The new CR0 value.
1053 */
1054#if RT_INLINE_ASM_EXTERNAL
1055DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1056#else
1057DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1058{
1059# if RT_INLINE_ASM_GNU_STYLE
1060# ifdef RT_ARCH_AMD64
1061 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1062# else
1063 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1064# endif
1065# else
1066 __asm
1067 {
1068# ifdef RT_ARCH_AMD64
1069 mov rax, [uCR2]
1070 mov cr2, rax
1071# else
1072 mov eax, [uCR2]
1073 mov cr2, eax
1074# endif
1075 }
1076# endif
1077}
1078#endif
1079
1080
1081/**
1082 * Get cr3.
1083 * @returns cr3.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(RTCCUINTREG) ASMGetCR3(void);
1087#else
1088DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1089{
1090 RTCCUINTREG uCR3;
1091# if RT_INLINE_ASM_USES_INTRIN
1092 uCR3 = __readcr3();
1093
1094# elif RT_INLINE_ASM_GNU_STYLE
1095# ifdef RT_ARCH_AMD64
1096 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1097# else
1098 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1099# endif
1100# else
1101 __asm
1102 {
1103# ifdef RT_ARCH_AMD64
1104 mov rax, cr3
1105 mov [uCR3], rax
1106# else
1107 mov eax, cr3
1108 mov [uCR3], eax
1109# endif
1110 }
1111# endif
1112 return uCR3;
1113}
1114#endif
1115
1116
1117/**
1118 * Sets the CR3 register.
1119 *
1120 * @param uCR3 New CR3 value.
1121 */
1122#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1123DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1124#else
1125DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1126{
1127# if RT_INLINE_ASM_USES_INTRIN
1128 __writecr3(uCR3);
1129
1130# elif RT_INLINE_ASM_GNU_STYLE
1131# ifdef RT_ARCH_AMD64
1132 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1133# else
1134 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1135# endif
1136# else
1137 __asm
1138 {
1139# ifdef RT_ARCH_AMD64
1140 mov rax, [uCR3]
1141 mov cr3, rax
1142# else
1143 mov eax, [uCR3]
1144 mov cr3, eax
1145# endif
1146 }
1147# endif
1148}
1149#endif
1150
1151
1152/**
1153 * Reloads the CR3 register.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1156DECLASM(void) ASMReloadCR3(void);
1157#else
1158DECLINLINE(void) ASMReloadCR3(void)
1159{
1160# if RT_INLINE_ASM_USES_INTRIN
1161 __writecr3(__readcr3());
1162
1163# elif RT_INLINE_ASM_GNU_STYLE
1164 RTCCUINTREG u;
1165# ifdef RT_ARCH_AMD64
1166 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1167 "movq %0, %%cr3\n\t"
1168 : "=r" (u));
1169# else
1170 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1171 "movl %0, %%cr3\n\t"
1172 : "=r" (u));
1173# endif
1174# else
1175 __asm
1176 {
1177# ifdef RT_ARCH_AMD64
1178 mov rax, cr3
1179 mov cr3, rax
1180# else
1181 mov eax, cr3
1182 mov cr3, eax
1183# endif
1184 }
1185# endif
1186}
1187#endif
1188
1189
1190/**
1191 * Get cr4.
1192 * @returns cr4.
1193 */
1194#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1195DECLASM(RTCCUINTREG) ASMGetCR4(void);
1196#else
1197DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1198{
1199 RTCCUINTREG uCR4;
1200# if RT_INLINE_ASM_USES_INTRIN
1201 uCR4 = __readcr4();
1202
1203# elif RT_INLINE_ASM_GNU_STYLE
1204# ifdef RT_ARCH_AMD64
1205 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1206# else
1207 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1208# endif
1209# else
1210 __asm
1211 {
1212# ifdef RT_ARCH_AMD64
1213 mov rax, cr4
1214 mov [uCR4], rax
1215# else
1216 push eax /* just in case */
1217 /*mov eax, cr4*/
1218 _emit 0x0f
1219 _emit 0x20
1220 _emit 0xe0
1221 mov [uCR4], eax
1222 pop eax
1223# endif
1224 }
1225# endif
1226 return uCR4;
1227}
1228#endif
1229
1230
1231/**
1232 * Sets the CR4 register.
1233 *
1234 * @param uCR4 New CR4 value.
1235 */
1236#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1237DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1238#else
1239DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1240{
1241# if RT_INLINE_ASM_USES_INTRIN
1242 __writecr4(uCR4);
1243
1244# elif RT_INLINE_ASM_GNU_STYLE
1245# ifdef RT_ARCH_AMD64
1246 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1247# else
1248 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1249# endif
1250# else
1251 __asm
1252 {
1253# ifdef RT_ARCH_AMD64
1254 mov rax, [uCR4]
1255 mov cr4, rax
1256# else
1257 mov eax, [uCR4]
1258 _emit 0x0F
1259 _emit 0x22
1260 _emit 0xE0 /* mov cr4, eax */
1261# endif
1262 }
1263# endif
1264}
1265#endif
1266
1267
1268/**
1269 * Get cr8.
1270 * @returns cr8.
1271 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1272 */
1273#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1274DECLASM(RTCCUINTREG) ASMGetCR8(void);
1275#else
1276DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1277{
1278# ifdef RT_ARCH_AMD64
1279 RTCCUINTREG uCR8;
1280# if RT_INLINE_ASM_USES_INTRIN
1281 uCR8 = __readcr8();
1282
1283# elif RT_INLINE_ASM_GNU_STYLE
1284 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1285# else
1286 __asm
1287 {
1288 mov rax, cr8
1289 mov [uCR8], rax
1290 }
1291# endif
1292 return uCR8;
1293# else /* !RT_ARCH_AMD64 */
1294 return 0;
1295# endif /* !RT_ARCH_AMD64 */
1296}
1297#endif
1298
1299
1300/**
1301 * Enables interrupts (EFLAGS.IF).
1302 */
1303#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1304DECLASM(void) ASMIntEnable(void);
1305#else
1306DECLINLINE(void) ASMIntEnable(void)
1307{
1308# if RT_INLINE_ASM_GNU_STYLE
1309 __asm("sti\n");
1310# elif RT_INLINE_ASM_USES_INTRIN
1311 _enable();
1312# else
1313 __asm sti
1314# endif
1315}
1316#endif
1317
1318
1319/**
1320 * Disables interrupts (!EFLAGS.IF).
1321 */
1322#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1323DECLASM(void) ASMIntDisable(void);
1324#else
1325DECLINLINE(void) ASMIntDisable(void)
1326{
1327# if RT_INLINE_ASM_GNU_STYLE
1328 __asm("cli\n");
1329# elif RT_INLINE_ASM_USES_INTRIN
1330 _disable();
1331# else
1332 __asm cli
1333# endif
1334}
1335#endif
1336
1337
1338/**
1339 * Disables interrupts and returns previous xFLAGS.
1340 */
1341#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1342DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1343#else
1344DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1345{
1346 RTCCUINTREG xFlags;
1347# if RT_INLINE_ASM_GNU_STYLE
1348# ifdef RT_ARCH_AMD64
1349 __asm__ __volatile__("pushfq\n\t"
1350 "cli\n\t"
1351 "popq %0\n\t"
1352 : "=rm" (xFlags));
1353# else
1354 __asm__ __volatile__("pushfl\n\t"
1355 "cli\n\t"
1356 "popl %0\n\t"
1357 : "=rm" (xFlags));
1358# endif
1359# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1360 xFlags = ASMGetFlags();
1361 _disable();
1362# else
1363 __asm {
1364 pushfd
1365 cli
1366 pop [xFlags]
1367 }
1368# endif
1369 return xFlags;
1370}
1371#endif
1372
1373
1374/**
1375 * Reads a machine specific register.
1376 *
1377 * @returns Register content.
1378 * @param uRegister Register to read.
1379 */
1380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1381DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1382#else
1383DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1384{
1385 RTUINT64U u;
1386# if RT_INLINE_ASM_GNU_STYLE
1387 __asm__ __volatile__("rdmsr\n\t"
1388 : "=a" (u.s.Lo),
1389 "=d" (u.s.Hi)
1390 : "c" (uRegister));
1391
1392# elif RT_INLINE_ASM_USES_INTRIN
1393 u.u = __readmsr(uRegister);
1394
1395# else
1396 __asm
1397 {
1398 mov ecx, [uRegister]
1399 rdmsr
1400 mov [u.s.Lo], eax
1401 mov [u.s.Hi], edx
1402 }
1403# endif
1404
1405 return u.u;
1406}
1407#endif
1408
1409
1410/**
1411 * Writes a machine specific register.
1412 *
1413 * @returns Register content.
1414 * @param uRegister Register to write to.
1415 * @param u64Val Value to write.
1416 */
1417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1418DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1419#else
1420DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1421{
1422 RTUINT64U u;
1423
1424 u.u = u64Val;
1425# if RT_INLINE_ASM_GNU_STYLE
1426 __asm__ __volatile__("wrmsr\n\t"
1427 ::"a" (u.s.Lo),
1428 "d" (u.s.Hi),
1429 "c" (uRegister));
1430
1431# elif RT_INLINE_ASM_USES_INTRIN
1432 __writemsr(uRegister, u.u);
1433
1434# else
1435 __asm
1436 {
1437 mov ecx, [uRegister]
1438 mov edx, [u.s.Hi]
1439 mov eax, [u.s.Lo]
1440 wrmsr
1441 }
1442# endif
1443}
1444#endif
1445
1446
1447/**
1448 * Reads low part of a machine specific register.
1449 *
1450 * @returns Register content.
1451 * @param uRegister Register to read.
1452 */
1453#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1454DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1455#else
1456DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1457{
1458 uint32_t u32;
1459# if RT_INLINE_ASM_GNU_STYLE
1460 __asm__ __volatile__("rdmsr\n\t"
1461 : "=a" (u32)
1462 : "c" (uRegister)
1463 : "edx");
1464
1465# elif RT_INLINE_ASM_USES_INTRIN
1466 u32 = (uint32_t)__readmsr(uRegister);
1467
1468#else
1469 __asm
1470 {
1471 mov ecx, [uRegister]
1472 rdmsr
1473 mov [u32], eax
1474 }
1475# endif
1476
1477 return u32;
1478}
1479#endif
1480
1481
1482/**
1483 * Reads high part of a machine specific register.
1484 *
1485 * @returns Register content.
1486 * @param uRegister Register to read.
1487 */
1488#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1489DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1490#else
1491DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1492{
1493 uint32_t u32;
1494# if RT_INLINE_ASM_GNU_STYLE
1495 __asm__ __volatile__("rdmsr\n\t"
1496 : "=d" (u32)
1497 : "c" (uRegister)
1498 : "eax");
1499
1500# elif RT_INLINE_ASM_USES_INTRIN
1501 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1502
1503# else
1504 __asm
1505 {
1506 mov ecx, [uRegister]
1507 rdmsr
1508 mov [u32], edx
1509 }
1510# endif
1511
1512 return u32;
1513}
1514#endif
1515
1516
1517/**
1518 * Gets dr7.
1519 *
1520 * @returns dr7.
1521 */
1522#if RT_INLINE_ASM_EXTERNAL
1523DECLASM(RTCCUINTREG) ASMGetDR7(void);
1524#else
1525DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1526{
1527 RTCCUINTREG uDR7;
1528# if RT_INLINE_ASM_GNU_STYLE
1529# ifdef RT_ARCH_AMD64
1530 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1531# else
1532 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1533# endif
1534# else
1535 __asm
1536 {
1537# ifdef RT_ARCH_AMD64
1538 mov rax, dr7
1539 mov [uDR7], rax
1540# else
1541 mov eax, dr7
1542 mov [uDR7], eax
1543# endif
1544 }
1545# endif
1546 return uDR7;
1547}
1548#endif
1549
1550
1551/**
1552 * Gets dr6.
1553 *
1554 * @returns dr6.
1555 */
1556#if RT_INLINE_ASM_EXTERNAL
1557DECLASM(RTCCUINTREG) ASMGetDR6(void);
1558#else
1559DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1560{
1561 RTCCUINTREG uDR6;
1562# if RT_INLINE_ASM_GNU_STYLE
1563# ifdef RT_ARCH_AMD64
1564 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1565# else
1566 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1567# endif
1568# else
1569 __asm
1570 {
1571# ifdef RT_ARCH_AMD64
1572 mov rax, dr6
1573 mov [uDR6], rax
1574# else
1575 mov eax, dr6
1576 mov [uDR6], eax
1577# endif
1578 }
1579# endif
1580 return uDR6;
1581}
1582#endif
1583
1584
1585/**
1586 * Reads and clears DR6.
1587 *
1588 * @returns DR6.
1589 */
1590#if RT_INLINE_ASM_EXTERNAL
1591DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1592#else
1593DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1594{
1595 RTCCUINTREG uDR6;
1596# if RT_INLINE_ASM_GNU_STYLE
1597 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1598# ifdef RT_ARCH_AMD64
1599 __asm__ __volatile__("movq %%dr6, %0\n\t"
1600 "movq %1, %%dr6\n\t"
1601 : "=r" (uDR6)
1602 : "r" (uNewValue));
1603# else
1604 __asm__ __volatile__("movl %%dr6, %0\n\t"
1605 "movl %1, %%dr6\n\t"
1606 : "=r" (uDR6)
1607 : "r" (uNewValue));
1608# endif
1609# else
1610 __asm
1611 {
1612# ifdef RT_ARCH_AMD64
1613 mov rax, dr6
1614 mov [uDR6], rax
1615 mov rcx, rax
1616 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1617 mov dr6, rcx
1618# else
1619 mov eax, dr6
1620 mov [uDR6], eax
1621 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1622 mov dr6, ecx
1623# endif
1624 }
1625# endif
1626 return uDR6;
1627}
1628#endif
1629
1630
1631/**
1632 * Compiler memory barrier.
1633 *
1634 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1635 * values or any outstanding writes when returning from this function.
1636 *
1637 * This function must be used if non-volatile data is modified by a
1638 * device or the VMM. Typical cases are port access, MMIO access,
1639 * trapping instruction, etc.
1640 */
1641#if RT_INLINE_ASM_GNU_STYLE
1642# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1643#elif RT_INLINE_ASM_USES_INTRIN
1644# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1645#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1646DECLINLINE(void) ASMCompilerBarrier(void)
1647{
1648 __asm
1649 {
1650 }
1651}
1652#endif
1653
1654
1655/**
1656 * Writes a 8-bit unsigned integer to an I/O port, ordered.
1657 *
1658 * @param Port I/O port to read from.
1659 * @param u8 8-bit integer to write.
1660 */
1661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1662DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1663#else
1664DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1665{
1666# if RT_INLINE_ASM_GNU_STYLE
1667 __asm__ __volatile__("outb %b1, %w0\n\t"
1668 :: "Nd" (Port),
1669 "a" (u8));
1670
1671# elif RT_INLINE_ASM_USES_INTRIN
1672 __outbyte(Port, u8);
1673
1674# else
1675 __asm
1676 {
1677 mov dx, [Port]
1678 mov al, [u8]
1679 out dx, al
1680 }
1681# endif
1682}
1683#endif
1684
1685
1686/**
1687 * Gets a 8-bit unsigned integer from an I/O port, ordered.
1688 *
1689 * @returns 8-bit integer.
1690 * @param Port I/O port to read from.
1691 */
1692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1693DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1694#else
1695DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1696{
1697 uint8_t u8;
1698# if RT_INLINE_ASM_GNU_STYLE
1699 __asm__ __volatile__("inb %w1, %b0\n\t"
1700 : "=a" (u8)
1701 : "Nd" (Port));
1702
1703# elif RT_INLINE_ASM_USES_INTRIN
1704 u8 = __inbyte(Port);
1705
1706# else
1707 __asm
1708 {
1709 mov dx, [Port]
1710 in al, dx
1711 mov [u8], al
1712 }
1713# endif
1714 return u8;
1715}
1716#endif
1717
1718
1719/**
1720 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1721 *
1722 * @param Port I/O port to read from.
1723 * @param u16 16-bit integer to write.
1724 */
1725#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1726DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1727#else
1728DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1729{
1730# if RT_INLINE_ASM_GNU_STYLE
1731 __asm__ __volatile__("outw %w1, %w0\n\t"
1732 :: "Nd" (Port),
1733 "a" (u16));
1734
1735# elif RT_INLINE_ASM_USES_INTRIN
1736 __outword(Port, u16);
1737
1738# else
1739 __asm
1740 {
1741 mov dx, [Port]
1742 mov ax, [u16]
1743 out dx, ax
1744 }
1745# endif
1746}
1747#endif
1748
1749
1750/**
1751 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1752 *
1753 * @returns 16-bit integer.
1754 * @param Port I/O port to read from.
1755 */
1756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1757DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1758#else
1759DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1760{
1761 uint16_t u16;
1762# if RT_INLINE_ASM_GNU_STYLE
1763 __asm__ __volatile__("inw %w1, %w0\n\t"
1764 : "=a" (u16)
1765 : "Nd" (Port));
1766
1767# elif RT_INLINE_ASM_USES_INTRIN
1768 u16 = __inword(Port);
1769
1770# else
1771 __asm
1772 {
1773 mov dx, [Port]
1774 in ax, dx
1775 mov [u16], ax
1776 }
1777# endif
1778 return u16;
1779}
1780#endif
1781
1782
1783/**
1784 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1785 *
1786 * @param Port I/O port to read from.
1787 * @param u32 32-bit integer to write.
1788 */
1789#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1790DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1791#else
1792DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1793{
1794# if RT_INLINE_ASM_GNU_STYLE
1795 __asm__ __volatile__("outl %1, %w0\n\t"
1796 :: "Nd" (Port),
1797 "a" (u32));
1798
1799# elif RT_INLINE_ASM_USES_INTRIN
1800 __outdword(Port, u32);
1801
1802# else
1803 __asm
1804 {
1805 mov dx, [Port]
1806 mov eax, [u32]
1807 out dx, eax
1808 }
1809# endif
1810}
1811#endif
1812
1813
1814/**
1815 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1816 *
1817 * @returns 32-bit integer.
1818 * @param Port I/O port to read from.
1819 */
1820#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1821DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1822#else
1823DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1824{
1825 uint32_t u32;
1826# if RT_INLINE_ASM_GNU_STYLE
1827 __asm__ __volatile__("inl %w1, %0\n\t"
1828 : "=a" (u32)
1829 : "Nd" (Port));
1830
1831# elif RT_INLINE_ASM_USES_INTRIN
1832 u32 = __indword(Port);
1833
1834# else
1835 __asm
1836 {
1837 mov dx, [Port]
1838 in eax, dx
1839 mov [u32], eax
1840 }
1841# endif
1842 return u32;
1843}
1844#endif
1845
1846/** @todo string i/o */
1847
1848
1849/**
1850 * Atomically Exchange an unsigned 8-bit value, ordered.
1851 *
1852 * @returns Current *pu8 value
1853 * @param pu8 Pointer to the 8-bit variable to update.
1854 * @param u8 The 8-bit value to assign to *pu8.
1855 */
1856#if RT_INLINE_ASM_EXTERNAL
1857DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1858#else
1859DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1860{
1861# if RT_INLINE_ASM_GNU_STYLE
1862 __asm__ __volatile__("xchgb %0, %1\n\t"
1863 : "=m" (*pu8),
1864 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1865 : "1" (u8));
1866# else
1867 __asm
1868 {
1869# ifdef RT_ARCH_AMD64
1870 mov rdx, [pu8]
1871 mov al, [u8]
1872 xchg [rdx], al
1873 mov [u8], al
1874# else
1875 mov edx, [pu8]
1876 mov al, [u8]
1877 xchg [edx], al
1878 mov [u8], al
1879# endif
1880 }
1881# endif
1882 return u8;
1883}
1884#endif
1885
1886
1887/**
1888 * Atomically Exchange a signed 8-bit value, ordered.
1889 *
1890 * @returns Current *pu8 value
1891 * @param pi8 Pointer to the 8-bit variable to update.
1892 * @param i8 The 8-bit value to assign to *pi8.
1893 */
1894DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1895{
1896 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1897}
1898
1899
1900/**
1901 * Atomically Exchange a bool value, ordered.
1902 *
1903 * @returns Current *pf value
1904 * @param pf Pointer to the 8-bit variable to update.
1905 * @param f The 8-bit value to assign to *pi8.
1906 */
1907DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1908{
1909#ifdef _MSC_VER
1910 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1911#else
1912 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1913#endif
1914}
1915
1916
1917/**
1918 * Atomically Exchange an unsigned 16-bit value, ordered.
1919 *
1920 * @returns Current *pu16 value
1921 * @param pu16 Pointer to the 16-bit variable to update.
1922 * @param u16 The 16-bit value to assign to *pu16.
1923 */
1924#if RT_INLINE_ASM_EXTERNAL
1925DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1926#else
1927DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1928{
1929# if RT_INLINE_ASM_GNU_STYLE
1930 __asm__ __volatile__("xchgw %0, %1\n\t"
1931 : "=m" (*pu16),
1932 "=r" (u16)
1933 : "1" (u16));
1934# else
1935 __asm
1936 {
1937# ifdef RT_ARCH_AMD64
1938 mov rdx, [pu16]
1939 mov ax, [u16]
1940 xchg [rdx], ax
1941 mov [u16], ax
1942# else
1943 mov edx, [pu16]
1944 mov ax, [u16]
1945 xchg [edx], ax
1946 mov [u16], ax
1947# endif
1948 }
1949# endif
1950 return u16;
1951}
1952#endif
1953
1954
1955/**
1956 * Atomically Exchange a signed 16-bit value, ordered.
1957 *
1958 * @returns Current *pu16 value
1959 * @param pi16 Pointer to the 16-bit variable to update.
1960 * @param i16 The 16-bit value to assign to *pi16.
1961 */
1962DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1963{
1964 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1965}
1966
1967
1968/**
1969 * Atomically Exchange an unsigned 32-bit value, ordered.
1970 *
1971 * @returns Current *pu32 value
1972 * @param pu32 Pointer to the 32-bit variable to update.
1973 * @param u32 The 32-bit value to assign to *pu32.
1974 */
1975#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1976DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1977#else
1978DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1979{
1980# if RT_INLINE_ASM_GNU_STYLE
1981 __asm__ __volatile__("xchgl %0, %1\n\t"
1982 : "=m" (*pu32),
1983 "=r" (u32)
1984 : "1" (u32));
1985
1986# elif RT_INLINE_ASM_USES_INTRIN
1987 u32 = _InterlockedExchange((long *)pu32, u32);
1988
1989# else
1990 __asm
1991 {
1992# ifdef RT_ARCH_AMD64
1993 mov rdx, [pu32]
1994 mov eax, u32
1995 xchg [rdx], eax
1996 mov [u32], eax
1997# else
1998 mov edx, [pu32]
1999 mov eax, u32
2000 xchg [edx], eax
2001 mov [u32], eax
2002# endif
2003 }
2004# endif
2005 return u32;
2006}
2007#endif
2008
2009
2010/**
2011 * Atomically Exchange a signed 32-bit value, ordered.
2012 *
2013 * @returns Current *pu32 value
2014 * @param pi32 Pointer to the 32-bit variable to update.
2015 * @param i32 The 32-bit value to assign to *pi32.
2016 */
2017DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2018{
2019 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2020}
2021
2022
2023/**
2024 * Atomically Exchange an unsigned 64-bit value, ordered.
2025 *
2026 * @returns Current *pu64 value
2027 * @param pu64 Pointer to the 64-bit variable to update.
2028 * @param u64 The 64-bit value to assign to *pu64.
2029 */
2030#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2031DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2032#else
2033DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2034{
2035# if defined(RT_ARCH_AMD64)
2036# if RT_INLINE_ASM_USES_INTRIN
2037 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2038
2039# elif RT_INLINE_ASM_GNU_STYLE
2040 __asm__ __volatile__("xchgq %0, %1\n\t"
2041 : "=m" (*pu64),
2042 "=r" (u64)
2043 : "1" (u64));
2044# else
2045 __asm
2046 {
2047 mov rdx, [pu64]
2048 mov rax, [u64]
2049 xchg [rdx], rax
2050 mov [u64], rax
2051 }
2052# endif
2053# else /* !RT_ARCH_AMD64 */
2054# if RT_INLINE_ASM_GNU_STYLE
2055# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2056 uint32_t u32 = (uint32_t)u64;
2057 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2058 "xchgl %%ebx, %3\n\t"
2059 "1:\n\t"
2060 "lock; cmpxchg8b (%5)\n\t"
2061 "jnz 1b\n\t"
2062 "xchgl %%ebx, %3\n\t"
2063 /*"xchgl %%esi, %5\n\t"*/
2064 : "=A" (u64),
2065 "=m" (*pu64)
2066 : "0" (*pu64),
2067 "m" ( u32 ),
2068 "c" ( (uint32_t)(u64 >> 32) ),
2069 "S" (pu64) );
2070# else /* !PIC */
2071 __asm__ __volatile__("1:\n\t"
2072 "lock; cmpxchg8b %1\n\t"
2073 "jnz 1b\n\t"
2074 : "=A" (u64),
2075 "=m" (*pu64)
2076 : "0" (*pu64),
2077 "b" ( (uint32_t)u64 ),
2078 "c" ( (uint32_t)(u64 >> 32) ));
2079# endif
2080# else
2081 __asm
2082 {
2083 mov ebx, dword ptr [u64]
2084 mov ecx, dword ptr [u64 + 4]
2085 mov edi, pu64
2086 mov eax, dword ptr [edi]
2087 mov edx, dword ptr [edi + 4]
2088 retry:
2089 lock cmpxchg8b [edi]
2090 jnz retry
2091 mov dword ptr [u64], eax
2092 mov dword ptr [u64 + 4], edx
2093 }
2094# endif
2095# endif /* !RT_ARCH_AMD64 */
2096 return u64;
2097}
2098#endif
2099
2100
2101/**
2102 * Atomically Exchange an signed 64-bit value, ordered.
2103 *
2104 * @returns Current *pi64 value
2105 * @param pi64 Pointer to the 64-bit variable to update.
2106 * @param i64 The 64-bit value to assign to *pi64.
2107 */
2108DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2109{
2110 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2111}
2112
2113
#ifdef RT_ARCH_AMD64
/**
 * Exchanges an unsigned 128-bit value, ordered.
 *
 * @returns The previous content of *pu128.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this, nor is
 *          there GAS support for it.  For the time being the operation is
 *          therefore NOT atomic as a whole: it is carried out as two separate
 *          64-bit exchanges, low half first and then high half.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        PRTUINT128U pTarget = (PRTUINT128U)(uintptr_t)pu128;
        RTUINT128U  uXchg;
        uXchg.u    = u128;
        uXchg.s.Lo = ASMAtomicXchgU64(&pTarget->s.Lo, uXchg.s.Lo);
        uXchg.s.Hi = ASMAtomicXchgU64(&pTarget->s.Hi, uXchg.s.Hi);
        return uXchg.u;
    }
#if 0 /* later? */
    /* Disabled sketch, never compiled.
       NOTE(review): the GNU variant still says cmpxchg8b and the __asm variant
       uses dword ptr with 64-bit registers; needs rework before enabling. */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov     rbx, dword ptr [u128]
            mov     rcx, dword ptr [u128 + 8]
            mov     rdi, pu128
            mov     rax, dword ptr [rdi]
            mov     rdx, dword ptr [rdi + 8]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov     dword ptr [u128], rax
            mov     dword ptr [u128 + 8], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2173
2174
/** @def ASMAtomicXchgSize
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * The previous value is discarded.  Any other size trips an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2192
2193
2194/**
2195 * Atomically Exchange a pointer value, ordered.
2196 *
2197 * @returns Current *ppv value
2198 * @param ppv Pointer to the pointer variable to update.
2199 * @param pv The pointer value to assign to *ppv.
2200 */
2201DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2202{
2203#if ARCH_BITS == 32
2204 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2205#elif ARCH_BITS == 64
2206 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2207#else
2208# error "ARCH_BITS is bogus"
2209#endif
2210}
2211
2212
2213/**
2214 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2215 *
2216 * @returns true if xchg was done.
2217 * @returns false if xchg wasn't done.
2218 *
2219 * @param pu32 Pointer to the value to update.
2220 * @param u32New The new value to assigned to *pu32.
2221 * @param u32Old The old value to *pu32 compare with.
2222 */
2223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2224DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2225#else
2226DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2227{
2228# if RT_INLINE_ASM_GNU_STYLE
2229 uint8_t u8Ret;
2230 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2231 "setz %1\n\t"
2232 : "=m" (*pu32),
2233 "=qm" (u8Ret),
2234 "=a" (u32Old)
2235 : "r" (u32New),
2236 "2" (u32Old));
2237 return (bool)u8Ret;
2238
2239# elif RT_INLINE_ASM_USES_INTRIN
2240 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2241
2242# else
2243 uint32_t u32Ret;
2244 __asm
2245 {
2246# ifdef RT_ARCH_AMD64
2247 mov rdx, [pu32]
2248# else
2249 mov edx, [pu32]
2250# endif
2251 mov eax, [u32Old]
2252 mov ecx, [u32New]
2253# ifdef RT_ARCH_AMD64
2254 lock cmpxchg [rdx], ecx
2255# else
2256 lock cmpxchg [edx], ecx
2257# endif
2258 setz al
2259 movzx eax, al
2260 mov [u32Ret], eax
2261 }
2262 return !!u32Ret;
2263# endif
2264}
2265#endif
2266
2267
2268/**
2269 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2270 *
2271 * @returns true if xchg was done.
2272 * @returns false if xchg wasn't done.
2273 *
2274 * @param pi32 Pointer to the value to update.
2275 * @param i32New The new value to assigned to *pi32.
2276 * @param i32Old The old value to *pi32 compare with.
2277 */
2278DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2279{
2280 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2281}
2282
2283
2284/**
2285 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2286 *
2287 * @returns true if xchg was done.
2288 * @returns false if xchg wasn't done.
2289 *
2290 * @param pu64 Pointer to the 64-bit variable to update.
2291 * @param u64New The 64-bit value to assign to *pu64.
2292 * @param u64Old The value to compare with.
2293 */
2294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2295DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2296#else
2297DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2298{
2299# if RT_INLINE_ASM_USES_INTRIN
2300 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2301
2302# elif defined(RT_ARCH_AMD64)
2303# if RT_INLINE_ASM_GNU_STYLE
2304 uint8_t u8Ret;
2305 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2306 "setz %1\n\t"
2307 : "=m" (*pu64),
2308 "=qm" (u8Ret),
2309 "=a" (u64Old)
2310 : "r" (u64New),
2311 "2" (u64Old));
2312 return (bool)u8Ret;
2313# else
2314 bool fRet;
2315 __asm
2316 {
2317 mov rdx, [pu32]
2318 mov rax, [u64Old]
2319 mov rcx, [u64New]
2320 lock cmpxchg [rdx], rcx
2321 setz al
2322 mov [fRet], al
2323 }
2324 return fRet;
2325# endif
2326# else /* !RT_ARCH_AMD64 */
2327 uint32_t u32Ret;
2328# if RT_INLINE_ASM_GNU_STYLE
2329# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2330 uint32_t u32 = (uint32_t)u64New;
2331 uint32_t u32Spill;
2332 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2333 "lock; cmpxchg8b (%6)\n\t"
2334 "setz %%al\n\t"
2335 "xchgl %%ebx, %4\n\t"
2336 "movzbl %%al, %%eax\n\t"
2337 : "=a" (u32Ret),
2338 "=d" (u32Spill),
2339 "=m" (*pu64)
2340 : "A" (u64Old),
2341 "m" ( u32 ),
2342 "c" ( (uint32_t)(u64New >> 32) ),
2343 "S" (pu64) );
2344# else /* !PIC */
2345 uint32_t u32Spill;
2346 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2347 "setz %%al\n\t"
2348 "movzbl %%al, %%eax\n\t"
2349 : "=a" (u32Ret),
2350 "=d" (u32Spill),
2351 "=m" (*pu64)
2352 : "A" (u64Old),
2353 "b" ( (uint32_t)u64New ),
2354 "c" ( (uint32_t)(u64New >> 32) ));
2355# endif
2356 return (bool)u32Ret;
2357# else
2358 __asm
2359 {
2360 mov ebx, dword ptr [u64New]
2361 mov ecx, dword ptr [u64New + 4]
2362 mov edi, [pu64]
2363 mov eax, dword ptr [u64Old]
2364 mov edx, dword ptr [u64Old + 4]
2365 lock cmpxchg8b [edi]
2366 setz al
2367 movzx eax, al
2368 mov dword ptr [u32Ret], eax
2369 }
2370 return !!u32Ret;
2371# endif
2372# endif /* !RT_ARCH_AMD64 */
2373}
2374#endif
2375
2376
2377/**
2378 * Atomically Compare and exchange a signed 64-bit value, ordered.
2379 *
2380 * @returns true if xchg was done.
2381 * @returns false if xchg wasn't done.
2382 *
2383 * @param pi64 Pointer to the 64-bit variable to update.
2384 * @param i64 The 64-bit value to assign to *pu64.
2385 * @param i64Old The value to compare with.
2386 */
2387DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2388{
2389 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2390}
2391
2392
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit compare-and-exchange
 * function.  Any other size trips an assertion and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The value *pu is compared against.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2414
2415
2416/**
2417 * Atomically Compare and Exchange a pointer value, ordered.
2418 *
2419 * @returns true if xchg was done.
2420 * @returns false if xchg wasn't done.
2421 *
2422 * @param ppv Pointer to the value to update.
2423 * @param pvNew The new value to assigned to *ppv.
2424 * @param pvOld The old value to *ppv compare with.
2425 */
2426DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2427{
2428#if ARCH_BITS == 32
2429 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2430#elif ARCH_BITS == 64
2431 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2432#else
2433# error "ARCH_BITS is bogus"
2434#endif
2435}
2436
2437
2438/**
2439 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2440 * passes back old value, ordered.
2441 *
2442 * @returns true if xchg was done.
2443 * @returns false if xchg wasn't done.
2444 *
2445 * @param pu32 Pointer to the value to update.
2446 * @param u32New The new value to assigned to *pu32.
2447 * @param u32Old The old value to *pu32 compare with.
2448 * @param pu32Old Pointer store the old value at.
2449 */
2450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2451DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2452#else
2453DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2454{
2455# if RT_INLINE_ASM_GNU_STYLE
2456 uint8_t u8Ret;
2457 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2458 "setz %1\n\t"
2459 : "=m" (*pu32),
2460 "=qm" (u8Ret),
2461 "=a" (*pu32Old)
2462 : "r" (u32New),
2463 "a" (u32Old));
2464 return (bool)u8Ret;
2465
2466# elif RT_INLINE_ASM_USES_INTRIN
2467 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2468
2469# else
2470 uint32_t u32Ret;
2471 __asm
2472 {
2473# ifdef RT_ARCH_AMD64
2474 mov rdx, [pu32]
2475# else
2476 mov edx, [pu32]
2477# endif
2478 mov eax, [u32Old]
2479 mov ecx, [u32New]
2480# ifdef RT_ARCH_AMD64
2481 lock cmpxchg [rdx], ecx
2482 mov rdx, [pu32Old]
2483 mov [rdx], eax
2484# else
2485 lock cmpxchg [edx], ecx
2486 mov edx, [pu32Old]
2487 mov [edx], eax
2488# endif
2489 setz al
2490 movzx eax, al
2491 mov [u32Ret], eax
2492 }
2493 return !!u32Ret;
2494# endif
2495}
2496#endif
2497
2498
2499/**
2500 * Atomically Compare and Exchange a signed 32-bit value, additionally
2501 * passes back old value, ordered.
2502 *
2503 * @returns true if xchg was done.
2504 * @returns false if xchg wasn't done.
2505 *
2506 * @param pi32 Pointer to the value to update.
2507 * @param i32New The new value to assigned to *pi32.
2508 * @param i32Old The old value to *pi32 compare with.
2509 * @param pi32Old Pointer store the old value at.
2510 */
2511DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2512{
2513 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2514}
2515
2516
2517/**
2518 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2519 * passing back old value, ordered.
2520 *
2521 * @returns true if xchg was done.
2522 * @returns false if xchg wasn't done.
2523 *
2524 * @param pu64 Pointer to the 64-bit variable to update.
2525 * @param u64New The 64-bit value to assign to *pu64.
2526 * @param u64Old The value to compare with.
2527 * @param pu64Old Pointer store the old value at.
2528 */
2529#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2530DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2531#else
2532DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2533{
2534# if RT_INLINE_ASM_USES_INTRIN
2535 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2536
2537# elif defined(RT_ARCH_AMD64)
2538# if RT_INLINE_ASM_GNU_STYLE
2539 uint8_t u8Ret;
2540 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2541 "setz %1\n\t"
2542 : "=m" (*pu64),
2543 "=qm" (u8Ret),
2544 "=a" (*pu64Old)
2545 : "r" (u64New),
2546 "a" (u64Old));
2547 return (bool)u8Ret;
2548# else
2549 bool fRet;
2550 __asm
2551 {
2552 mov rdx, [pu32]
2553 mov rax, [u64Old]
2554 mov rcx, [u64New]
2555 lock cmpxchg [rdx], rcx
2556 mov rdx, [pu64Old]
2557 mov [rdx], rax
2558 setz al
2559 mov [fRet], al
2560 }
2561 return fRet;
2562# endif
2563# else /* !RT_ARCH_AMD64 */
2564# if RT_INLINE_ASM_GNU_STYLE
2565 uint64_t u64Ret;
2566# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2567 /* NB: this code uses a memory clobber description, because the clean
2568 * solution with an output value for *pu64 makes gcc run out of registers.
2569 * This will cause suboptimal code, and anyone with a better solution is
2570 * welcome to improve this. */
2571 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2572 "lock; cmpxchg8b %3\n\t"
2573 "xchgl %%ebx, %1\n\t"
2574 : "=A" (u64Ret)
2575 : "DS" ((uint32_t)u64New),
2576 "c" ((uint32_t)(u64New >> 32)),
2577 "m" (*pu64),
2578 "0" (u64Old)
2579 : "memory" );
2580# else /* !PIC */
2581 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2582 : "=A" (u64Ret),
2583 "=m" (*pu64)
2584 : "b" ((uint32_t)u64New),
2585 "c" ((uint32_t)(u64New >> 32)),
2586 "m" (*pu64),
2587 "0" (u64Old));
2588# endif
2589 *pu64Old = u64Ret;
2590 return u64Ret == u64Old;
2591# else
2592 uint32_t u32Ret;
2593 __asm
2594 {
2595 mov ebx, dword ptr [u64New]
2596 mov ecx, dword ptr [u64New + 4]
2597 mov edi, [pu64]
2598 mov eax, dword ptr [u64Old]
2599 mov edx, dword ptr [u64Old + 4]
2600 lock cmpxchg8b [edi]
2601 mov ebx, [pu64Old]
2602 mov [ebx], eax
2603 setz al
2604 movzx eax, al
2605 add ebx, 4
2606 mov [ebx], edx
2607 mov dword ptr [u32Ret], eax
2608 }
2609 return !!u32Ret;
2610# endif
2611# endif /* !RT_ARCH_AMD64 */
2612}
2613#endif
2614
2615
2616/**
2617 * Atomically Compare and exchange a signed 64-bit value, additionally
2618 * passing back old value, ordered.
2619 *
2620 * @returns true if xchg was done.
2621 * @returns false if xchg wasn't done.
2622 *
2623 * @param pi64 Pointer to the 64-bit variable to update.
2624 * @param i64 The 64-bit value to assign to *pu64.
2625 * @param i64Old The value to compare with.
2626 * @param pi64Old Pointer store the old value at.
2627 */
2628DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2629{
2630 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2631}
2632
2633
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*pu): 4 byte operands go to ASMAtomicCmpXchgExU32 and
 * 8 byte operands to ASMAtomicCmpXchgExU64.  Any other size triggers an
 * assertion, sets fRc to false and uOldVal to 0.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   uOldVal     Where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
                break; \
            /* The size is cast to int so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                (uOldVal) = 0; \
                break; \
        } \
    } while (0)
2657
2658
2659/**
2660 * Atomically Compare and Exchange a pointer value, additionally
2661 * passing back old value, ordered.
2662 *
2663 * @returns true if xchg was done.
2664 * @returns false if xchg wasn't done.
2665 *
2666 * @param ppv Pointer to the value to update.
2667 * @param pvNew The new value to assigned to *ppv.
2668 * @param pvOld The old value to *ppv compare with.
2669 * @param ppvOld Pointer store the old value at.
2670 */
2671DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2672{
2673#if ARCH_BITS == 32
2674 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2675#elif ARCH_BITS == 64
2676 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2677#else
2678# error "ARCH_BITS is bogus"
2679#endif
2680}
2681
2682
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * Three implementations are selected at compile time: a compiler intrinsic,
 * GNU style inline assembly, or MSC style inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use, only a
 * prototype for an external assembly implementation is emitted.
 *
 * @returns The old value.
 * @param   pu32        Pointer to the value.
 * @param   u32         Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The result of the intrinsic is returned unmodified. */
    u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* u32 is tied to operand 0 as both the input (the addend) and the output
       (whatever xadd leaves in the register). */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (u32)
                         : "memory");
    return u32;
# else
    /* MSC inline assembly: the same locked xadd, with the pointer loaded into
       a register of the native width. */
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     [u32], eax
    }
    return u32;
# endif
}
#endif
2723
2724
2725/**
2726 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2727 *
2728 * @returns The old value.
2729 * @param pi32 Pointer to the value.
2730 * @param i32 Number to add.
2731 */
2732DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2733{
2734 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2735}
2736
2737
/**
 * Atomically increment a 32-bit value, ordered.
 *
 * Three implementations are selected at compile time: a compiler intrinsic,
 * GNU style inline assembly, or MSC style inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use, only a
 * prototype for an external assembly implementation is emitted.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* The result of the intrinsic is returned unmodified. */
    u32 = _InterlockedIncrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* xadd with an addend of 1; the value it leaves in u32 is adjusted by +1
       below to produce the documented "new value". */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1)
                         : "memory");
    return u32+1;
# else
    /* MSC inline assembly: same locked xadd of 1, result adjusted by +1. */
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32+1;
# endif
}
#endif
2778
2779
2780/**
2781 * Atomically increment a signed 32-bit value, ordered.
2782 *
2783 * @returns The new value.
2784 * @param pi32 Pointer to the value to increment.
2785 */
2786DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2787{
2788 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2789}
2790
2791
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * Three implementations are selected at compile time: a compiler intrinsic,
 * GNU style inline assembly, or MSC style inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use, only a
 * prototype for an external assembly implementation is emitted.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* The result of the intrinsic is returned unmodified. */
    u32 = _InterlockedDecrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* xadd with an addend of -1; the value it leaves in u32 is adjusted by -1
       below to produce the documented "new value". */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1)
                         : "memory");
    return u32-1;
# else
    /* MSC inline assembly: same locked xadd of -1, result adjusted by -1. */
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
2832
2833
2834/**
2835 * Atomically decrement a signed 32-bit value, ordered.
2836 *
2837 * @returns The new value.
2838 * @param pi32 Pointer to the value to decrement.
2839 */
2840DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2841{
2842 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2843}
2844
2845
2846/**
2847 * Atomically Or an unsigned 32-bit value, ordered.
2848 *
2849 * @param pu32 Pointer to the pointer variable to OR u32 with.
2850 * @param u32 The value to OR *pu32 with.
2851 */
2852#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2853DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2854#else
2855DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2856{
2857# if RT_INLINE_ASM_USES_INTRIN
2858 _InterlockedOr((long volatile *)pu32, (long)u32);
2859
2860# elif RT_INLINE_ASM_GNU_STYLE
2861 __asm__ __volatile__("lock; orl %1, %0\n\t"
2862 : "=m" (*pu32)
2863 : "ir" (u32));
2864# else
2865 __asm
2866 {
2867 mov eax, [u32]
2868# ifdef RT_ARCH_AMD64
2869 mov rdx, [pu32]
2870 lock or [rdx], eax
2871# else
2872 mov edx, [pu32]
2873 lock or [edx], eax
2874# endif
2875 }
2876# endif
2877}
2878#endif
2879
2880
2881/**
2882 * Atomically Or a signed 32-bit value, ordered.
2883 *
2884 * @param pi32 Pointer to the pointer variable to OR u32 with.
2885 * @param i32 The value to OR *pu32 with.
2886 */
2887DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2888{
2889 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2890}
2891
2892
2893/**
2894 * Atomically And an unsigned 32-bit value, ordered.
2895 *
2896 * @param pu32 Pointer to the pointer variable to AND u32 with.
2897 * @param u32 The value to AND *pu32 with.
2898 */
2899#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2900DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2901#else
2902DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2903{
2904# if RT_INLINE_ASM_USES_INTRIN
2905 _InterlockedAnd((long volatile *)pu32, u32);
2906
2907# elif RT_INLINE_ASM_GNU_STYLE
2908 __asm__ __volatile__("lock; andl %1, %0\n\t"
2909 : "=m" (*pu32)
2910 : "ir" (u32));
2911# else
2912 __asm
2913 {
2914 mov eax, [u32]
2915# ifdef RT_ARCH_AMD64
2916 mov rdx, [pu32]
2917 lock and [rdx], eax
2918# else
2919 mov edx, [pu32]
2920 lock and [edx], eax
2921# endif
2922 }
2923# endif
2924}
2925#endif
2926
2927
2928/**
2929 * Atomically And a signed 32-bit value, ordered.
2930 *
2931 * @param pi32 Pointer to the pointer variable to AND i32 with.
2932 * @param i32 The value to AND *pi32 with.
2933 */
2934DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2935{
2936 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2937}
2938
2939
2940/**
2941 * Memory fence, waits for any pending writes and reads to complete.
2942 */
2943DECLINLINE(void) ASMMemoryFence(void)
2944{
2945 /** @todo use mfence? check if all cpus we care for support it. */
2946 uint32_t volatile u32;
2947 ASMAtomicXchgU32(&u32, 0);
2948}
2949
2950
2951/**
2952 * Write fence, waits for any pending writes to complete.
2953 */
2954DECLINLINE(void) ASMWriteFence(void)
2955{
2956 /** @todo use sfence? check if all cpus we care for support it. */
2957 ASMMemoryFence();
2958}
2959
2960
2961/**
2962 * Read fence, waits for any pending reads to complete.
2963 */
2964DECLINLINE(void) ASMReadFence(void)
2965{
2966 /** @todo use lfence? check if all cpus we care for support it. */
2967 ASMMemoryFence();
2968}
2969
2970
2971/**
2972 * Atomically reads an unsigned 8-bit value, ordered.
2973 *
2974 * @returns Current *pu8 value
2975 * @param pu8 Pointer to the 8-bit variable to read.
2976 */
2977DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2978{
2979 ASMMemoryFence();
2980 return *pu8; /* byte reads are atomic on x86 */
2981}
2982
2983
2984/**
2985 * Atomically reads an unsigned 8-bit value, unordered.
2986 *
2987 * @returns Current *pu8 value
2988 * @param pu8 Pointer to the 8-bit variable to read.
2989 */
2990DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2991{
2992 return *pu8; /* byte reads are atomic on x86 */
2993}
2994
2995
2996/**
2997 * Atomically reads a signed 8-bit value, ordered.
2998 *
2999 * @returns Current *pi8 value
3000 * @param pi8 Pointer to the 8-bit variable to read.
3001 */
3002DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3003{
3004 ASMMemoryFence();
3005 return *pi8; /* byte reads are atomic on x86 */
3006}
3007
3008
3009/**
3010 * Atomically reads a signed 8-bit value, unordered.
3011 *
3012 * @returns Current *pi8 value
3013 * @param pi8 Pointer to the 8-bit variable to read.
3014 */
3015DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3016{
3017 return *pi8; /* byte reads are atomic on x86 */
3018}
3019
3020
3021/**
3022 * Atomically reads an unsigned 16-bit value, ordered.
3023 *
3024 * @returns Current *pu16 value
3025 * @param pu16 Pointer to the 16-bit variable to read.
3026 */
3027DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3028{
3029 ASMMemoryFence();
3030 Assert(!((uintptr_t)pu16 & 1));
3031 return *pu16;
3032}
3033
3034
3035/**
3036 * Atomically reads an unsigned 16-bit value, unordered.
3037 *
3038 * @returns Current *pu16 value
3039 * @param pu16 Pointer to the 16-bit variable to read.
3040 */
3041DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3042{
3043 Assert(!((uintptr_t)pu16 & 1));
3044 return *pu16;
3045}
3046
3047
3048/**
3049 * Atomically reads a signed 16-bit value, ordered.
3050 *
3051 * @returns Current *pi16 value
3052 * @param pi16 Pointer to the 16-bit variable to read.
3053 */
3054DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3055{
3056 ASMMemoryFence();
3057 Assert(!((uintptr_t)pi16 & 1));
3058 return *pi16;
3059}
3060
3061
3062/**
3063 * Atomically reads a signed 16-bit value, unordered.
3064 *
3065 * @returns Current *pi16 value
3066 * @param pi16 Pointer to the 16-bit variable to read.
3067 */
3068DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3069{
3070 Assert(!((uintptr_t)pi16 & 1));
3071 return *pi16;
3072}
3073
3074
3075/**
3076 * Atomically reads an unsigned 32-bit value, ordered.
3077 *
3078 * @returns Current *pu32 value
3079 * @param pu32 Pointer to the 32-bit variable to read.
3080 */
3081DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3082{
3083 ASMMemoryFence();
3084 Assert(!((uintptr_t)pu32 & 3));
3085 return *pu32;
3086}
3087
3088
3089/**
3090 * Atomically reads an unsigned 32-bit value, unordered.
3091 *
3092 * @returns Current *pu32 value
3093 * @param pu32 Pointer to the 32-bit variable to read.
3094 */
3095DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3096{
3097 Assert(!((uintptr_t)pu32 & 3));
3098 return *pu32;
3099}
3100
3101
3102/**
3103 * Atomically reads a signed 32-bit value, ordered.
3104 *
3105 * @returns Current *pi32 value
3106 * @param pi32 Pointer to the 32-bit variable to read.
3107 */
3108DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3109{
3110 ASMMemoryFence();
3111 Assert(!((uintptr_t)pi32 & 3));
3112 return *pi32;
3113}
3114
3115
3116/**
3117 * Atomically reads a signed 32-bit value, unordered.
3118 *
3119 * @returns Current *pi32 value
3120 * @param pi32 Pointer to the 32-bit variable to read.
3121 */
3122DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3123{
3124 Assert(!((uintptr_t)pi32 & 3));
3125 return *pi32;
3126}
3127
3128
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is an mfence followed by a plain 64-bit load.  On 32-bit x86
 * the value is fetched with a locked cmpxchg8b that compares against zero and
 * exchanges with zero, so edx:eax ends up holding the current value.  Because
 * cmpxchg8b is a write instruction, the memory must be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    /* Fence first, then an ordinary 64-bit load. */
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* ebx cannot be named as an operand here (presumably because it is
       reserved in PIC builds - confirm), so a zero is swapped into it from
       u32EBX around the instruction and the original ebx is swapped back
       afterwards. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "xchgl %%ebx, %3\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* Compare with 0 (edx:eax) and exchange with 0 (ecx:ebx); the result is
       taken from edx:eax. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    /* MSC inline assembly: the same zero-compare / zero-exchange cmpxchg8b. */
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3199
3200
3201/**
3202 * Atomically reads an unsigned 64-bit value, unordered.
3203 *
3204 * @returns Current *pu64 value
3205 * @param pu64 Pointer to the 64-bit variable to read.
3206 * The memory pointed to must be writable.
3207 * @remark This will fault if the memory is read-only!
3208 */
3209#if RT_INLINE_ASM_EXTERNAL
3210DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3211#else
3212DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3213{
3214 uint64_t u64;
3215# ifdef RT_ARCH_AMD64
3216# if RT_INLINE_ASM_GNU_STYLE
3217 Assert(!((uintptr_t)pu64 & 7));
3218 __asm__ __volatile__("movq %1, %0\n\t"
3219 : "=r" (u64)
3220 : "m" (*pu64));
3221# else
3222 __asm
3223 {
3224 mov rdx, [pu64]
3225 mov rax, [rdx]
3226 mov [u64], rax
3227 }
3228# endif
3229# else /* !RT_ARCH_AMD64 */
3230# if RT_INLINE_ASM_GNU_STYLE
3231# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3232 uint32_t u32EBX = 0;
3233 Assert(!((uintptr_t)pu64 & 7));
3234 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3235 "lock; cmpxchg8b (%5)\n\t"
3236 "xchgl %%ebx, %3\n\t"
3237 : "=A" (u64),
3238 "=m" (*pu64)
3239 : "0" (0),
3240 "m" (u32EBX),
3241 "c" (0),
3242 "S" (pu64));
3243# else /* !PIC */
3244 __asm__ __volatile__("cmpxchg8b %1\n\t"
3245 : "=A" (u64),
3246 "=m" (*pu64)
3247 : "0" (0),
3248 "b" (0),
3249 "c" (0));
3250# endif
3251# else
3252 Assert(!((uintptr_t)pu64 & 7));
3253 __asm
3254 {
3255 xor eax, eax
3256 xor edx, edx
3257 mov edi, pu64
3258 xor ecx, ecx
3259 xor ebx, ebx
3260 lock cmpxchg8b [edi]
3261 mov dword ptr [u64], eax
3262 mov dword ptr [u64 + 4], edx
3263 }
3264# endif
3265# endif /* !RT_ARCH_AMD64 */
3266 return u64;
3267}
3268#endif
3269
3270
3271/**
3272 * Atomically reads a signed 64-bit value, ordered.
3273 *
3274 * @returns Current *pi64 value
3275 * @param pi64 Pointer to the 64-bit variable to read.
3276 * The memory pointed to must be writable.
3277 * @remark This will fault if the memory is read-only!
3278 */
3279DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3280{
3281 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3282}
3283
3284
3285/**
3286 * Atomically reads a signed 64-bit value, unordered.
3287 *
3288 * @returns Current *pi64 value
3289 * @param pi64 Pointer to the 64-bit variable to read.
3290 * The memory pointed to must be writable.
3291 * @remark This will fault if the memory is read-only!
3292 */
3293DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3294{
3295 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3296}
3297
3298
3299/**
3300 * Atomically reads a pointer value, ordered.
3301 *
3302 * @returns Current *pv value
3303 * @param ppv Pointer to the pointer variable to read.
3304 */
3305DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3306{
3307#if ARCH_BITS == 32
3308 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3309#elif ARCH_BITS == 64
3310 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3311#else
3312# error "ARCH_BITS is bogus"
3313#endif
3314}
3315
3316
3317/**
3318 * Atomically reads a pointer value, unordered.
3319 *
3320 * @returns Current *pv value
3321 * @param ppv Pointer to the pointer variable to read.
3322 */
3323DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3324{
3325#if ARCH_BITS == 32
3326 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3327#elif ARCH_BITS == 64
3328 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3329#else
3330# error "ARCH_BITS is bogus"
3331#endif
3332}
3333
3334
3335/**
3336 * Atomically reads a boolean value, ordered.
3337 *
3338 * @returns Current *pf value
3339 * @param pf Pointer to the boolean variable to read.
3340 */
3341DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3342{
3343 ASMMemoryFence();
3344 return *pf; /* byte reads are atomic on x86 */
3345}
3346
3347
3348/**
3349 * Atomically reads a boolean value, unordered.
3350 *
3351 * @returns Current *pf value
3352 * @param pf Pointer to the boolean variable to read.
3353 */
3354DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3355{
3356 return *pf; /* byte reads are atomic on x86 */
3357}
3358
3359
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicReadU8/U16/U32/U64 and stores the
 * result through puRes using the same width.  Any other size triggers an
 * assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* The size is cast to int so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3377
3378
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicUoReadU8/U16/U32/U64 and stores the
 * result through puRes using the same width.  Any other size triggers an
 * assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* The size is cast to int so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3396
3397
3398/**
3399 * Atomically writes an unsigned 8-bit value, ordered.
3400 *
3401 * @param pu8 Pointer to the 8-bit variable.
3402 * @param u8 The 8-bit value to assign to *pu8.
3403 */
3404DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3405{
3406 ASMAtomicXchgU8(pu8, u8);
3407}
3408
3409
3410/**
3411 * Atomically writes an unsigned 8-bit value, unordered.
3412 *
3413 * @param pu8 Pointer to the 8-bit variable.
3414 * @param u8 The 8-bit value to assign to *pu8.
3415 */
3416DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3417{
3418 *pu8 = u8; /* byte writes are atomic on x86 */
3419}
3420
3421
3422/**
3423 * Atomically writes a signed 8-bit value, ordered.
3424 *
3425 * @param pi8 Pointer to the 8-bit variable to read.
3426 * @param i8 The 8-bit value to assign to *pi8.
3427 */
3428DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3429{
3430 ASMAtomicXchgS8(pi8, i8);
3431}
3432
3433
3434/**
3435 * Atomically writes a signed 8-bit value, unordered.
3436 *
3437 * @param pi8 Pointer to the 8-bit variable to read.
3438 * @param i8 The 8-bit value to assign to *pi8.
3439 */
3440DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3441{
3442 *pi8 = i8; /* byte writes are atomic on x86 */
3443}
3444
3445
3446/**
3447 * Atomically writes an unsigned 16-bit value, ordered.
3448 *
3449 * @param pu16 Pointer to the 16-bit variable.
3450 * @param u16 The 16-bit value to assign to *pu16.
3451 */
3452DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3453{
3454 ASMAtomicXchgU16(pu16, u16);
3455}
3456
3457
3458/**
3459 * Atomically writes an unsigned 16-bit value, unordered.
3460 *
3461 * @param pu16 Pointer to the 16-bit variable.
3462 * @param u16 The 16-bit value to assign to *pu16.
3463 */
3464DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3465{
3466 Assert(!((uintptr_t)pu16 & 1));
3467 *pu16 = u16;
3468}
3469
3470
3471/**
3472 * Atomically writes a signed 16-bit value, ordered.
3473 *
3474 * @param pi16 Pointer to the 16-bit variable to read.
3475 * @param i16 The 16-bit value to assign to *pi16.
3476 */
3477DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3478{
3479 ASMAtomicXchgS16(pi16, i16);
3480}
3481
3482
3483/**
3484 * Atomically writes a signed 16-bit value, unordered.
3485 *
3486 * @param pi16 Pointer to the 16-bit variable to read.
3487 * @param i16 The 16-bit value to assign to *pi16.
3488 */
3489DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3490{
3491 Assert(!((uintptr_t)pi16 & 1));
3492 *pi16 = i16;
3493}
3494
3495
3496/**
3497 * Atomically writes an unsigned 32-bit value, ordered.
3498 *
3499 * @param pu32 Pointer to the 32-bit variable.
3500 * @param u32 The 32-bit value to assign to *pu32.
3501 */
3502DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3503{
3504 ASMAtomicXchgU32(pu32, u32);
3505}
3506
3507
3508/**
3509 * Atomically writes an unsigned 32-bit value, unordered.
3510 *
3511 * @param pu32 Pointer to the 32-bit variable.
3512 * @param u32 The 32-bit value to assign to *pu32.
3513 */
3514DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3515{
3516 Assert(!((uintptr_t)pu32 & 3));
3517 *pu32 = u32;
3518}
3519
3520
3521/**
3522 * Atomically writes a signed 32-bit value, ordered.
3523 *
3524 * @param pi32 Pointer to the 32-bit variable to read.
3525 * @param i32 The 32-bit value to assign to *pi32.
3526 */
3527DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3528{
3529 ASMAtomicXchgS32(pi32, i32);
3530}
3531
3532
3533/**
3534 * Atomically writes a signed 32-bit value, unordered.
3535 *
3536 * @param pi32 Pointer to the 32-bit variable to read.
3537 * @param i32 The 32-bit value to assign to *pi32.
3538 */
3539DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3540{
3541 Assert(!((uintptr_t)pi32 & 3));
3542 *pi32 = i32;
3543}
3544
3545
3546/**
3547 * Atomically writes an unsigned 64-bit value, ordered.
3548 *
3549 * @param pu64 Pointer to the 64-bit variable.
3550 * @param u64 The 64-bit value to assign to *pu64.
3551 */
3552DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3553{
3554 ASMAtomicXchgU64(pu64, u64);
3555}
3556
3557
3558/**
3559 * Atomically writes an unsigned 64-bit value, unordered.
3560 *
3561 * @param pu64 Pointer to the 64-bit variable.
3562 * @param u64 The 64-bit value to assign to *pu64.
3563 */
3564DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3565{
3566 Assert(!((uintptr_t)pu64 & 7));
3567#if ARCH_BITS == 64
3568 *pu64 = u64;
3569#else
3570 ASMAtomicXchgU64(pu64, u64);
3571#endif
3572}
3573
3574
3575/**
3576 * Atomically writes a signed 64-bit value, ordered.
3577 *
3578 * @param pi64 Pointer to the 64-bit variable.
3579 * @param i64 The 64-bit value to assign to *pi64.
3580 */
3581DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3582{
3583 ASMAtomicXchgS64(pi64, i64);
3584}
3585
3586
3587/**
3588 * Atomically writes a signed 64-bit value, unordered.
3589 *
3590 * @param pi64 Pointer to the 64-bit variable.
3591 * @param i64 The 64-bit value to assign to *pi64.
3592 */
3593DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3594{
3595 Assert(!((uintptr_t)pi64 & 7));
3596#if ARCH_BITS == 64
3597 *pi64 = i64;
3598#else
3599 ASMAtomicXchgS64(pi64, i64);
3600#endif
3601}
3602
3603
3604/**
3605 * Atomically writes a boolean value, unordered.
3606 *
3607 * @param pf Pointer to the boolean variable.
3608 * @param f The boolean value to assign to *pf.
3609 */
3610DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3611{
3612 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3613}
3614
3615
3616/**
3617 * Atomically writes a boolean value, unordered.
3618 *
3619 * @param pf Pointer to the boolean variable.
3620 * @param f The boolean value to assign to *pf.
3621 */
3622DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3623{
3624 *pf = f; /* byte writes are atomic on x86 */
3625}
3626
3627
3628/**
3629 * Atomically writes a pointer value, ordered.
3630 *
3631 * @returns Current *pv value
3632 * @param ppv Pointer to the pointer variable.
3633 * @param pv The pointer value to assigne to *ppv.
3634 */
3635DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3636{
3637#if ARCH_BITS == 32
3638 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3639#elif ARCH_BITS == 64
3640 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3641#else
3642# error "ARCH_BITS is bogus"
3643#endif
3644}
3645
3646
3647/**
3648 * Atomically writes a pointer value, unordered.
3649 *
3650 * @returns Current *pv value
3651 * @param ppv Pointer to the pointer variable.
3652 * @param pv The pointer value to assigne to *ppv.
3653 */
3654DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3655{
3656#if ARCH_BITS == 32
3657 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3658#elif ARCH_BITS == 64
3659 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3660#else
3661# error "ARCH_BITS is bogus"
3662#endif
3663}
3664
3665
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicWriteU8/U16/U32/U64, casting uNew to
 * the matching width.  Any other size triggers an assertion and writes
 * nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3683
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicUoWriteU8/U16/U32/U64, casting uNew
 * to the matching width.  Any other size triggers an assertion and writes
 * nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* The size is cast to int so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3701
3702
3703
3704
/**
 * Invalidate page.
 *
 * Issues the INVLPG instruction for the given address, either through the
 * __invlpg intrinsic or through GNU / MSC style inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use, only a
 * prototype for an external assembly implementation is emitted.
 *
 * @param   pv      Address of the page to invalidate.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMInvalidatePage(void *pv);
#else
DECLINLINE(void) ASMInvalidatePage(void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
    __invlpg(pv);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The byte at pv is passed as a memory operand so that the assembler
       emits the address form of the instruction. */
    __asm__ __volatile__("invlpg %0\n\t"
                         : : "m" (*(uint8_t *)pv));
# else
    /* MSC inline assembly: load the address into a register of the native
       width and invalidate through it. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pv]
        invlpg  [rax]
#  else
        mov     eax, [pv]
        invlpg  [eax]
#  endif
    }
# endif
}
#endif
3735
3736
3737#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3738# if PAGE_SIZE != 0x1000
3739# error "PAGE_SIZE is not 0x1000!"
3740# endif
3741#endif
3742
/**
 * Zeros a 4K memory page.
 *
 * Stores 0x1000 bytes of zeros starting at pv, using 64-bit stores on AMD64
 * and 32-bit stores otherwise (string-store intrinsic or "rep stos" inline
 * assembly).  The size is hard coded; PAGE_SIZE is not referenced.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use, only a
 * prototype for an external assembly implementation is emitted.
 *
 * @param   pv      Pointer to the memory block. This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
#  if RT_INLINE_ASM_USES_INTRIN
    /* Element counts: 0x1000 / 8 quadwords or 0x1000 / 4 doublewords. */
#   ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
#   else
    __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
#   endif

#  elif RT_INLINE_ASM_GNU_STYLE
    /* uDummy only exists to receive the count register, which the
       instruction modifies; its value is never used. */
    RTUINTREG uDummy;
#   ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("rep stosq"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 3),
                            "a" (0)
                          : "memory");
#   else
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 2),
                            "a" (0)
                          : "memory");
#   endif
#  else
    /* MSC inline assembly: 0200h quadwords or 0400h doublewords, i.e. 0x1000
       bytes either way. */
    __asm
    {
#   ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#   else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep     stosd
#   endif
    }
#  endif
}
# endif
3797
3798
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * The block is cleared with cb / 4 dword stores, so any bytes beyond the
 * last whole dword are left untouched.
 *
 * @param   pv  Pointer to the memory block.
 * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
    __stosd((unsigned long *)pv, 0, cb >> 2);

# elif RT_INLINE_ASM_GNU_STYLE
    /* pv and cb double as outputs because the string instruction modifies
       the destination and count registers. */
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (cb)
                          : "0" (pv),
                            "1" (cb >> 2),
                            "a" (0)
                          : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
3839
3840
3841/**
3842 * Fills a memory block with a 32-bit aligned size.
3843 *
3844 * @param pv Pointer to the memory block.
3845 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3846 * @param u32 The value to fill with.
3847 */
3848#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3849DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3850#else
3851DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3852{
3853# if RT_INLINE_ASM_USES_INTRIN
3854 __stosd((unsigned long *)pv, 0, cb >> 2);
3855
3856# elif RT_INLINE_ASM_GNU_STYLE
3857 __asm__ __volatile__ ("rep stosl"
3858 : "=D" (pv),
3859 "=c" (cb)
3860 : "0" (pv),
3861 "1" (cb >> 2),
3862 "a" (u32)
3863 : "memory");
3864# else
3865 __asm
3866 {
3867# ifdef RT_ARCH_AMD64
3868 mov rcx, [cb]
3869 shr rcx, 2
3870 mov rdi, [pv]
3871# else
3872 mov ecx, [cb]
3873 shr ecx, 2
3874 mov edi, [pv]
3875# endif
3876 mov eax, [u32]
3877 rep stosd
3878 }
3879# endif
3880}
3881#endif
3882
3883
3884/**
3885 * Checks if a memory block is filled with the specified byte.
3886 *
3887 * This is a sort of inverted memchr.
3888 *
3889 * @returns Pointer to the byte which doesn't equal u8.
3890 * @returns NULL if all equal to u8.
3891 *
3892 * @param pv Pointer to the memory block.
3893 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3894 * @param u8 The value it's supposed to be filled with.
3895 */
3896#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3897DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3898#else
3899DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3900{
3901/** @todo rewrite this in inline assembly? */
3902 uint8_t const *pb = (uint8_t const *)pv;
3903 for (; cb; cb--, pb++)
3904 if (RT_UNLIKELY(*pb != u8))
3905 return (void *)pb;
3906 return NULL;
3907}
3908#endif
3909
3910
3911/**
3912 * Checks if a memory block is filled with the specified 32-bit value.
3913 *
3914 * This is a sort of inverted memchr.
3915 *
3916 * @returns Pointer to the first value which doesn't equal u32.
3917 * @returns NULL if all equal to u32.
3918 *
3919 * @param pv Pointer to the memory block.
3920 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3921 * @param u32 The value it's supposed to be filled with.
3922 */
3923#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3924DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
3925#else
3926DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3927{
3928/** @todo rewrite this in inline assembly? */
3929 uint32_t const *pu32 = (uint32_t const *)pv;
3930 for (; cb; cb -= 4, pu32++)
3931 if (RT_UNLIKELY(*pu32 != u32))
3932 return (uint32_t *)pu32;
3933 return NULL;
3934}
3935#endif
3936
3937
/**
 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
 *
 * On AMD64 this is a plain 64-bit C multiplication; otherwise a 32-bit MUL
 * instruction is used and its EDX:EAX result is returned.
 *
 * @returns u32F1 * u32F2.
 * @param   u32F1   The first factor.
 * @param   u32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
#else
DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
{
# ifdef RT_ARCH_AMD64
    return (uint64_t)u32F1 * u32F2;
# else /* !RT_ARCH_AMD64 */
    uint64_t u64;
#  if RT_INLINE_ASM_GNU_STYLE
    /* u32F2 is loaded into EAX and u32F1 into EDX; "=A" collects EDX:EAX. */
    __asm__ __volatile__("mull %%edx"
                         : "=A" (u64)
                         : "a" (u32F2), "d" (u32F1));
#  else
    __asm
    {
        mov     edx, [u32F1]
        mov     eax, [u32F2]
        mul     edx
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
    return u64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
3970
3971
/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * On AMD64 this is a plain 64-bit C multiplication; otherwise a 32-bit IMUL
 * instruction is used and its EDX:EAX result is returned.
 *
 * @returns i32F1 * i32F2.
 * @param   i32F1   The first factor.
 * @param   i32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
#else
DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
{
# ifdef RT_ARCH_AMD64
    return (int64_t)i32F1 * i32F2;
# else /* !RT_ARCH_AMD64 */
    int64_t i64;
#  if RT_INLINE_ASM_GNU_STYLE
    /* i32F2 is loaded into EAX and i32F1 into EDX; "=A" collects EDX:EAX. */
    __asm__ __volatile__("imull %%edx"
                         : "=A" (i64)
                         : "a" (i32F2), "d" (i32F1));
#  else
    __asm
    {
        mov     edx, [i32F1]
        mov     eax, [i32F2]
        imul    edx
        mov     dword ptr [i64], eax
        mov     dword ptr [i64 + 4], edx
    }
#  endif
    return i64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4004
4005
/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * On AMD64 the 64-bit C quotient is truncated to 32 bits; otherwise a 32-bit
 * DIV instruction is used and its quotient (EAX) is returned.
 *
 * @returns u64 / u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef RT_ARCH_AMD64
    return (uint32_t)(u64 / u32);
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* Receives EDX after the division; never read. */
    RTUINTREG uDummy;
    /* The quotient is written back into the u32 parameter, which is then returned. */
    __asm__ __volatile__("divl %3"
                         : "=a" (u32), "=d"(uDummy)
                         : "A" (u64), "r" (u32));
#  else
    __asm
    {
        mov     eax, dword ptr [u64]
        mov     edx, dword ptr [u64 + 4]
        mov     ecx, [u32]
        div     ecx
        mov     [u32], eax
    }
#  endif
    return u32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4038
4039
/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * On AMD64 the 64-bit C quotient is truncated to 32 bits; otherwise a 32-bit
 * IDIV instruction is used and its quotient (EAX) is returned.
 *
 * @returns i64 / i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef RT_ARCH_AMD64
    return (int32_t)(i64 / i32);
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* Receives EDX after the division; never read. */
    RTUINTREG iDummy;
    /* The quotient is written back into the i32 parameter, which is then returned. */
    __asm__ __volatile__("idivl %3"
                         : "=a" (i32), "=d"(iDummy)
                         : "A" (i64), "r" (i32));
#  else
    __asm
    {
        mov     eax, dword ptr [i64]
        mov     edx, dword ptr [i64 + 4]
        mov     ecx, [i32]
        idiv    ecx
        mov     [i32], eax
    }
#  endif
    return i32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4072
4073
/**
 * Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
 * using a 96 bit intermediate result.
 * @note    Don't use 64-bit C arithmetic here since some gcc compilers generate references to
 *          __udivdi3 and __umoddi3 even if this inline function is not used.
 *
 * @returns (u64A * u32B) / u32C.
 * @param   u64A    The 64-bit value.
 * @param   u32B    The 32-bit value to multiply A by.
 * @param   u32C    The 32-bit value to divide A*B by.
 */
#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
#else
DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* One 64x64 multiply followed by one divide; u64Spill receives RDX and is never read. */
    uint64_t u64Result, u64Spill;
    __asm__ __volatile__("mulq %2\n\t"
                         "divq %3\n\t"
                         : "=a" (u64Result),
                           "=d" (u64Spill)
                         : "r" ((uint64_t)u32B),
                           "r" ((uint64_t)u32C),
                           "0" (u64A),
                           "1" (0));
    return u64Result;
#  else
    /* 32-bit: two 32x32 multiplies and two divides.  On entry EAX holds the
       low half of u64A, ESI the high half, ECX u32B and EDI u32C.  u32Dummy
       only absorbs the scratch registers. */
    uint32_t u32Dummy;
    uint64_t u64Result;
    __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
                                              edx = u64Lo.hi = (u64A.lo * u32B).hi */
                         "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
                                                    eax = u64A.hi */
                         "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
                                                    edx = u32C */
                         "xchg %%edx,%%ecx \n\t" /* ecx = u32C
                                                    edx = u32B */
                         "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
                                              edx = u64Hi.hi = (u64A.hi * u32B).hi */
                         "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
                         "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
                         "divl %%ecx \n\t" /* eax = u64Hi / u32C
                                              edx = u64Hi % u32C */
                         "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
                         "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
                         "divl %%ecx \n\t" /* u64Result.lo */
                         "movl %%edi,%%edx \n\t" /* u64Result.hi */
                         : "=A"(u64Result), "=c"(u32Dummy),
                           "=S"(u32Dummy), "=D"(u32Dummy)
                         : "a"((uint32_t)u64A),
                           "S"((uint32_t)(u64A >> 32)),
                           "c"(u32B),
                           "D"(u32C));
    return u64Result;
#  endif
# else
    /* Plain C variant of the same long multiplication / long division scheme. */
    RTUINT64U u;
    uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
    uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
    u64Hi += (u64Lo >> 32);
    u.s.Hi = (uint32_t)(u64Hi / u32C);
    u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
    return u.u;
# endif
}
#endif
4142
4143
/**
 * Probes a byte pointer for read access.
 *
 * The probe is a plain one byte load, so - like ASMProbeReadBuffer - it is
 * expected to fault if the byte is not read accessible.  The idea is to do
 * this in a safe place like before acquiring locks and such like.
 * (NOTE(review): this text used to say the function "will not fault";
 * nothing in the code below suppresses a fault - confirm.)
 *
 * Also, this functions guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte that was read.
 * @param   pvByte      Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* The load is done in a volatile asm statement rather than in C. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvByte]
        mov     al, [rax]
#  else
        mov     eax, [pvByte]
        mov     al, [eax]
#  endif
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
4183
4184/**
4185 * Probes a buffer for read access page by page.
4186 *
4187 * While the function will fault if the buffer is not fully read
4188 * accessible, the idea is to do this in a safe place like before
4189 * acquiring locks and such like.
4190 *
4191 * Also, this functions guarantees that an eager compiler is not going
4192 * to optimize the probing away.
4193 *
4194 * @param pvBuf Pointer to the buffer.
4195 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4196 */
4197DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4198{
4199 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4200 /* the first byte */
4201 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4202 ASMProbeReadByte(pu8);
4203
4204 /* the pages in between pages. */
4205 while (cbBuf > /*PAGE_SIZE*/0x1000)
4206 {
4207 ASMProbeReadByte(pu8);
4208 cbBuf -= /*PAGE_SIZE*/0x1000;
4209 pu8 += /*PAGE_SIZE*/0x1000;
4210 }
4211
4212 /* the last byte */
4213 ASMProbeReadByte(pu8 + cbBuf - 1);
4214}
4215
4216
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# ifndef __L4ENV__
#  define ASMBreakpoint()       do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
# else
#  define ASMBreakpoint()       do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
# endif
#else
/* Without GNU style inline assembly the compiler's __debugbreak() is used. */
# define ASMBreakpoint()        __debugbreak()
#endif
4233
4234
4235
4236/** @defgroup grp_inline_bits Bit Operations
4237 * @{
4238 */
4239
4240
/**
 * Sets a bit in a bitmap.
 *
 * Non-atomic variant (no lock prefix / interlocked intrinsic is used); see
 * ASMAtomicBitSet for the atomic one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The previous bit value returned by the intrinsic is ignored. */
    _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        bts     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        bts     [eax], edx
#  endif
    }
# endif
}
#endif
4276
4277
/**
 * Atomically sets a bit in a bitmap, ordered.
 *
 * Uses the interlocked intrinsic or a lock prefixed BTS instruction.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The previous bit value returned by the intrinsic is ignored. */
    _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [eax], edx
#  endif
    }
# endif
}
#endif
4312
4313
/**
 * Clears a bit in a bitmap.
 *
 * Non-atomic variant (no lock prefix / interlocked intrinsic is used); see
 * ASMAtomicBitClear for the atomic one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The previous bit value returned by the intrinsic is ignored. */
    _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btr     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btr     [eax], edx
#  endif
    }
# endif
}
#endif
4349
4350
/**
 * Atomically clears a bit in a bitmap, ordered.
 *
 * Uses a lock prefixed BTR instruction.  Unlike ASMAtomicBitSet there is no
 * compiler intrinsic variant of this function.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 * @remark  NOTE(review): this comment used to carry the remark "No memory
 *          barrier, take care on smp.", which does not match the "ordered"
 *          summary or the lock prefix below - confirm which is intended.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [eax], edx
#  endif
    }
# endif
}
#endif
4384
4385
/**
 * Toggles a bit in a bitmap.
 *
 * Non-atomic variant (no lock prefix / interlocked intrinsic is used); see
 * ASMAtomicBitToggle for the atomic one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The previous bit value returned by the intrinsic is ignored. */
    _bittestandcomplement((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btc     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btc     [eax], edx
#  endif
    }
# endif
}
#endif
4420
4421
/**
 * Atomically toggles a bit in a bitmap, ordered.
 *
 * Uses a lock prefixed BTC instruction; there is no compiler intrinsic
 * variant of this function.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [eax], edx
#  endif
    }
# endif
}
#endif
4454
4455
/**
 * Tests and sets a bit in a bitmap.
 *
 * Non-atomic variant; see ASMAtomicBitTestAndSet for the atomic one.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through u8/u32 and returned through f. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTS leaves the old bit in the carry flag; setc/andl turn it into 0 or 1. */
    __asm__ __volatile__ ("btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        bts     [rax], edx
#  else
        mov     eax, [pvBitmap]
        bts     [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4500
4501
/**
 * Atomically tests and sets a bit in a bitmap, ordered.
 *
 * Uses the interlocked intrinsic or a lock prefixed BTS instruction.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through u8/u32 and returned through f. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* BTS leaves the old bit in the carry flag; setc/andl turn it into 0 or 1. */
    __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock bts [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock bts [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4545
4546
/**
 * Tests and clears a bit in a bitmap.
 *
 * Non-atomic variant; see ASMAtomicBitTestAndClear for the atomic one.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through u8/u32 and returned through f. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTR leaves the old bit in the carry flag; setc/andl turn it into 0 or 1. */
    __asm__ __volatile__ ("btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        btr     [rax], edx
#  else
        mov     eax, [pvBitmap]
        btr     [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4591
4592
/**
 * Atomically tests and clears a bit in a bitmap, ordered.
 *
 * Uses the interlocked intrinsic or a lock prefixed BTR instruction.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 * @remark  NOTE(review): this comment used to carry the remark "No memory
 *          barrier, take care on smp.", which does not match the "ordered"
 *          summary or the lock prefix below - confirm which is intended.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through u8/u32 and returned through f. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTR leaves the old bit in the carry flag; setc/andl turn it into 0 or 1. */
    __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock btr [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock btr [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4638
4639
4640/**
4641 * Tests and toggles a bit in a bitmap.
4642 *
4643 * @returns true if the bit was set.
4644 * @returns false if the bit was clear.
4645 * @param pvBitmap Pointer to the bitmap.
4646 * @param iBit The bit to test and toggle.
4647 */
4648#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4649DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4650#else
4651DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4652{
4653 union { bool f; uint32_t u32; uint8_t u8; } rc;
4654# if RT_INLINE_ASM_USES_INTRIN
4655 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4656
4657# elif RT_INLINE_ASM_GNU_STYLE
4658 __asm__ __volatile__ ("btcl %2, %1\n\t"
4659 "setc %b0\n\t"
4660 "andl $1, %0\n\t"
4661 : "=q" (rc.u32),
4662 "=m" (*(volatile long *)pvBitmap)
4663 : "Ir" (iBit)
4664 : "memory");
4665# else
4666 __asm
4667 {
4668 mov edx, [iBit]
4669# ifdef RT_ARCH_AMD64
4670 mov rax, [pvBitmap]
4671 btc [rax], edx
4672# else
4673 mov eax, [pvBitmap]
4674 btc [eax], edx
4675# endif
4676 setc al
4677 and eax, 1
4678 mov [rc.u32], eax
4679 }
4680# endif
4681 return rc.f;
4682}
4683#endif
4684
4685
/**
 * Atomically tests and toggles a bit in a bitmap, ordered.
 *
 * Uses a lock prefixed BTC instruction; there is no compiler intrinsic
 * variant of this function.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through u32 and returned through f. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_GNU_STYLE
    /* BTC leaves the old bit in the carry flag; setc/andl turn it into 0 or 1. */
    __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock btc [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock btc [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4727
4728
4729/**
4730 * Tests if a bit in a bitmap is set.
4731 *
4732 * @returns true if the bit is set.
4733 * @returns false if the bit is clear.
4734 * @param pvBitmap Pointer to the bitmap.
4735 * @param iBit The bit to test.
4736 */
4737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4738DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4739#else
4740DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4741{
4742 union { bool f; uint32_t u32; uint8_t u8; } rc;
4743# if RT_INLINE_ASM_USES_INTRIN
4744 rc.u32 = _bittest((long *)pvBitmap, iBit);
4745# elif RT_INLINE_ASM_GNU_STYLE
4746
4747 __asm__ __volatile__ ("btl %2, %1\n\t"
4748 "setc %b0\n\t"
4749 "andl $1, %0\n\t"
4750 : "=q" (rc.u32),
4751 "=m" (*(volatile long *)pvBitmap)
4752 : "Ir" (iBit)
4753 : "memory");
4754# else
4755 __asm
4756 {
4757 mov edx, [iBit]
4758# ifdef RT_ARCH_AMD64
4759 mov rax, [pvBitmap]
4760 bt [rax], edx
4761# else
4762 mov eax, [pvBitmap]
4763 bt [eax], edx
4764# endif
4765 setc al
4766 and eax, 1
4767 mov [rc.u32], eax
4768 }
4769# endif
4770 return rc.f;
4771}
4772#endif
4773
4774
4775/**
4776 * Clears a bit range within a bitmap.
4777 *
4778 * @param pvBitmap Pointer to the bitmap.
4779 * @param iBitStart The First bit to clear.
4780 * @param iBitEnd The first bit not to clear.
4781 */
4782DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4783{
4784 if (iBitStart < iBitEnd)
4785 {
4786 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4787 int iStart = iBitStart & ~31;
4788 int iEnd = iBitEnd & ~31;
4789 if (iStart == iEnd)
4790 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4791 else
4792 {
4793 /* bits in first dword. */
4794 if (iBitStart & 31)
4795 {
4796 *pu32 &= (1 << (iBitStart & 31)) - 1;
4797 pu32++;
4798 iBitStart = iStart + 32;
4799 }
4800
4801 /* whole dword. */
4802 if (iBitStart != iEnd)
4803 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4804
4805 /* bits in last dword. */
4806 if (iBitEnd & 31)
4807 {
4808 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4809 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4810 }
4811 }
4812 }
4813}
4814
4815
4816/**
4817 * Finds the first clear bit in a bitmap.
4818 *
4819 * @returns Index of the first zero bit.
4820 * @returns -1 if no clear bit was found.
4821 * @param pvBitmap Pointer to the bitmap.
4822 * @param cBits The number of bits in the bitmap. Multiple of 32.
4823 */
4824#if RT_INLINE_ASM_EXTERNAL
4825DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4826#else
4827DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4828{
4829 if (cBits)
4830 {
4831 int32_t iBit;
4832# if RT_INLINE_ASM_GNU_STYLE
4833 RTCCUINTREG uEAX, uECX, uEDI;
4834 cBits = RT_ALIGN_32(cBits, 32);
4835 __asm__ __volatile__("repe; scasl\n\t"
4836 "je 1f\n\t"
4837# ifdef RT_ARCH_AMD64
4838 "lea -4(%%rdi), %%rdi\n\t"
4839 "xorl (%%rdi), %%eax\n\t"
4840 "subq %5, %%rdi\n\t"
4841# else
4842 "lea -4(%%edi), %%edi\n\t"
4843 "xorl (%%edi), %%eax\n\t"
4844 "subl %5, %%edi\n\t"
4845# endif
4846 "shll $3, %%edi\n\t"
4847 "bsfl %%eax, %%edx\n\t"
4848 "addl %%edi, %%edx\n\t"
4849 "1:\t\n"
4850 : "=d" (iBit),
4851 "=&c" (uECX),
4852 "=&D" (uEDI),
4853 "=&a" (uEAX)
4854 : "0" (0xffffffff),
4855 "mr" (pvBitmap),
4856 "1" (cBits >> 5),
4857 "2" (pvBitmap),
4858 "3" (0xffffffff));
4859# else
4860 cBits = RT_ALIGN_32(cBits, 32);
4861 __asm
4862 {
4863# ifdef RT_ARCH_AMD64
4864 mov rdi, [pvBitmap]
4865 mov rbx, rdi
4866# else
4867 mov edi, [pvBitmap]
4868 mov ebx, edi
4869# endif
4870 mov edx, 0ffffffffh
4871 mov eax, edx
4872 mov ecx, [cBits]
4873 shr ecx, 5
4874 repe scasd
4875 je done
4876
4877# ifdef RT_ARCH_AMD64
4878 lea rdi, [rdi - 4]
4879 xor eax, [rdi]
4880 sub rdi, rbx
4881# else
4882 lea edi, [edi - 4]
4883 xor eax, [edi]
4884 sub edi, ebx
4885# endif
4886 shl edi, 3
4887 bsf edx, eax
4888 add edx, edi
4889 done:
4890 mov [iBit], edx
4891 }
4892# endif
4893 return iBit;
4894 }
4895 return -1;
4896}
4897#endif
4898
4899
/**
 * Finds the next clear bit in a bitmap.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* From here on iBitPrev is the first bit to examine and iBit its offset
       within its 32-bit word. */
    int iBit = ++iBitPrev & 31;
    /* Advance to the word holding that bit and reduce cBits accordingly. */
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits   -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. */
        /* Inverted so clear bits become set, shifted so the start bit is bit 0. */
        uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
#  if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
#  else
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
#  endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }
    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstClear(pvBitmap, cBits);
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    return iBit;
}
#endif
4967
4968
/**
 * Finds the first set bit in a bitmap.
 *
 * The bitmap is scanned one 32-bit word at a time for a non-zero word; the
 * lowest set bit of that word is then located with a bit scan.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (or if cBits is zero).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        /* Scratch outputs for the registers the scan modifies; never read. */
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /* Operands: EDX starts out as 0xffffffff (the "not found" result),
           ECX is the word count, EDI the scan pointer, EAX the zero pattern
           to compare against and %5 the bitmap start used to turn the scan
           pointer back into a byte offset. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
#  ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "movl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
#  else
                             "lea -4(%%edi), %%edi\n\t"
                             "movl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
#  endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        __asm
        {
#  ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            xor     eax, eax
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done
#  ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            mov     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            mov     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
5050
5051
5052/**
5053 * Finds the next set bit in a bitmap.
5054 *
5055 * @returns Index of the next set bit.
5056 * @returns -1 if no set bit was found.
5057 * @param pvBitmap Pointer to the bitmap.
5058 * @param cBits The number of bits in the bitmap. Multiple of 32.
5059 * @param iBitPrev The bit returned from the last search.
5060 * The search will start at iBitPrev + 1.
5061 */
5062#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5063DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5064#else
5065DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5066{
5067 int iBit = ++iBitPrev & 31;
5068 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5069 cBits -= iBitPrev & ~31;
5070 if (iBit)
5071 {
5072 /* inspect the first dword. */
5073 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5074# if RT_INLINE_ASM_USES_INTRIN
5075 unsigned long ulBit = 0;
5076 if (_BitScanForward(&ulBit, u32))
5077 return ulBit + iBitPrev;
5078 iBit = -1;
5079# else
5080# if RT_INLINE_ASM_GNU_STYLE
5081 __asm__ __volatile__("bsf %1, %0\n\t"
5082 "jnz 1f\n\t"
5083 "movl $-1, %0\n\t"
5084 "1:\n\t"
5085 : "=r" (iBit)
5086 : "r" (u32));
5087# else
5088 __asm
5089 {
5090 mov edx, u32
5091 bsf eax, edx
5092 jnz done
5093 mov eax, 0ffffffffh
5094 done:
5095 mov [iBit], eax
5096 }
5097# endif
5098 if (iBit >= 0)
5099 return iBit + iBitPrev;
5100# endif
5101 /* Search the rest of the bitmap, if there is anything. */
5102 if (cBits > 32)
5103 {
5104 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5105 if (iBit >= 0)
5106 return iBit + (iBitPrev & ~31) + 32;
5107 }
5108
5109 }
5110 else
5111 {
5112 /* Search the rest of the bitmap. */
5113 iBit = ASMBitFirstSet(pvBitmap, cBits);
5114 if (iBit >= 0)
5115 return iBit + (iBitPrev & ~31);
5116 }
5117 return iBit;
5118}
5119#endif
5120
5121
5122/**
5123 * Finds the first bit which is set in the given 32-bit integer.
5124 * Bits are numbered from 1 (least significant) to 32.
5125 *
5126 * @returns index [1..32] of the first set bit.
5127 * @returns 0 if all bits are cleared.
5128 * @param u32 Integer to search for set bits.
5129 * @remark Similar to ffs() in BSD.
5130 */
5131DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5132{
5133# if RT_INLINE_ASM_USES_INTRIN
5134 unsigned long iBit;
5135 if (_BitScanForward(&iBit, u32))
5136 iBit++;
5137 else
5138 iBit = 0;
5139# elif RT_INLINE_ASM_GNU_STYLE
5140 uint32_t iBit;
5141 __asm__ __volatile__("bsf %1, %0\n\t"
5142 "jnz 1f\n\t"
5143 "xorl %0, %0\n\t"
5144 "jmp 2f\n"
5145 "1:\n\t"
5146 "incl %0\n"
5147 "2:\n\t"
5148 : "=r" (iBit)
5149 : "rm" (u32));
5150# else
5151 uint32_t iBit;
5152 _asm
5153 {
5154 bsf eax, [u32]
5155 jnz found
5156 xor eax, eax
5157 jmp done
5158 found:
5159 inc eax
5160 done:
5161 mov [iBit], eax
5162 }
5163# endif
5164 return iBit;
5165}
5166
5167
5168/**
5169 * Finds the first bit which is set in the given 32-bit integer.
5170 * Bits are numbered from 1 (least significant) to 32.
5171 *
5172 * @returns index [1..32] of the first set bit.
5173 * @returns 0 if all bits are cleared.
5174 * @param i32 Integer to search for set bits.
5175 * @remark Similar to ffs() in BSD.
5176 */
5177DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5178{
5179 return ASMBitFirstSetU32((uint32_t)i32);
5180}
5181
5182
5183/**
5184 * Finds the last bit which is set in the given 32-bit integer.
5185 * Bits are numbered from 1 (least significant) to 32.
5186 *
5187 * @returns index [1..32] of the last set bit.
5188 * @returns 0 if all bits are cleared.
5189 * @param u32 Integer to search for set bits.
5190 * @remark Similar to fls() in BSD.
5191 */
5192DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5193{
5194# if RT_INLINE_ASM_USES_INTRIN
5195 unsigned long iBit;
5196 if (_BitScanReverse(&iBit, u32))
5197 iBit++;
5198 else
5199 iBit = 0;
5200# elif RT_INLINE_ASM_GNU_STYLE
5201 uint32_t iBit;
5202 __asm__ __volatile__("bsrl %1, %0\n\t"
5203 "jnz 1f\n\t"
5204 "xorl %0, %0\n\t"
5205 "jmp 2f\n"
5206 "1:\n\t"
5207 "incl %0\n"
5208 "2:\n\t"
5209 : "=r" (iBit)
5210 : "rm" (u32));
5211# else
5212 uint32_t iBit;
5213 _asm
5214 {
5215 bsr eax, [u32]
5216 jnz found
5217 xor eax, eax
5218 jmp done
5219 found:
5220 inc eax
5221 done:
5222 mov [iBit], eax
5223 }
5224# endif
5225 return iBit;
5226}
5227
5228
5229/**
5230 * Finds the last bit which is set in the given 32-bit integer.
5231 * Bits are numbered from 1 (least significant) to 32.
5232 *
5233 * @returns index [1..32] of the last set bit.
5234 * @returns 0 if all bits are cleared.
5235 * @param i32 Integer to search for set bits.
5236 * @remark Similar to fls() in BSD.
5237 */
5238DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5239{
5240 return ASMBitLastSetS32((uint32_t)i32);
5241}
5242
5243
5244/**
5245 * Reverse the byte order of the given 32-bit integer.
5246 * @param u32 Integer
5247 */
5248DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5249{
5250#if RT_INLINE_ASM_USES_INTRIN
5251 u32 = _byteswap_ulong(u32);
5252#elif RT_INLINE_ASM_GNU_STYLE
5253 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5254#else
5255 _asm
5256 {
5257 mov eax, [u32]
5258 bswap eax
5259 mov [u32], eax
5260 }
5261#endif
5262 return u32;
5263}
5264
5265/** @} */
5266
5267
5268/** @} */
5269#endif
5270
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette