VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 7637

Last change on this file since 7637 was 7637, checked in by vboxsync, 17 years ago

cmpxchgl may change eax on failure, so tell the compiler that it is an output as well as an input. (This was the cause of the ATA thread problems (the thread exited because of a timeout from rtSemEventWait).)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 132.4 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo #include <iprt/param.h> for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using _MSC_VER >= 1400 and thus the compiler intrinsics.
35 * Otherwise defined as 0.
36 */
37
38#ifdef _MSC_VER
39# if _MSC_VER >= 1400
40# define RT_INLINE_ASM_USES_INTRIN 1
41# include <intrin.h>
42 /* Emit the intrinsics at all optimization levels. */
43# pragma intrinsic(_ReadWriteBarrier)
44# pragma intrinsic(__cpuid)
45# pragma intrinsic(_enable)
46# pragma intrinsic(_disable)
47# pragma intrinsic(__rdtsc)
48# pragma intrinsic(__readmsr)
49# pragma intrinsic(__writemsr)
50# pragma intrinsic(__outbyte)
51# pragma intrinsic(__outword)
52# pragma intrinsic(__outdword)
53# pragma intrinsic(__inbyte)
54# pragma intrinsic(__inword)
55# pragma intrinsic(__indword)
56# pragma intrinsic(__invlpg)
57# pragma intrinsic(__stosd)
58# pragma intrinsic(__stosw)
59# pragma intrinsic(__stosb)
60# pragma intrinsic(__readcr0)
61# pragma intrinsic(__readcr2)
62# pragma intrinsic(__readcr3)
63# pragma intrinsic(__readcr4)
64# pragma intrinsic(__writecr0)
65# pragma intrinsic(__writecr3)
66# pragma intrinsic(__writecr4)
67# pragma intrinsic(_BitScanForward)
68# pragma intrinsic(_BitScanReverse)
69# pragma intrinsic(_bittest)
70# pragma intrinsic(_bittestandset)
71# pragma intrinsic(_bittestandreset)
72# pragma intrinsic(_bittestandcomplement)
73# pragma intrinsic(_byteswap_ushort)
74# pragma intrinsic(_byteswap_ulong)
75# pragma intrinsic(_interlockedbittestandset)
76# pragma intrinsic(_interlockedbittestandreset)
77# pragma intrinsic(_InterlockedAnd)
78# pragma intrinsic(_InterlockedOr)
79# pragma intrinsic(_InterlockedIncrement)
80# pragma intrinsic(_InterlockedDecrement)
81# pragma intrinsic(_InterlockedExchange)
82# pragma intrinsic(_InterlockedExchangeAdd)
83# pragma intrinsic(_InterlockedCompareExchange)
84# pragma intrinsic(_InterlockedCompareExchange64)
85# ifdef RT_ARCH_AMD64
86# pragma intrinsic(__stosq)
87# pragma intrinsic(__readcr8)
88# pragma intrinsic(__writecr8)
89# pragma intrinsic(_byteswap_uint64)
90# pragma intrinsic(_InterlockedExchange64)
91# endif
92# endif
93#endif
94#ifndef RT_INLINE_ASM_USES_INTRIN
95# define RT_INLINE_ASM_USES_INTRIN 0
96#endif
97
98
99
100/** @defgroup grp_asm ASM - Assembly Routines
101 * @ingroup grp_rt
102 *
103 * @remarks The difference between ordered and unordered atomic operations is that
104 * the former will complete outstanding reads and writes before continuing,
105 * while the latter makes no promises about the order. Even ordered
106 * operations do not, it seems, make any 100% promise about whether
107 * the operation itself will complete before any subsequent memory access.
108 * (Please correct this if it is wrong.)
109 *
110 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
111 * are unordered (note the Uo).
112 *
113 * @{
114 */
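/*
 * Editorial usage sketch (not part of the original header): because the
 * ordered ASMAtomic* operations complete outstanding reads and writes before
 * continuing, a trivial spinlock can be built from ASMAtomicXchgU32 alone.
 * The lock variable layout and the busy-wait policy below are assumptions
 * made purely for illustration.
 *
 * @code
 *  static volatile uint32_t g_fExampleLock = 0;
 *
 *  static void ExampleLock(void)
 *  {
 *      while (ASMAtomicXchgU32(&g_fExampleLock, 1) != 0)
 *          ; // spin until the previous owner has written 0 back
 *  }
 *
 *  static void ExampleUnlock(void)
 *  {
 *      ASMAtomicXchgU32(&g_fExampleLock, 0); // ordered: prior stores complete first
 *  }
 * @endcode
 */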
115
116/** @def RT_INLINE_ASM_EXTERNAL
117 * Defined as 1 if the compiler does not support inline assembly.
118 * The ASM* functions will then be implemented in an external .asm file.
119 *
120 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
121 * inline assembly in their AMD64 compiler.
122 */
123#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
124# define RT_INLINE_ASM_EXTERNAL 1
125#else
126# define RT_INLINE_ASM_EXTERNAL 0
127#endif
128
129/** @def RT_INLINE_ASM_GNU_STYLE
130 * Defined as 1 if the compiler understands GNU-style inline assembly.
131 */
132#if defined(_MSC_VER)
133# define RT_INLINE_ASM_GNU_STYLE 0
134#else
135# define RT_INLINE_ASM_GNU_STYLE 1
136#endif
137
138
139/** @todo find a more proper place for this structure? */
140#pragma pack(1)
141/** IDTR */
142typedef struct RTIDTR
143{
144 /** Size of the IDT. */
145 uint16_t cbIdt;
146 /** Address of the IDT. */
147 uintptr_t pIdt;
148} RTIDTR, *PRTIDTR;
149#pragma pack()
150
151#pragma pack(1)
152/** GDTR */
153typedef struct RTGDTR
154{
155 /** Size of the GDT. */
156 uint16_t cbGdt;
157 /** Address of the GDT. */
158 uintptr_t pGdt;
159} RTGDTR, *PRTGDTR;
160#pragma pack()
161
162
163/** @def ASMReturnAddress
164 * Gets the return address of the current (or calling if you like) function or method.
165 */
166#ifdef _MSC_VER
167# ifdef __cplusplus
168extern "C"
169# endif
170void * _ReturnAddress(void);
171# pragma intrinsic(_ReturnAddress)
172# define ASMReturnAddress() _ReturnAddress()
173#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
174# define ASMReturnAddress() __builtin_return_address(0)
175#else
176# error "Unsupported compiler."
177#endif
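/*
 * Editorial usage sketch (not part of the original header): ASMReturnAddress
 * is typically forwarded to a logging or assertion helper so that a failure
 * can be attributed to the caller. LogCallerExample below is a hypothetical
 * helper, used only to show the call pattern.
 *
 * @code
 *  void ExampleApiEntry(void)
 *  {
 *      LogCallerExample("ExampleApiEntry called from %p\n", ASMReturnAddress());
 *  }
 * @endcode
 */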
178
179
180/**
181 * Gets the content of the IDTR CPU register.
182 * @param pIdtr Where to store the IDTR contents.
183 */
184#if RT_INLINE_ASM_EXTERNAL
185DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
186#else
187DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
188{
189# if RT_INLINE_ASM_GNU_STYLE
190 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
191# else
192 __asm
193 {
194# ifdef RT_ARCH_AMD64
195 mov rax, [pIdtr]
196 sidt [rax]
197# else
198 mov eax, [pIdtr]
199 sidt [eax]
200# endif
201 }
202# endif
203}
204#endif
205
206
207/**
208 * Sets the content of the IDTR CPU register.
209 * @param pIdtr Where to load the IDTR contents from
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
213#else
214DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 lidt [rax]
224# else
225 mov eax, [pIdtr]
226 lidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Gets the content of the GDTR CPU register.
236 * @param pGdtr Where to store the GDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
240#else
241DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pGdtr]
250 sgdt [rax]
251# else
252 mov eax, [pGdtr]
253 sgdt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260/**
261 * Get the cs register.
262 * @returns cs.
263 */
264#if RT_INLINE_ASM_EXTERNAL
265DECLASM(RTSEL) ASMGetCS(void);
266#else
267DECLINLINE(RTSEL) ASMGetCS(void)
268{
269 RTSEL SelCS;
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
272# else
273 __asm
274 {
275 mov ax, cs
276 mov [SelCS], ax
277 }
278# endif
279 return SelCS;
280}
281#endif
282
283
284/**
285 * Get the DS register.
286 * @returns DS.
287 */
288#if RT_INLINE_ASM_EXTERNAL
289DECLASM(RTSEL) ASMGetDS(void);
290#else
291DECLINLINE(RTSEL) ASMGetDS(void)
292{
293 RTSEL SelDS;
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
296# else
297 __asm
298 {
299 mov ax, ds
300 mov [SelDS], ax
301 }
302# endif
303 return SelDS;
304}
305#endif
306
307
308/**
309 * Get the ES register.
310 * @returns ES.
311 */
312#if RT_INLINE_ASM_EXTERNAL
313DECLASM(RTSEL) ASMGetES(void);
314#else
315DECLINLINE(RTSEL) ASMGetES(void)
316{
317 RTSEL SelES;
318# if RT_INLINE_ASM_GNU_STYLE
319 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
320# else
321 __asm
322 {
323 mov ax, es
324 mov [SelES], ax
325 }
326# endif
327 return SelES;
328}
329#endif
330
331
332/**
333 * Get the FS register.
334 * @returns FS.
335 */
336#if RT_INLINE_ASM_EXTERNAL
337DECLASM(RTSEL) ASMGetFS(void);
338#else
339DECLINLINE(RTSEL) ASMGetFS(void)
340{
341 RTSEL SelFS;
342# if RT_INLINE_ASM_GNU_STYLE
343 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
344# else
345 __asm
346 {
347 mov ax, fs
348 mov [SelFS], ax
349 }
350# endif
351 return SelFS;
352}
353#endif
354
355
356/**
357 * Get the GS register.
358 * @returns GS.
359 */
360#if RT_INLINE_ASM_EXTERNAL
361DECLASM(RTSEL) ASMGetGS(void);
362#else
363DECLINLINE(RTSEL) ASMGetGS(void)
364{
365 RTSEL SelGS;
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
368# else
369 __asm
370 {
371 mov ax, gs
372 mov [SelGS], ax
373 }
374# endif
375 return SelGS;
376}
377#endif
378
379
380/**
381 * Get the SS register.
382 * @returns SS.
383 */
384#if RT_INLINE_ASM_EXTERNAL
385DECLASM(RTSEL) ASMGetSS(void);
386#else
387DECLINLINE(RTSEL) ASMGetSS(void)
388{
389 RTSEL SelSS;
390# if RT_INLINE_ASM_GNU_STYLE
391 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
392# else
393 __asm
394 {
395 mov ax, ss
396 mov [SelSS], ax
397 }
398# endif
399 return SelSS;
400}
401#endif
402
403
404/**
405 * Get the TR register.
406 * @returns TR.
407 */
408#if RT_INLINE_ASM_EXTERNAL
409DECLASM(RTSEL) ASMGetTR(void);
410#else
411DECLINLINE(RTSEL) ASMGetTR(void)
412{
413 RTSEL SelTR;
414# if RT_INLINE_ASM_GNU_STYLE
415 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
416# else
417 __asm
418 {
419 str ax
420 mov [SelTR], ax
421 }
422# endif
423 return SelTR;
424}
425#endif
426
427
428/**
429 * Get the [RE]FLAGS register.
430 * @returns [RE]FLAGS.
431 */
432#if RT_INLINE_ASM_EXTERNAL
433DECLASM(RTCCUINTREG) ASMGetFlags(void);
434#else
435DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
436{
437 RTCCUINTREG uFlags;
438# if RT_INLINE_ASM_GNU_STYLE
439# ifdef RT_ARCH_AMD64
440 __asm__ __volatile__("pushfq\n\t"
441 "popq %0\n\t"
442 : "=g" (uFlags));
443# else
444 __asm__ __volatile__("pushfl\n\t"
445 "popl %0\n\t"
446 : "=g" (uFlags));
447# endif
448# else
449 __asm
450 {
451# ifdef RT_ARCH_AMD64
452 pushfq
453 pop [uFlags]
454# else
455 pushfd
456 pop [uFlags]
457# endif
458 }
459# endif
460 return uFlags;
461}
462#endif
463
464
465/**
466 * Set the [RE]FLAGS register.
467 * @param uFlags The new [RE]FLAGS value.
468 */
469#if RT_INLINE_ASM_EXTERNAL
470DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
471#else
472DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
473{
474# if RT_INLINE_ASM_GNU_STYLE
475# ifdef RT_ARCH_AMD64
476 __asm__ __volatile__("pushq %0\n\t"
477 "popfq\n\t"
478 : : "g" (uFlags));
479# else
480 __asm__ __volatile__("pushl %0\n\t"
481 "popfl\n\t"
482 : : "g" (uFlags));
483# endif
484# else
485 __asm
486 {
487# ifdef RT_ARCH_AMD64
488 push [uFlags]
489 popfq
490# else
491 push [uFlags]
492 popfd
493# endif
494 }
495# endif
496}
497#endif
498
499
500/**
501 * Gets the content of the CPU timestamp counter register.
502 *
503 * @returns TSC.
504 */
505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
506DECLASM(uint64_t) ASMReadTSC(void);
507#else
508DECLINLINE(uint64_t) ASMReadTSC(void)
509{
510 RTUINT64U u;
511# if RT_INLINE_ASM_GNU_STYLE
512 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
513# else
514# if RT_INLINE_ASM_USES_INTRIN
515 u.u = __rdtsc();
516# else
517 __asm
518 {
519 rdtsc
520 mov [u.s.Lo], eax
521 mov [u.s.Hi], edx
522 }
523# endif
524# endif
525 return u.u;
526}
527#endif
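/*
 * Editorial usage sketch (not part of the original header): a crude cycle
 * count around a workload. RDTSC is not serializing, so precise measurements
 * normally bracket it with a serializing instruction (e.g. cpuid); that is
 * omitted here, and DoExampleWork is a hypothetical workload.
 *
 * @code
 *  uint64_t u64Start   = ASMReadTSC();
 *  DoExampleWork();
 *  uint64_t u64Elapsed = ASMReadTSC() - u64Start;
 * @endcode
 */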
528
529
530/**
531 * Performs the cpuid instruction returning all registers.
532 *
533 * @param uOperator CPUID operation (eax).
534 * @param pvEAX Where to store eax.
535 * @param pvEBX Where to store ebx.
536 * @param pvECX Where to store ecx.
537 * @param pvEDX Where to store edx.
538 * @remark We're using void pointers to ease the use of special bitfield structures and such.
539 */
540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
541DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
542#else
543DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
544{
545# if RT_INLINE_ASM_GNU_STYLE
546# ifdef RT_ARCH_AMD64
547 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
548 __asm__ ("cpuid\n\t"
549 : "=a" (uRAX),
550 "=b" (uRBX),
551 "=c" (uRCX),
552 "=d" (uRDX)
553 : "0" (uOperator));
554 *(uint32_t *)pvEAX = (uint32_t)uRAX;
555 *(uint32_t *)pvEBX = (uint32_t)uRBX;
556 *(uint32_t *)pvECX = (uint32_t)uRCX;
557 *(uint32_t *)pvEDX = (uint32_t)uRDX;
558# else
559 __asm__ ("xchgl %%ebx, %1\n\t"
560 "cpuid\n\t"
561 "xchgl %%ebx, %1\n\t"
562 : "=a" (*(uint32_t *)pvEAX),
563 "=r" (*(uint32_t *)pvEBX),
564 "=c" (*(uint32_t *)pvECX),
565 "=d" (*(uint32_t *)pvEDX)
566 : "0" (uOperator));
567# endif
568
569# elif RT_INLINE_ASM_USES_INTRIN
570 int aInfo[4];
571 __cpuid(aInfo, uOperator);
572 *(uint32_t *)pvEAX = aInfo[0];
573 *(uint32_t *)pvEBX = aInfo[1];
574 *(uint32_t *)pvECX = aInfo[2];
575 *(uint32_t *)pvEDX = aInfo[3];
576
577# else
578 uint32_t uEAX;
579 uint32_t uEBX;
580 uint32_t uECX;
581 uint32_t uEDX;
582 __asm
583 {
584 push ebx
585 mov eax, [uOperator]
586 cpuid
587 mov [uEAX], eax
588 mov [uEBX], ebx
589 mov [uECX], ecx
590 mov [uEDX], edx
591 pop ebx
592 }
593 *(uint32_t *)pvEAX = uEAX;
594 *(uint32_t *)pvEBX = uEBX;
595 *(uint32_t *)pvECX = uECX;
596 *(uint32_t *)pvEDX = uEDX;
597# endif
598}
599#endif
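/*
 * Editorial usage sketch (not part of the original header): reading the CPU
 * vendor string via leaf 0, where the vendor bytes are returned in EBX, EDX
 * and ECX (in that order). memcpy from string.h is assumed to be available.
 *
 * @code
 *  uint32_t uMaxLeaf, uEbx, uEcx, uEdx;
 *  char     szVendor[13];
 *  ASMCpuId(0, &uMaxLeaf, &uEbx, &uEcx, &uEdx);
 *  memcpy(&szVendor[0], &uEbx, 4);
 *  memcpy(&szVendor[4], &uEdx, 4);
 *  memcpy(&szVendor[8], &uEcx, 4);
 *  szVendor[12] = '\0';   // e.g. "GenuineIntel" or "AuthenticAMD"
 * @endcode
 */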
600
601
602/**
603 * Performs the cpuid instruction returning all registers.
604 * Some cpuid leaves take ECX as an additional input parameter (currently known to apply to EAX=4).
605 *
606 * @param uOperator CPUID operation (eax).
607 * @param uIdxECX ecx index
608 * @param pvEAX Where to store eax.
609 * @param pvEBX Where to store ebx.
610 * @param pvECX Where to store ecx.
611 * @param pvEDX Where to store edx.
612 * @remark We're using void pointers to ease the use of special bitfield structures and such.
613 */
614#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
615DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
616#else
617DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
618{
619# if RT_INLINE_ASM_GNU_STYLE
620# ifdef RT_ARCH_AMD64
621 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
622 __asm__ ("cpuid\n\t"
623 : "=a" (uRAX),
624 "=b" (uRBX),
625 "=c" (uRCX),
626 "=d" (uRDX)
627 : "0" (uOperator),
628 "2" (uIdxECX));
629 *(uint32_t *)pvEAX = (uint32_t)uRAX;
630 *(uint32_t *)pvEBX = (uint32_t)uRBX;
631 *(uint32_t *)pvECX = (uint32_t)uRCX;
632 *(uint32_t *)pvEDX = (uint32_t)uRDX;
633# else
634 __asm__ ("xchgl %%ebx, %1\n\t"
635 "cpuid\n\t"
636 "xchgl %%ebx, %1\n\t"
637 : "=a" (*(uint32_t *)pvEAX),
638 "=r" (*(uint32_t *)pvEBX),
639 "=c" (*(uint32_t *)pvECX),
640 "=d" (*(uint32_t *)pvEDX)
641 : "0" (uOperator),
642 "2" (uIdxECX));
643# endif
644
645# elif RT_INLINE_ASM_USES_INTRIN
646 int aInfo[4];
647 /** @todo The __cpuid intrinsic does not take the ECX sub-leaf index, so uIdxECX is not honoured here. */
648 __cpuid(aInfo, uOperator);
649 *(uint32_t *)pvEAX = aInfo[0];
650 *(uint32_t *)pvEBX = aInfo[1];
651 *(uint32_t *)pvECX = aInfo[2];
652 *(uint32_t *)pvEDX = aInfo[3];
653
654# else
655 uint32_t uEAX;
656 uint32_t uEBX;
657 uint32_t uECX;
658 uint32_t uEDX;
659 __asm
660 {
661 push ebx
662 mov eax, [uOperator]
663 mov ecx, [uIdxECX]
664 cpuid
665 mov [uEAX], eax
666 mov [uEBX], ebx
667 mov [uECX], ecx
668 mov [uEDX], edx
669 pop ebx
670 }
671 *(uint32_t *)pvEAX = uEAX;
672 *(uint32_t *)pvEBX = uEBX;
673 *(uint32_t *)pvECX = uECX;
674 *(uint32_t *)pvEDX = uEDX;
675# endif
676}
677#endif
678
679
680/**
681 * Performs the cpuid instruction returning ecx and edx.
682 *
683 * @param uOperator CPUID operation (eax).
684 * @param pvECX Where to store ecx.
685 * @param pvEDX Where to store edx.
686 * @remark We're using void pointers to ease the use of special bitfield structures and such.
687 */
688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
689DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
690#else
691DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
692{
693 uint32_t uEBX;
694 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
695}
696#endif
697
698
699/**
700 * Performs the cpuid instruction returning edx.
701 *
702 * @param uOperator CPUID operation (eax).
703 * @returns EDX after cpuid operation.
704 */
705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
706DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
707#else
708DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
709{
710 RTCCUINTREG xDX;
711# if RT_INLINE_ASM_GNU_STYLE
712# ifdef RT_ARCH_AMD64
713 RTCCUINTREG uSpill;
714 __asm__ ("cpuid"
715 : "=a" (uSpill),
716 "=d" (xDX)
717 : "0" (uOperator)
718 : "rbx", "rcx");
719# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
720 __asm__ ("push %%ebx\n\t"
721 "cpuid\n\t"
722 "pop %%ebx\n\t"
723 : "=a" (uOperator),
724 "=d" (xDX)
725 : "0" (uOperator)
726 : "ecx");
727# else
728 __asm__ ("cpuid"
729 : "=a" (uOperator),
730 "=d" (xDX)
731 : "0" (uOperator)
732 : "ebx", "ecx");
733# endif
734
735# elif RT_INLINE_ASM_USES_INTRIN
736 int aInfo[4];
737 __cpuid(aInfo, uOperator);
738 xDX = aInfo[3];
739
740# else
741 __asm
742 {
743 push ebx
744 mov eax, [uOperator]
745 cpuid
746 mov [xDX], edx
747 pop ebx
748 }
749# endif
750 return (uint32_t)xDX;
751}
752#endif
753
754
755/**
756 * Performs the cpuid instruction returning ecx.
757 *
758 * @param uOperator CPUID operation (eax).
759 * @returns ECX after cpuid operation.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
763#else
764DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
765{
766 RTCCUINTREG xCX;
767# if RT_INLINE_ASM_GNU_STYLE
768# ifdef RT_ARCH_AMD64
769 RTCCUINTREG uSpill;
770 __asm__ ("cpuid"
771 : "=a" (uSpill),
772 "=c" (xCX)
773 : "0" (uOperator)
774 : "rbx", "rdx");
775# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
776 __asm__ ("push %%ebx\n\t"
777 "cpuid\n\t"
778 "pop %%ebx\n\t"
779 : "=a" (uOperator),
780 "=c" (xCX)
781 : "0" (uOperator)
782 : "edx");
783# else
784 __asm__ ("cpuid"
785 : "=a" (uOperator),
786 "=c" (xCX)
787 : "0" (uOperator)
788 : "ebx", "edx");
789
790# endif
791
792# elif RT_INLINE_ASM_USES_INTRIN
793 int aInfo[4];
794 __cpuid(aInfo, uOperator);
795 xCX = aInfo[2];
796
797# else
798 __asm
799 {
800 push ebx
801 mov eax, [uOperator]
802 cpuid
803 mov [xCX], ecx
804 pop ebx
805 }
806# endif
807 return (uint32_t)xCX;
808}
809#endif
810
811
812/**
813 * Checks if the current CPU supports CPUID.
814 *
815 * @returns true if CPUID is supported.
816 */
817DECLINLINE(bool) ASMHasCpuId(void)
818{
819#ifdef RT_ARCH_AMD64
820 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
821#else /* !RT_ARCH_AMD64 */
822 bool fRet = false;
823# if RT_INLINE_ASM_GNU_STYLE
824 uint32_t u1;
825 uint32_t u2;
826 __asm__ ("pushf\n\t"
827 "pop %1\n\t"
828 "mov %1, %2\n\t"
829 "xorl $0x200000, %1\n\t"
830 "push %1\n\t"
831 "popf\n\t"
832 "pushf\n\t"
833 "pop %1\n\t"
834 "cmpl %1, %2\n\t"
835 "setne %0\n\t"
836 "push %2\n\t"
837 "popf\n\t"
838 : "=m" (fRet), "=r" (u1), "=r" (u2));
839# else
840 __asm
841 {
842 pushfd
843 pop eax
844 mov ebx, eax
845 xor eax, 0200000h
846 push eax
847 popfd
848 pushfd
849 pop eax
850 cmp eax, ebx
851 setne fRet
852 push ebx
853 popfd
854 }
855# endif
856 return fRet;
857#endif /* !RT_ARCH_AMD64 */
858}
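/*
 * Editorial usage sketch (not part of the original header): on 32-bit hosts
 * ASMHasCpuId should guard any feature probing. Bit 25 of CPUID.1:EDX is the
 * architectural SSE feature flag; treat the specific bit choice as an example.
 *
 * @code
 *  bool fSse = false;
 *  if (ASMHasCpuId())
 *      fSse = (ASMCpuId_EDX(1) & RT_BIT(25)) != 0;
 * @endcode
 */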
859
860
861/**
862 * Gets the APIC ID of the current CPU.
863 *
864 * @returns the APIC ID.
865 */
866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
867DECLASM(uint8_t) ASMGetApicId(void);
868#else
869DECLINLINE(uint8_t) ASMGetApicId(void)
870{
871 RTCCUINTREG xBX;
872# if RT_INLINE_ASM_GNU_STYLE
873# ifdef RT_ARCH_AMD64
874 RTCCUINTREG uSpill;
875 __asm__ ("cpuid"
876 : "=a" (uSpill),
877 "=b" (xBX)
878 : "0" (1)
879 : "rcx", "rdx");
880# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
881 RTCCUINTREG uSpill;
882 __asm__ ("mov %%ebx,%1\n\t"
883 "cpuid\n\t"
884 "xchgl %%ebx,%1\n\t"
885 : "=a" (uSpill),
886 "=r" (xBX)
887 : "0" (1)
888 : "ecx", "edx");
889# else
890 RTCCUINTREG uSpill;
891 __asm__ ("cpuid"
892 : "=a" (uSpill),
893 "=b" (xBX)
894 : "0" (1)
895 : "ecx", "edx");
896# endif
897
898# elif RT_INLINE_ASM_USES_INTRIN
899 int aInfo[4];
900 __cpuid(aInfo, 1);
901 xBX = aInfo[1];
902
903# else
904 __asm
905 {
906 push ebx
907 mov eax, 1
908 cpuid
909 mov [xBX], ebx
910 pop ebx
911 }
912# endif
913 return (uint8_t)(xBX >> 24);
914}
915#endif
916
917/**
918 * Get cr0.
919 * @returns cr0.
920 */
921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
922DECLASM(RTCCUINTREG) ASMGetCR0(void);
923#else
924DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
925{
926 RTCCUINTREG uCR0;
927# if RT_INLINE_ASM_USES_INTRIN
928 uCR0 = __readcr0();
929
930# elif RT_INLINE_ASM_GNU_STYLE
931# ifdef RT_ARCH_AMD64
932 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
933# else
934 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
935# endif
936# else
937 __asm
938 {
939# ifdef RT_ARCH_AMD64
940 mov rax, cr0
941 mov [uCR0], rax
942# else
943 mov eax, cr0
944 mov [uCR0], eax
945# endif
946 }
947# endif
948 return uCR0;
949}
950#endif
951
952
953/**
954 * Sets the CR0 register.
955 * @param uCR0 The new CR0 value.
956 */
957#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
958DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
959#else
960DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
961{
962# if RT_INLINE_ASM_USES_INTRIN
963 __writecr0(uCR0);
964
965# elif RT_INLINE_ASM_GNU_STYLE
966# ifdef RT_ARCH_AMD64
967 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
968# else
969 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
970# endif
971# else
972 __asm
973 {
974# ifdef RT_ARCH_AMD64
975 mov rax, [uCR0]
976 mov cr0, rax
977# else
978 mov eax, [uCR0]
979 mov cr0, eax
980# endif
981 }
982# endif
983}
984#endif
985
986
987/**
988 * Get cr2.
989 * @returns cr2.
990 */
991#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
992DECLASM(RTCCUINTREG) ASMGetCR2(void);
993#else
994DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
995{
996 RTCCUINTREG uCR2;
997# if RT_INLINE_ASM_USES_INTRIN
998 uCR2 = __readcr2();
999
1000# elif RT_INLINE_ASM_GNU_STYLE
1001# ifdef RT_ARCH_AMD64
1002 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1003# else
1004 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1005# endif
1006# else
1007 __asm
1008 {
1009# ifdef RT_ARCH_AMD64
1010 mov rax, cr2
1011 mov [uCR2], rax
1012# else
1013 mov eax, cr2
1014 mov [uCR2], eax
1015# endif
1016 }
1017# endif
1018 return uCR2;
1019}
1020#endif
1021
1022
1023/**
1024 * Sets the CR2 register.
1025 * @param uCR2 The new CR2 value.
1026 */
1027#if RT_INLINE_ASM_EXTERNAL
1028DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1029#else
1030DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1031{
1032# if RT_INLINE_ASM_GNU_STYLE
1033# ifdef RT_ARCH_AMD64
1034 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1035# else
1036 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1037# endif
1038# else
1039 __asm
1040 {
1041# ifdef RT_ARCH_AMD64
1042 mov rax, [uCR2]
1043 mov cr2, rax
1044# else
1045 mov eax, [uCR2]
1046 mov cr2, eax
1047# endif
1048 }
1049# endif
1050}
1051#endif
1052
1053
1054/**
1055 * Get cr3.
1056 * @returns cr3.
1057 */
1058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1059DECLASM(RTCCUINTREG) ASMGetCR3(void);
1060#else
1061DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1062{
1063 RTCCUINTREG uCR3;
1064# if RT_INLINE_ASM_USES_INTRIN
1065 uCR3 = __readcr3();
1066
1067# elif RT_INLINE_ASM_GNU_STYLE
1068# ifdef RT_ARCH_AMD64
1069 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1070# else
1071 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1072# endif
1073# else
1074 __asm
1075 {
1076# ifdef RT_ARCH_AMD64
1077 mov rax, cr3
1078 mov [uCR3], rax
1079# else
1080 mov eax, cr3
1081 mov [uCR3], eax
1082# endif
1083 }
1084# endif
1085 return uCR3;
1086}
1087#endif
1088
1089
1090/**
1091 * Sets the CR3 register.
1092 *
1093 * @param uCR3 New CR3 value.
1094 */
1095#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1096DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1097#else
1098DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1099{
1100# if RT_INLINE_ASM_USES_INTRIN
1101 __writecr3(uCR3);
1102
1103# elif RT_INLINE_ASM_GNU_STYLE
1104# ifdef RT_ARCH_AMD64
1105 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1106# else
1107 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1108# endif
1109# else
1110 __asm
1111 {
1112# ifdef RT_ARCH_AMD64
1113 mov rax, [uCR3]
1114 mov cr3, rax
1115# else
1116 mov eax, [uCR3]
1117 mov cr3, eax
1118# endif
1119 }
1120# endif
1121}
1122#endif
1123
1124
1125/**
1126 * Reloads the CR3 register.
1127 */
1128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1129DECLASM(void) ASMReloadCR3(void);
1130#else
1131DECLINLINE(void) ASMReloadCR3(void)
1132{
1133# if RT_INLINE_ASM_USES_INTRIN
1134 __writecr3(__readcr3());
1135
1136# elif RT_INLINE_ASM_GNU_STYLE
1137 RTCCUINTREG u;
1138# ifdef RT_ARCH_AMD64
1139 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1140 "movq %0, %%cr3\n\t"
1141 : "=r" (u));
1142# else
1143 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1144 "movl %0, %%cr3\n\t"
1145 : "=r" (u));
1146# endif
1147# else
1148 __asm
1149 {
1150# ifdef RT_ARCH_AMD64
1151 mov rax, cr3
1152 mov cr3, rax
1153# else
1154 mov eax, cr3
1155 mov cr3, eax
1156# endif
1157 }
1158# endif
1159}
1160#endif
1161
1162
1163/**
1164 * Get cr4.
1165 * @returns cr4.
1166 */
1167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1168DECLASM(RTCCUINTREG) ASMGetCR4(void);
1169#else
1170DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1171{
1172 RTCCUINTREG uCR4;
1173# if RT_INLINE_ASM_USES_INTRIN
1174 uCR4 = __readcr4();
1175
1176# elif RT_INLINE_ASM_GNU_STYLE
1177# ifdef RT_ARCH_AMD64
1178 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1179# else
1180 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1181# endif
1182# else
1183 __asm
1184 {
1185# ifdef RT_ARCH_AMD64
1186 mov rax, cr4
1187 mov [uCR4], rax
1188# else
1189 push eax /* just in case */
1190 /*mov eax, cr4*/
1191 _emit 0x0f
1192 _emit 0x20
1193 _emit 0xe0
1194 mov [uCR4], eax
1195 pop eax
1196# endif
1197 }
1198# endif
1199 return uCR4;
1200}
1201#endif
1202
1203
1204/**
1205 * Sets the CR4 register.
1206 *
1207 * @param uCR4 New CR4 value.
1208 */
1209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1210DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1211#else
1212DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1213{
1214# if RT_INLINE_ASM_USES_INTRIN
1215 __writecr4(uCR4);
1216
1217# elif RT_INLINE_ASM_GNU_STYLE
1218# ifdef RT_ARCH_AMD64
1219 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1220# else
1221 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1222# endif
1223# else
1224 __asm
1225 {
1226# ifdef RT_ARCH_AMD64
1227 mov rax, [uCR4]
1228 mov cr4, rax
1229# else
1230 mov eax, [uCR4]
1231 _emit 0x0F
1232 _emit 0x22
1233 _emit 0xE0 /* mov cr4, eax */
1234# endif
1235 }
1236# endif
1237}
1238#endif
1239
1240
1241/**
1242 * Get cr8.
1243 * @returns cr8.
1244 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1245 */
1246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1247DECLASM(RTCCUINTREG) ASMGetCR8(void);
1248#else
1249DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1250{
1251# ifdef RT_ARCH_AMD64
1252 RTCCUINTREG uCR8;
1253# if RT_INLINE_ASM_USES_INTRIN
1254 uCR8 = __readcr8();
1255
1256# elif RT_INLINE_ASM_GNU_STYLE
1257 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1258# else
1259 __asm
1260 {
1261 mov rax, cr8
1262 mov [uCR8], rax
1263 }
1264# endif
1265 return uCR8;
1266# else /* !RT_ARCH_AMD64 */
1267 return 0;
1268# endif /* !RT_ARCH_AMD64 */
1269}
1270#endif
1271
1272
1273/**
1274 * Enables interrupts (EFLAGS.IF).
1275 */
1276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1277DECLASM(void) ASMIntEnable(void);
1278#else
1279DECLINLINE(void) ASMIntEnable(void)
1280{
1281# if RT_INLINE_ASM_GNU_STYLE
1282 __asm("sti\n");
1283# elif RT_INLINE_ASM_USES_INTRIN
1284 _enable();
1285# else
1286 __asm sti
1287# endif
1288}
1289#endif
1290
1291
1292/**
1293 * Disables interrupts (!EFLAGS.IF).
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(void) ASMIntDisable(void);
1297#else
1298DECLINLINE(void) ASMIntDisable(void)
1299{
1300# if RT_INLINE_ASM_GNU_STYLE
1301 __asm("cli\n");
1302# elif RT_INLINE_ASM_USES_INTRIN
1303 _disable();
1304# else
1305 __asm cli
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Disables interrupts and returns previous xFLAGS.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1316#else
1317DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1318{
1319 RTCCUINTREG xFlags;
1320# if RT_INLINE_ASM_GNU_STYLE
1321# ifdef RT_ARCH_AMD64
1322 __asm__ __volatile__("pushfq\n\t"
1323 "cli\n\t"
1324 "popq %0\n\t"
1325 : "=rm" (xFlags));
1326# else
1327 __asm__ __volatile__("pushfl\n\t"
1328 "cli\n\t"
1329 "popl %0\n\t"
1330 : "=rm" (xFlags));
1331# endif
1332# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1333 xFlags = ASMGetFlags();
1334 _disable();
1335# else
1336 __asm {
1337 pushfd
1338 cli
1339 pop [xFlags]
1340 }
1341# endif
1342 return xFlags;
1343}
1344#endif
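/*
 * Editorial usage sketch (not part of the original header): the canonical
 * pairing of ASMIntDisableFlags with ASMSetFlags, creating a short
 * interrupt-free window and then restoring whatever the previous interrupt
 * state was.
 *
 * @code
 *  RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
 *  // ... touch per-CPU data that must not be interrupted ...
 *  ASMSetFlags(fSavedFlags);
 * @endcode
 */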
1345
1346
1347/**
1348 * Reads a machine specific register.
1349 *
1350 * @returns Register content.
1351 * @param uRegister Register to read.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1355#else
1356DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1357{
1358 RTUINT64U u;
1359# if RT_INLINE_ASM_GNU_STYLE
1360 __asm__ ("rdmsr\n\t"
1361 : "=a" (u.s.Lo),
1362 "=d" (u.s.Hi)
1363 : "c" (uRegister));
1364
1365# elif RT_INLINE_ASM_USES_INTRIN
1366 u.u = __readmsr(uRegister);
1367
1368# else
1369 __asm
1370 {
1371 mov ecx, [uRegister]
1372 rdmsr
1373 mov [u.s.Lo], eax
1374 mov [u.s.Hi], edx
1375 }
1376# endif
1377
1378 return u.u;
1379}
1380#endif
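/*
 * Editorial usage sketch (not part of the original header): reading the local
 * APIC base MSR. Index 0x1b is the architectural IA32_APIC_BASE MSR and bit 11
 * its global-enable flag; the symbolic name below is a local constant invented
 * for the example.
 *
 * @code
 *  #define EXAMPLE_MSR_IA32_APIC_BASE 0x1b
 *  uint64_t u64ApicBase  = ASMRdMsr(EXAMPLE_MSR_IA32_APIC_BASE);
 *  bool     fApicEnabled = (u64ApicBase & RT_BIT(11)) != 0;
 * @endcode
 */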
1381
1382
1383/**
1384 * Writes a machine specific register.
1385 *
1386 * @returns Register content.
1387 * @param uRegister Register to write to.
1388 * @param u64Val Value to write.
1389 */
1390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1391DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1392#else
1393DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1394{
1395 RTUINT64U u;
1396
1397 u.u = u64Val;
1398# if RT_INLINE_ASM_GNU_STYLE
1399 __asm__ __volatile__("wrmsr\n\t"
1400 ::"a" (u.s.Lo),
1401 "d" (u.s.Hi),
1402 "c" (uRegister));
1403
1404# elif RT_INLINE_ASM_USES_INTRIN
1405 __writemsr(uRegister, u.u);
1406
1407# else
1408 __asm
1409 {
1410 mov ecx, [uRegister]
1411 mov edx, [u.s.Hi]
1412 mov eax, [u.s.Lo]
1413 wrmsr
1414 }
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Reads low part of a machine specific register.
1422 *
1423 * @returns Register content.
1424 * @param uRegister Register to read.
1425 */
1426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1427DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1428#else
1429DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1430{
1431 uint32_t u32;
1432# if RT_INLINE_ASM_GNU_STYLE
1433 __asm__ ("rdmsr\n\t"
1434 : "=a" (u32)
1435 : "c" (uRegister)
1436 : "edx");
1437
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 u32 = (uint32_t)__readmsr(uRegister);
1440
1441#else
1442 __asm
1443 {
1444 mov ecx, [uRegister]
1445 rdmsr
1446 mov [u32], eax
1447 }
1448# endif
1449
1450 return u32;
1451}
1452#endif
1453
1454
1455/**
1456 * Reads high part of a machine specific register.
1457 *
1458 * @returns Register content.
1459 * @param uRegister Register to read.
1460 */
1461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1462DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1463#else
1464DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1465{
1466 uint32_t u32;
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm__ ("rdmsr\n\t"
1469 : "=d" (u32)
1470 : "c" (uRegister)
1471 : "eax");
1472
1473# elif RT_INLINE_ASM_USES_INTRIN
1474 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1475
1476# else
1477 __asm
1478 {
1479 mov ecx, [uRegister]
1480 rdmsr
1481 mov [u32], edx
1482 }
1483# endif
1484
1485 return u32;
1486}
1487#endif
1488
1489
1490/**
1491 * Gets dr7.
1492 *
1493 * @returns dr7.
1494 */
1495#if RT_INLINE_ASM_EXTERNAL
1496DECLASM(RTCCUINTREG) ASMGetDR7(void);
1497#else
1498DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1499{
1500 RTCCUINTREG uDR7;
1501# if RT_INLINE_ASM_GNU_STYLE
1502# ifdef RT_ARCH_AMD64
1503 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1504# else
1505 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1506# endif
1507# else
1508 __asm
1509 {
1510# ifdef RT_ARCH_AMD64
1511 mov rax, dr7
1512 mov [uDR7], rax
1513# else
1514 mov eax, dr7
1515 mov [uDR7], eax
1516# endif
1517 }
1518# endif
1519 return uDR7;
1520}
1521#endif
1522
1523
1524/**
1525 * Gets dr6.
1526 *
1527 * @returns dr6.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL
1530DECLASM(RTCCUINTREG) ASMGetDR6(void);
1531#else
1532DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1533{
1534 RTCCUINTREG uDR6;
1535# if RT_INLINE_ASM_GNU_STYLE
1536# ifdef RT_ARCH_AMD64
1537 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1538# else
1539 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1540# endif
1541# else
1542 __asm
1543 {
1544# ifdef RT_ARCH_AMD64
1545 mov rax, dr6
1546 mov [uDR6], rax
1547# else
1548 mov eax, dr6
1549 mov [uDR6], eax
1550# endif
1551 }
1552# endif
1553 return uDR6;
1554}
1555#endif
1556
1557
1558/**
1559 * Reads and clears DR6.
1560 *
1561 * @returns DR6.
1562 */
1563#if RT_INLINE_ASM_EXTERNAL
1564DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1565#else
1566DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1567{
1568 RTCCUINTREG uDR6;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 11-4 are 1's, 12 and 63-32 are zero. */
1571# ifdef RT_ARCH_AMD64
1572 __asm__ ("movq %%dr6, %0\n\t"
1573 "movq %1, %%dr6\n\t"
1574 : "=r" (uDR6)
1575 : "r" (uNewValue));
1576# else
1577 __asm__ ("movl %%dr6, %0\n\t"
1578 "movl %1, %%dr6\n\t"
1579 : "=r" (uDR6)
1580 : "r" (uNewValue));
1581# endif
1582# else
1583 __asm
1584 {
1585# ifdef RT_ARCH_AMD64
1586 mov rax, dr6
1587 mov [uDR6], rax
1588 mov rcx, rax
1589 mov ecx, 0ffff0ff0h; /* 31-16 and 11-4 are 1's, 12 and 63-32 are zero. */
1590 mov dr6, rcx
1591# else
1592 mov eax, dr6
1593 mov [uDR6], eax
1594 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1595 mov dr6, ecx
1596# endif
1597 }
1598# endif
1599 return uDR6;
1600}
1601#endif
1602
1603
1604/**
1605 * Compiler memory barrier.
1606 *
1607 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1608 * values or any outstanding writes when returning from this function.
1609 *
1610 * This function must be used if non-volatile data is modified by a
1611 * device or the VMM. Typical cases are port access, MMIO access,
1612 * trapping instructions, etc.
1613 */
1614#if RT_INLINE_ASM_GNU_STYLE
1615# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1616#elif RT_INLINE_ASM_USES_INTRIN
1617# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1618#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1619DECLINLINE(void) ASMCompilerBarrier(void)
1620{
1621 __asm
1622 {
1623 }
1624}
1625#endif
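/*
 * Editorial usage sketch (not part of the original header): forcing the
 * compiler to re-read a flag that a device or trap handler may change behind
 * its back, without paying for a hardware fence. g_u32ExampleFlag is a
 * hypothetical variable.
 *
 * @code
 *  extern uint32_t g_u32ExampleFlag;   // non-volatile, updated by a trap handler
 *
 *  void ExampleWait(void)
 *  {
 *      while (!g_u32ExampleFlag)
 *          ASMCompilerBarrier();       // don't let the load be hoisted out of the loop
 *  }
 * @endcode
 */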
1626
1627
1628/**
1629 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1630 *
1631 * @param Port I/O port to write to.
1632 * @param u8 8-bit integer to write.
1633 */
1634#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1635DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1636#else
1637DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1638{
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("outb %b1, %w0\n\t"
1641 :: "Nd" (Port),
1642 "a" (u8));
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 __outbyte(Port, u8);
1646
1647# else
1648 __asm
1649 {
1650 mov dx, [Port]
1651 mov al, [u8]
1652 out dx, al
1653 }
1654# endif
1655}
1656#endif
1657
1658
1659/**
1660 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1661 *
1662 * @returns 8-bit integer.
1663 * @param Port I/O port to read from.
1664 */
1665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1666DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1667#else
1668DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1669{
1670 uint8_t u8;
1671# if RT_INLINE_ASM_GNU_STYLE
1672 __asm__ __volatile__("inb %w1, %b0\n\t"
1673 : "=a" (u8)
1674 : "Nd" (Port));
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 u8 = __inbyte(Port);
1678
1679# else
1680 __asm
1681 {
1682 mov dx, [Port]
1683 in al, dx
1684 mov [u8], al
1685 }
1686# endif
1687 return u8;
1688}
1689#endif
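/*
 * Editorial usage sketch (not part of the original header): the classic
 * index/data port pair of the PC CMOS/RTC, where 0x70 selects a register and
 * 0x71 returns its value. The port numbers and register index are standard PC
 * values quoted for illustration, not something this header defines.
 *
 * @code
 *  ASMOutU8(0x70, 0x00);               // select CMOS register 0 (RTC seconds)
 *  uint8_t uSeconds = ASMInU8(0x71);
 * @endcode
 */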
1690
1691
1692/**
1693 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1694 *
1695 * @param Port I/O port to write to.
1696 * @param u16 16-bit integer to write.
1697 */
1698#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1699DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1700#else
1701DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1702{
1703# if RT_INLINE_ASM_GNU_STYLE
1704 __asm__ __volatile__("outw %w1, %w0\n\t"
1705 :: "Nd" (Port),
1706 "a" (u16));
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 __outword(Port, u16);
1710
1711# else
1712 __asm
1713 {
1714 mov dx, [Port]
1715 mov ax, [u16]
1716 out dx, ax
1717 }
1718# endif
1719}
1720#endif
1721
1722
1723/**
1724 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1725 *
1726 * @returns 16-bit integer.
1727 * @param Port I/O port to read from.
1728 */
1729#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1730DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1731#else
1732DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1733{
1734 uint16_t u16;
1735# if RT_INLINE_ASM_GNU_STYLE
1736 __asm__ __volatile__("inw %w1, %w0\n\t"
1737 : "=a" (u16)
1738 : "Nd" (Port));
1739
1740# elif RT_INLINE_ASM_USES_INTRIN
1741 u16 = __inword(Port);
1742
1743# else
1744 __asm
1745 {
1746 mov dx, [Port]
1747 in ax, dx
1748 mov [u16], ax
1749 }
1750# endif
1751 return u16;
1752}
1753#endif
1754
1755
1756/**
1757 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to write to.
1760 * @param u32 32-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1764#else
1765DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outl %1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u32));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outdword(Port, u32);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov eax, [u32]
1780 out dx, eax
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 32-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1795#else
1796DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1797{
1798 uint32_t u32;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inl %w1, %0\n\t"
1801 : "=a" (u32)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u32 = __indword(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in eax, dx
1812 mov [u32], eax
1813 }
1814# endif
1815 return u32;
1816}
1817#endif
1818
1819/** @todo string i/o */
1820
1821
1822/**
1823 * Atomically Exchange an unsigned 8-bit value, ordered.
1824 *
1825 * @returns Current *pu8 value
1826 * @param pu8 Pointer to the 8-bit variable to update.
1827 * @param u8 The 8-bit value to assign to *pu8.
1828 */
1829#if RT_INLINE_ASM_EXTERNAL
1830DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1831#else
1832DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1833{
1834# if RT_INLINE_ASM_GNU_STYLE
1835 __asm__ __volatile__("xchgb %0, %1\n\t"
1836 : "=m" (*pu8),
1837 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1838 : "1" (u8));
1839# else
1840 __asm
1841 {
1842# ifdef RT_ARCH_AMD64
1843 mov rdx, [pu8]
1844 mov al, [u8]
1845 xchg [rdx], al
1846 mov [u8], al
1847# else
1848 mov edx, [pu8]
1849 mov al, [u8]
1850 xchg [edx], al
1851 mov [u8], al
1852# endif
1853 }
1854# endif
1855 return u8;
1856}
1857#endif
1858
1859
1860/**
1861 * Atomically Exchange a signed 8-bit value, ordered.
1862 *
1863 * @returns Current *pi8 value
1864 * @param pi8 Pointer to the 8-bit variable to update.
1865 * @param i8 The 8-bit value to assign to *pi8.
1866 */
1867DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1868{
1869 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1870}
1871
1872
1873/**
1874 * Atomically Exchange a bool value, ordered.
1875 *
1876 * @returns Current *pf value
1877 * @param pf Pointer to the boolean variable to update.
1878 * @param f The boolean value to assign to *pf.
1879 */
1880DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1881{
1882#ifdef _MSC_VER
1883 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1884#else
1885 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1886#endif
1887}
1888
1889
1890/**
1891 * Atomically Exchange an unsigned 16-bit value, ordered.
1892 *
1893 * @returns Current *pu16 value
1894 * @param pu16 Pointer to the 16-bit variable to update.
1895 * @param u16 The 16-bit value to assign to *pu16.
1896 */
1897#if RT_INLINE_ASM_EXTERNAL
1898DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1899#else
1900DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1901{
1902# if RT_INLINE_ASM_GNU_STYLE
1903 __asm__ __volatile__("xchgw %0, %1\n\t"
1904 : "=m" (*pu16),
1905 "=r" (u16)
1906 : "1" (u16));
1907# else
1908 __asm
1909 {
1910# ifdef RT_ARCH_AMD64
1911 mov rdx, [pu16]
1912 mov ax, [u16]
1913 xchg [rdx], ax
1914 mov [u16], ax
1915# else
1916 mov edx, [pu16]
1917 mov ax, [u16]
1918 xchg [edx], ax
1919 mov [u16], ax
1920# endif
1921 }
1922# endif
1923 return u16;
1924}
1925#endif
1926
1927
1928/**
1929 * Atomically Exchange a signed 16-bit value, ordered.
1930 *
1931 * @returns Current *pi16 value
1932 * @param pi16 Pointer to the 16-bit variable to update.
1933 * @param i16 The 16-bit value to assign to *pi16.
1934 */
1935DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1936{
1937 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1938}
1939
1940
1941/**
1942 * Atomically Exchange an unsigned 32-bit value, ordered.
1943 *
1944 * @returns Current *pu32 value
1945 * @param pu32 Pointer to the 32-bit variable to update.
1946 * @param u32 The 32-bit value to assign to *pu32.
1947 */
1948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1949DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1950#else
1951DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1952{
1953# if RT_INLINE_ASM_GNU_STYLE
1954 __asm__ __volatile__("xchgl %0, %1\n\t"
1955 : "=m" (*pu32),
1956 "=r" (u32)
1957 : "1" (u32));
1958
1959# elif RT_INLINE_ASM_USES_INTRIN
1960 u32 = _InterlockedExchange((long *)pu32, u32);
1961
1962# else
1963 __asm
1964 {
1965# ifdef RT_ARCH_AMD64
1966 mov rdx, [pu32]
1967 mov eax, u32
1968 xchg [rdx], eax
1969 mov [u32], eax
1970# else
1971 mov edx, [pu32]
1972 mov eax, u32
1973 xchg [edx], eax
1974 mov [u32], eax
1975# endif
1976 }
1977# endif
1978 return u32;
1979}
1980#endif
1981
1982
1983/**
1984 * Atomically Exchange a signed 32-bit value, ordered.
1985 *
1986 * @returns Current *pi32 value
1987 * @param pi32 Pointer to the 32-bit variable to update.
1988 * @param i32 The 32-bit value to assign to *pi32.
1989 */
1990DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1991{
1992 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1993}
1994
1995
1996/**
1997 * Atomically Exchange an unsigned 64-bit value, ordered.
1998 *
1999 * @returns Current *pu64 value
2000 * @param pu64 Pointer to the 64-bit variable to update.
2001 * @param u64 The 64-bit value to assign to *pu64.
2002 */
2003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2004DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2005#else
2006DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2007{
2008# if defined(RT_ARCH_AMD64)
2009# if RT_INLINE_ASM_USES_INTRIN
2010 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2011
2012# elif RT_INLINE_ASM_GNU_STYLE
2013 __asm__ __volatile__("xchgq %0, %1\n\t"
2014 : "=m" (*pu64),
2015 "=r" (u64)
2016 : "1" (u64));
2017# else
2018 __asm
2019 {
2020 mov rdx, [pu64]
2021 mov rax, [u64]
2022 xchg [rdx], rax
2023 mov [u64], rax
2024 }
2025# endif
2026# else /* !RT_ARCH_AMD64 */
2027# if RT_INLINE_ASM_GNU_STYLE
2028# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2029 uint32_t u32 = (uint32_t)u64;
2030 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2031 "xchgl %%ebx, %3\n\t"
2032 "1:\n\t"
2033 "lock; cmpxchg8b (%5)\n\t"
2034 "jnz 1b\n\t"
2035 "xchgl %%ebx, %3\n\t"
2036 /*"xchgl %%esi, %5\n\t"*/
2037 : "=A" (u64),
2038 "=m" (*pu64)
2039 : "0" (*pu64),
2040 "m" ( u32 ),
2041 "c" ( (uint32_t)(u64 >> 32) ),
2042 "S" (pu64) );
2043# else /* !PIC */
2044 __asm__ __volatile__("1:\n\t"
2045 "lock; cmpxchg8b %1\n\t"
2046 "jnz 1b\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (*pu64),
2050 "b" ( (uint32_t)u64 ),
2051 "c" ( (uint32_t)(u64 >> 32) ));
2052# endif
2053# else
2054 __asm
2055 {
2056 mov ebx, dword ptr [u64]
2057 mov ecx, dword ptr [u64 + 4]
2058 mov edi, pu64
2059 mov eax, dword ptr [edi]
2060 mov edx, dword ptr [edi + 4]
2061 retry:
2062 lock cmpxchg8b [edi]
2063 jnz retry
2064 mov dword ptr [u64], eax
2065 mov dword ptr [u64 + 4], edx
2066 }
2067# endif
2068# endif /* !RT_ARCH_AMD64 */
2069 return u64;
2070}
2071#endif
2072
2073
2074/**
2075 * Atomically Exchange a signed 64-bit value, ordered.
2076 *
2077 * @returns Current *pi64 value
2078 * @param pi64 Pointer to the 64-bit variable to update.
2079 * @param i64 The 64-bit value to assign to *pi64.
2080 */
2081DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2082{
2083 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2084}
2085
2086
2087#ifdef RT_ARCH_AMD64
2088/**
2089 * Atomically Exchange an unsigned 128-bit value, ordered.
2090 *
2091 * @returns Current *pu128.
2092 * @param pu128 Pointer to the 128-bit variable to update.
2093 * @param u128 The 128-bit value to assign to *pu128.
2094 *
2095 * @remark We cannot really assume that any hardware supports this. Nor do I have
2096 * GAS support for it. So, for the time being we'll BREAK the atomic
2097 * bit of this function and use two 64-bit exchanges instead.
2098 */
2099# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2100DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2101# else
2102DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2103{
2104 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2105 {
2106 /** @todo this is clumsy code */
2107 RTUINT128U u128Ret;
2108 u128Ret.u = u128;
2109 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2110 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2111 return u128Ret.u;
2112 }
2113#if 0 /* later? */
2114 else
2115 {
2116# if RT_INLINE_ASM_GNU_STYLE
2117 __asm__ __volatile__("1:\n\t"
2118 "lock; cmpxchg8b %1\n\t"
2119 "jnz 1b\n\t"
2120 : "=A" (u128),
2121 "=m" (*pu128)
2122 : "0" (*pu128),
2123 "b" ( (uint64_t)u128 ),
2124 "c" ( (uint64_t)(u128 >> 64) ));
2125# else
2126 __asm
2127 {
2128 mov rbx, dword ptr [u128]
2129 mov rcx, dword ptr [u128 + 8]
2130 mov rdi, pu128
2131 mov rax, dword ptr [rdi]
2132 mov rdx, dword ptr [rdi + 8]
2133 retry:
2134 lock cmpxchg16b [rdi]
2135 jnz retry
2136 mov dword ptr [u128], rax
2137 mov dword ptr [u128 + 8], rdx
2138 }
2139# endif
2140 }
2141 return u128;
2142#endif
2143}
2144# endif
2145#endif /* RT_ARCH_AMD64 */
2146
2147
2148/**
2149 * Atomically Exchange a value which size might differ
2150 * between platforms or compilers, ordered.
2151 *
2152 * @param pu Pointer to the variable to update.
2153 * @param uNew The value to assign to *pu.
2154 */
2155#define ASMAtomicXchgSize(pu, uNew) \
2156 do { \
2157 switch (sizeof(*(pu))) { \
2158 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2159 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2160 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2161 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2162 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2163 } \
2164 } while (0)
2165
2166
2167/**
2168 * Atomically Exchange a pointer value, ordered.
2169 *
2170 * @returns Current *ppv value
2171 * @param ppv Pointer to the pointer variable to update.
2172 * @param pv The pointer value to assign to *ppv.
2173 */
2174DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2175{
2176#if ARCH_BITS == 32
2177 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2178#elif ARCH_BITS == 64
2179 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2180#else
2181# error "ARCH_BITS is bogus"
2182#endif
2183}
2184
2185
2186/**
2187 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2188 *
2189 * @returns true if xchg was done.
2190 * @returns false if xchg wasn't done.
2191 *
2192 * @param pu32 Pointer to the value to update.
2193 * @param u32New The new value to assign to *pu32.
2194 * @param u32Old The old value to compare *pu32 with.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2198#else
2199DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 uint8_t u8Ret;
2203 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2204 "setz %1\n\t"
2205 : "=m" (*pu32),
2206 "=qm" (u8Ret),
2207 "=a" (u32Old)
2208 : "r" (u32New),
2209 "2" (u32Old));
2210 return (bool)u8Ret;
2211
2212# elif RT_INLINE_ASM_USES_INTRIN
2213 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2214
2215# else
2216 uint32_t u32Ret;
2217 __asm
2218 {
2219# ifdef RT_ARCH_AMD64
2220 mov rdx, [pu32]
2221# else
2222 mov edx, [pu32]
2223# endif
2224 mov eax, [u32Old]
2225 mov ecx, [u32New]
2226# ifdef RT_ARCH_AMD64
2227 lock cmpxchg [rdx], ecx
2228# else
2229 lock cmpxchg [edx], ecx
2230# endif
2231 setz al
2232 movzx eax, al
2233 mov [u32Ret], eax
2234 }
2235 return !!u32Ret;
2236# endif
2237}
2238#endif
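/*
 * Editorial usage sketch (not part of the original header): the usual
 * compare-and-exchange retry loop, here implementing a saturating increment
 * that never wraps past UINT32_MAX.
 *
 * @code
 *  static void ExampleSaturatingInc(volatile uint32_t *pu32)
 *  {
 *      uint32_t u32Old;
 *      do
 *      {
 *          u32Old = *pu32;
 *          if (u32Old == UINT32_MAX)
 *              return;              // already saturated
 *      } while (!ASMAtomicCmpXchgU32(pu32, u32Old + 1, u32Old));
 *  }
 * @endcode
 */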
2239
2240
2241/**
2242 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2243 *
2244 * @returns true if xchg was done.
2245 * @returns false if xchg wasn't done.
2246 *
2247 * @param pi32 Pointer to the value to update.
2248 * @param i32New The new value to assign to *pi32.
2249 * @param i32Old The old value to compare *pi32 with.
2250 */
2251DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2252{
2253 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2254}
2255
2256
2257/**
2258 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2259 *
2260 * @returns true if xchg was done.
2261 * @returns false if xchg wasn't done.
2262 *
2263 * @param pu64 Pointer to the 64-bit variable to update.
2264 * @param u64New The 64-bit value to assign to *pu64.
2265 * @param u64Old The value to compare with.
2266 */
2267#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2268DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2269#else
2270DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2271{
2272# if RT_INLINE_ASM_USES_INTRIN
2273 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2274
2275# elif defined(RT_ARCH_AMD64)
2276# if RT_INLINE_ASM_GNU_STYLE
2277 uint8_t u8Ret;
2278 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2279 "setz %1\n\t"
2280 : "=m" (*pu64),
2281 "=qm" (u8Ret)
2282 : "r" (u64New),
2283 "a" (u64Old));
2284 return (bool)u8Ret;
2285# else
2286 bool fRet;
2287 __asm
2288 {
2289 mov rdx, [pu64]
2290 mov rax, [u64Old]
2291 mov rcx, [u64New]
2292 lock cmpxchg [rdx], rcx
2293 setz al
2294 mov [fRet], al
2295 }
2296 return fRet;
2297# endif
2298# else /* !RT_ARCH_AMD64 */
2299 uint32_t u32Ret;
2300# if RT_INLINE_ASM_GNU_STYLE
2301# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2302 uint32_t u32 = (uint32_t)u64New;
2303 uint32_t u32Spill;
2304 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2305 "lock; cmpxchg8b (%6)\n\t"
2306 "setz %%al\n\t"
2307 "xchgl %%ebx, %4\n\t"
2308 "movzbl %%al, %%eax\n\t"
2309 : "=a" (u32Ret),
2310 "=d" (u32Spill),
2311 "=m" (*pu64)
2312 : "A" (u64Old),
2313 "m" ( u32 ),
2314 "c" ( (uint32_t)(u64New >> 32) ),
2315 "S" (pu64) );
2316# else /* !PIC */
2317 uint32_t u32Spill;
2318 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2319 "setz %%al\n\t"
2320 "movzbl %%al, %%eax\n\t"
2321 : "=a" (u32Ret),
2322 "=d" (u32Spill),
2323 "=m" (*pu64)
2324 : "A" (u64Old),
2325 "b" ( (uint32_t)u64New ),
2326 "c" ( (uint32_t)(u64New >> 32) ));
2327# endif
2328 return (bool)u32Ret;
2329# else
2330 __asm
2331 {
2332 mov ebx, dword ptr [u64New]
2333 mov ecx, dword ptr [u64New + 4]
2334 mov edi, [pu64]
2335 mov eax, dword ptr [u64Old]
2336 mov edx, dword ptr [u64Old + 4]
2337 lock cmpxchg8b [edi]
2338 setz al
2339 movzx eax, al
2340 mov dword ptr [u32Ret], eax
2341 }
2342 return !!u32Ret;
2343# endif
2344# endif /* !RT_ARCH_AMD64 */
2345}
2346#endif
2347
2348
2349/**
2350 * Atomically Compare and exchange a signed 64-bit value, ordered.
2351 *
2352 * @returns true if xchg was done.
2353 * @returns false if xchg wasn't done.
2354 *
2355 * @param pi64 Pointer to the 64-bit variable to update.
2356 * @param i64 The 64-bit value to assign to *pi64.
2357 * @param i64Old The value to compare with.
2358 */
2359DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2360{
2361 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2362}
2363
2364
2365/** @def ASMAtomicCmpXchgSize
2366 * Atomically Compare and Exchange a value which size might differ
2367 * between platforms or compilers, ordered.
2368 *
2369 * @param pu Pointer to the value to update.
2370 * @param uNew The new value to assign to *pu.
2371 * @param uOld The old value to compare *pu with.
2372 * @param fRc Where to store the result.
2373 */
2374#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2375 do { \
2376 switch (sizeof(*(pu))) { \
2377 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2378 break; \
2379 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2380 break; \
2381 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2382 (fRc) = false; \
2383 break; \
2384 } \
2385 } while (0)
2386
2387
2388/**
2389 * Atomically Compare and Exchange a pointer value, ordered.
2390 *
2391 * @returns true if xchg was done.
2392 * @returns false if xchg wasn't done.
2393 *
2394 * @param ppv Pointer to the value to update.
2395 * @param   pvNew       The new value to assign to *ppv.
2396 * @param   pvOld       The old value to compare *ppv with.
2397 */
2398DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2399{
2400#if ARCH_BITS == 32
2401 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2402#elif ARCH_BITS == 64
2403 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2404#else
2405# error "ARCH_BITS is bogus"
2406#endif
2407}
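
/* Usage sketch (illustrative only): lock-free push onto a singly linked list.
 * RTEXAMPLENODE, ppHead and rtExamplePush are hypothetical names, not IPRT
 * API.
 *
 *    typedef struct RTEXAMPLENODE { struct RTEXAMPLENODE *pNext; } RTEXAMPLENODE;
 *
 *    static void rtExamplePush(RTEXAMPLENODE * volatile *ppHead, RTEXAMPLENODE *pNode)
 *    {
 *        RTEXAMPLENODE *pHead;
 *        do
 *        {
 *            pHead = (RTEXAMPLENODE *)ASMAtomicReadPtr((void * volatile *)ppHead);
 *            pNode->pNext = pHead;
 *        } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pHead));
 *    }
 */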
2408
2409
2410/**
2411 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2412 * passes back old value, ordered.
2413 *
2414 * @returns true if xchg was done.
2415 * @returns false if xchg wasn't done.
2416 *
2417 * @param pu32 Pointer to the value to update.
2418 * @param   u32New      The new value to assign to *pu32.
2419 * @param   u32Old      The old value to compare *pu32 with.
2420 * @param   pu32Old     Pointer to store the old value at.
2421 */
2422#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2423DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2424#else
2425DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2426{
2427# if RT_INLINE_ASM_GNU_STYLE
2428 uint8_t u8Ret;
2429 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2430 "setz %1\n\t"
2431 : "=m" (*pu32),
2432 "=qm" (u8Ret),
2433 "=a" (*pu32Old)
2434 : "r" (u32New),
2435 "a" (u32Old));
2436 return (bool)u8Ret;
2437
2438# elif RT_INLINE_ASM_USES_INTRIN
2439    return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2440
2441# else
2442 uint32_t u32Ret;
2443 __asm
2444 {
2445# ifdef RT_ARCH_AMD64
2446 mov rdx, [pu32]
2447# else
2448 mov edx, [pu32]
2449# endif
2450 mov eax, [u32Old]
2451 mov ecx, [u32New]
2452# ifdef RT_ARCH_AMD64
2453 lock cmpxchg [rdx], ecx
2454 mov rdx, [pu32Old]
2455 mov [rdx], eax
2456# else
2457 lock cmpxchg [edx], ecx
2458 mov edx, [pu32Old]
2459 mov [edx], eax
2460# endif
2461 setz al
2462 movzx eax, al
2463 mov [u32Ret], eax
2464 }
2465 return !!u32Ret;
2466# endif
2467}
2468#endif
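
/* Usage sketch (illustrative only): the Ex variant hands back the value that
 * was actually found, so a retry loop needs no separate re-read between
 * attempts. pu32Flags is a hypothetical name.
 *
 *    uint32_t u32Old = ASMAtomicReadU32(pu32Flags);
 *    for (;;)
 *    {
 *        uint32_t u32Seen;
 *        if (ASMAtomicCmpXchgExU32(pu32Flags, u32Old | 1, u32Old, &u32Seen))
 *            break;
 *        u32Old = u32Seen;
 *    }
 */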
2469
2470
2471/**
2472 * Atomically Compare and Exchange a signed 32-bit value, additionally
2473 * passes back old value, ordered.
2474 *
2475 * @returns true if xchg was done.
2476 * @returns false if xchg wasn't done.
2477 *
2478 * @param pi32 Pointer to the value to update.
2479 * @param   i32New      The new value to assign to *pi32.
2480 * @param   i32Old      The old value to compare *pi32 with.
2481 * @param   pi32Old     Pointer to store the old value at.
2482 */
2483DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2484{
2485 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2486}
2487
2488
2489/**
2490 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2491 * passing back old value, ordered.
2492 *
2493 * @returns true if xchg was done.
2494 * @returns false if xchg wasn't done.
2495 *
2496 * @param pu64 Pointer to the 64-bit variable to update.
2497 * @param u64New The 64-bit value to assign to *pu64.
2498 * @param u64Old The value to compare with.
2499 * @param   pu64Old     Pointer to store the old value at.
2500 */
2501#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2502DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2503#else
2504DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2505{
2506# if RT_INLINE_ASM_USES_INTRIN
2507    return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2508
2509# elif defined(RT_ARCH_AMD64)
2510# if RT_INLINE_ASM_GNU_STYLE
2511 uint8_t u8Ret;
2512 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2513 "setz %1\n\t"
2514 : "=m" (*pu64),
2515 "=qm" (u8Ret),
2516 "=a" (*pu64Old)
2517 : "r" (u64New),
2518 "a" (u64Old));
2519 return (bool)u8Ret;
2520# else
2521 bool fRet;
2522 __asm
2523 {
2524        mov     rdx, [pu64]
2525 mov rax, [u64Old]
2526 mov rcx, [u64New]
2527 lock cmpxchg [rdx], rcx
2528 mov rdx, [pu64Old]
2529 mov [rdx], rax
2530 setz al
2531 mov [fRet], al
2532 }
2533 return fRet;
2534# endif
2535# else /* !RT_ARCH_AMD64 */
2536# if RT_INLINE_ASM_GNU_STYLE
2537 uint64_t u64Ret;
2538# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2539 /* NB: this code uses a memory clobber description, because the clean
2540 * solution with an output value for *pu64 makes gcc run out of registers.
2541 * This will cause suboptimal code, and anyone with a better solution is
2542 * welcome to improve this. */
2543 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2544 "lock; cmpxchg8b %3\n\t"
2545 "xchgl %%ebx, %1\n\t"
2546 : "=A" (u64Ret)
2547 : "DS" ((uint32_t)u64New),
2548 "c" ((uint32_t)(u64New >> 32)),
2549 "m" (*pu64),
2550 "0" (u64Old)
2551 : "memory" );
2552# else /* !PIC */
2553 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2554 : "=A" (u64Ret),
2555 "=m" (*pu64)
2556 : "b" ((uint32_t)u64New),
2557 "c" ((uint32_t)(u64New >> 32)),
2558 "m" (*pu64),
2559 "0" (u64Old));
2560# endif
2561 *pu64Old = u64Ret;
2562 return u64Ret == u64Old;
2563# else
2564 uint32_t u32Ret;
2565 __asm
2566 {
2567 mov ebx, dword ptr [u64New]
2568 mov ecx, dword ptr [u64New + 4]
2569 mov edi, [pu64]
2570 mov eax, dword ptr [u64Old]
2571 mov edx, dword ptr [u64Old + 4]
2572 lock cmpxchg8b [edi]
2573 mov ebx, [pu64Old]
2574 mov [ebx], eax
2575 setz al
2576 movzx eax, al
2577 add ebx, 4
2578 mov [ebx], edx
2579 mov dword ptr [u32Ret], eax
2580 }
2581 return !!u32Ret;
2582# endif
2583# endif /* !RT_ARCH_AMD64 */
2584}
2585#endif
2586
2587
2588/**
2589 * Atomically Compare and exchange a signed 64-bit value, additionally
2590 * passing back old value, ordered.
2591 *
2592 * @returns true if xchg was done.
2593 * @returns false if xchg wasn't done.
2594 *
2595 * @param pi64 Pointer to the 64-bit variable to update.
2596 * @param   i64         The 64-bit value to assign to *pi64.
2597 * @param   i64Old      The value to compare with.
2598 * @param   pi64Old     Pointer to store the old value at.
2599 */
2600DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2601{
2602 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2603}
2604
2605
2606/** @def ASMAtomicCmpXchgExSize
2607 * Atomically Compare and Exchange a value whose size might differ
2608 * between platforms or compilers. Additionally passes back old value.
2609 *
2610 * @param pu Pointer to the value to update.
2611 * @param   uNew        The new value to assign to *pu.
2612 * @param   uOld        The old value to compare *pu with.
2613 * @param fRc Where to store the result.
2614 * @param uOldVal Where to store the old value.
2615 */
2616#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2617 do { \
2618 switch (sizeof(*(pu))) { \
2619 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2620 break; \
2621 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2622 break; \
2623            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2624 (fRc) = false; \
2625 (uOldVal) = 0; \
2626 break; \
2627 } \
2628 } while (0)
2629
2630
2631/**
2632 * Atomically Compare and Exchange a pointer value, additionally
2633 * passing back old value, ordered.
2634 *
2635 * @returns true if xchg was done.
2636 * @returns false if xchg wasn't done.
2637 *
2638 * @param ppv Pointer to the value to update.
2639 * @param   pvNew       The new value to assign to *ppv.
2640 * @param   pvOld       The old value to compare *ppv with.
2641 * @param   ppvOld      Pointer to store the old value at.
2642 */
2643DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2644{
2645#if ARCH_BITS == 32
2646 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2647#elif ARCH_BITS == 64
2648 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2649#else
2650# error "ARCH_BITS is bogus"
2651#endif
2652}
2653
2654
2655/**
2656 * Atomically exchanges and adds to a 32-bit value, ordered.
2657 *
2658 * @returns The old value.
2659 * @param pu32 Pointer to the value.
2660 * @param u32 Number to add.
2661 */
2662#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2663DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2664#else
2665DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2666{
2667# if RT_INLINE_ASM_USES_INTRIN
2668 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2669 return u32;
2670
2671# elif RT_INLINE_ASM_GNU_STYLE
2672 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2673 : "=r" (u32),
2674 "=m" (*pu32)
2675 : "0" (u32)
2676 : "memory");
2677 return u32;
2678# else
2679 __asm
2680 {
2681 mov eax, [u32]
2682# ifdef RT_ARCH_AMD64
2683 mov rdx, [pu32]
2684 lock xadd [rdx], eax
2685# else
2686 mov edx, [pu32]
2687 lock xadd [edx], eax
2688# endif
2689 mov [u32], eax
2690 }
2691 return u32;
2692# endif
2693}
2694#endif
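
/* Usage sketch (illustrative only): since the old value is returned, the add
 * can hand out unique indexes, e.g. when reserving a slot in a shared ring.
 * pRing, iWrite and cEntries are hypothetical names.
 *
 *    uint32_t iSlot = ASMAtomicAddU32(&pRing->iWrite, 1) % cEntries;
 */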
2695
2696
2697/**
2698 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2699 *
2700 * @returns The old value.
2701 * @param pi32 Pointer to the value.
2702 * @param i32 Number to add.
2703 */
2704DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2705{
2706 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2707}
2708
2709
2710/**
2711 * Atomically increment a 32-bit value, ordered.
2712 *
2713 * @returns The new value.
2714 * @param pu32 Pointer to the value to increment.
2715 */
2716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2717DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2718#else
2719DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2720{
2721 uint32_t u32;
2722# if RT_INLINE_ASM_USES_INTRIN
2723 u32 = _InterlockedIncrement((long *)pu32);
2724 return u32;
2725
2726# elif RT_INLINE_ASM_GNU_STYLE
2727 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2728 : "=r" (u32),
2729 "=m" (*pu32)
2730 : "0" (1)
2731 : "memory");
2732 return u32+1;
2733# else
2734 __asm
2735 {
2736 mov eax, 1
2737# ifdef RT_ARCH_AMD64
2738 mov rdx, [pu32]
2739 lock xadd [rdx], eax
2740# else
2741 mov edx, [pu32]
2742 lock xadd [edx], eax
2743# endif
2744 mov u32, eax
2745 }
2746 return u32+1;
2747# endif
2748}
2749#endif
2750
2751
2752/**
2753 * Atomically increment a signed 32-bit value, ordered.
2754 *
2755 * @returns The new value.
2756 * @param pi32 Pointer to the value to increment.
2757 */
2758DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2759{
2760 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2761}
2762
2763
2764/**
2765 * Atomically decrement an unsigned 32-bit value, ordered.
2766 *
2767 * @returns The new value.
2768 * @param pu32 Pointer to the value to decrement.
2769 */
2770#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2771DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2772#else
2773DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2774{
2775 uint32_t u32;
2776# if RT_INLINE_ASM_USES_INTRIN
2777 u32 = _InterlockedDecrement((long *)pu32);
2778 return u32;
2779
2780# elif RT_INLINE_ASM_GNU_STYLE
2781 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2782 : "=r" (u32),
2783 "=m" (*pu32)
2784 : "0" (-1)
2785 : "memory");
2786 return u32-1;
2787# else
2788 __asm
2789 {
2790 mov eax, -1
2791# ifdef RT_ARCH_AMD64
2792 mov rdx, [pu32]
2793 lock xadd [rdx], eax
2794# else
2795 mov edx, [pu32]
2796 lock xadd [edx], eax
2797# endif
2798 mov u32, eax
2799 }
2800 return u32-1;
2801# endif
2802}
2803#endif
2804
2805
2806/**
2807 * Atomically decrement a signed 32-bit value, ordered.
2808 *
2809 * @returns The new value.
2810 * @param pi32 Pointer to the value to decrement.
2811 */
2812DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2813{
2814 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2815}
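
/* Usage sketch (illustrative only): a plain reference count. Both functions
 * return the new value, so the caller that drops the count to zero knows it
 * is responsible for destroying the object. pObj, cRefs and rtExampleDestroy
 * are hypothetical names.
 *
 *    ASMAtomicIncU32(&pObj->cRefs);
 *    ...
 *    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *        rtExampleDestroy(pObj);
 */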
2816
2817
2818/**
2819 * Atomically Or an unsigned 32-bit value, ordered.
2820 *
2821 * @param   pu32   Pointer to the variable to OR u32 with.
2822 * @param u32 The value to OR *pu32 with.
2823 */
2824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2825DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2826#else
2827DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2828{
2829# if RT_INLINE_ASM_USES_INTRIN
2830 _InterlockedOr((long volatile *)pu32, (long)u32);
2831
2832# elif RT_INLINE_ASM_GNU_STYLE
2833 __asm__ __volatile__("lock; orl %1, %0\n\t"
2834 : "=m" (*pu32)
2835 : "ir" (u32));
2836# else
2837 __asm
2838 {
2839 mov eax, [u32]
2840# ifdef RT_ARCH_AMD64
2841 mov rdx, [pu32]
2842 lock or [rdx], eax
2843# else
2844 mov edx, [pu32]
2845 lock or [edx], eax
2846# endif
2847 }
2848# endif
2849}
2850#endif
2851
2852
2853/**
2854 * Atomically Or a signed 32-bit value, ordered.
2855 *
2856 * @param   pi32   Pointer to the variable to OR i32 with.
2857 * @param   i32    The value to OR *pi32 with.
2858 */
2859DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2860{
2861 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2862}
2863
2864
2865/**
2866 * Atomically And an unsigned 32-bit value, ordered.
2867 *
2868 * @param   pu32   Pointer to the variable to AND u32 with.
2869 * @param u32 The value to AND *pu32 with.
2870 */
2871#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2872DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2873#else
2874DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2875{
2876# if RT_INLINE_ASM_USES_INTRIN
2877 _InterlockedAnd((long volatile *)pu32, u32);
2878
2879# elif RT_INLINE_ASM_GNU_STYLE
2880 __asm__ __volatile__("lock; andl %1, %0\n\t"
2881 : "=m" (*pu32)
2882 : "ir" (u32));
2883# else
2884 __asm
2885 {
2886 mov eax, [u32]
2887# ifdef RT_ARCH_AMD64
2888 mov rdx, [pu32]
2889 lock and [rdx], eax
2890# else
2891 mov edx, [pu32]
2892 lock and [edx], eax
2893# endif
2894 }
2895# endif
2896}
2897#endif
2898
2899
2900/**
2901 * Atomically And a signed 32-bit value, ordered.
2902 *
2903 * @param   pi32   Pointer to the variable to AND i32 with.
2904 * @param i32 The value to AND *pi32 with.
2905 */
2906DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2907{
2908 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2909}
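
/* Usage sketch (illustrative only): OR sets flag bits and AND with the
 * inverted mask clears them, in both cases without disturbing the other bits
 * in the word. pThis->fFlags and the flag value are hypothetical.
 *
 *    ASMAtomicOrU32(&pThis->fFlags, UINT32_C(0x00000001));
 *    ASMAtomicAndU32(&pThis->fFlags, ~UINT32_C(0x00000001));
 */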
2910
2911
2912/**
2913 * Memory fence, waits for any pending writes and reads to complete.
2914 */
2915DECLINLINE(void) ASMMemoryFence(void)
2916{
2917 /** @todo use mfence? check if all cpus we care for support it. */
2918 uint32_t volatile u32;
2919 ASMAtomicXchgU32(&u32, 0);
2920}
2921
2922
2923/**
2924 * Write fence, waits for any pending writes to complete.
2925 */
2926DECLINLINE(void) ASMWriteFence(void)
2927{
2928 /** @todo use sfence? check if all cpus we care for support it. */
2929 ASMMemoryFence();
2930}
2931
2932
2933/**
2934 * Read fence, waits for any pending reads to complete.
2935 */
2936DECLINLINE(void) ASMReadFence(void)
2937{
2938 /** @todo use lfence? check if all cpus we care for support it. */
2939 ASMMemoryFence();
2940}
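
/* Usage sketch (illustrative only): pairing the fences with unordered
 * accesses. The producer fills in the payload, issues the write fence and
 * then raises the ready flag; the consumer tests the flag, issues the read
 * fence and only then looks at the payload. The pShared structure and its
 * members are hypothetical.
 *
 *    Producer:
 *        pShared->u32Payload = u32Value;
 *        ASMWriteFence();
 *        ASMAtomicUoWriteU32(&pShared->fReady, 1);
 *
 *    Consumer:
 *        if (ASMAtomicUoReadU32(&pShared->fReady))
 *        {
 *            ASMReadFence();
 *            u32Value = pShared->u32Payload;
 *        }
 */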
2941
2942
2943/**
2944 * Atomically reads an unsigned 8-bit value, ordered.
2945 *
2946 * @returns Current *pu8 value
2947 * @param pu8 Pointer to the 8-bit variable to read.
2948 */
2949DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2950{
2951 ASMMemoryFence();
2952 return *pu8; /* byte reads are atomic on x86 */
2953}
2954
2955
2956/**
2957 * Atomically reads an unsigned 8-bit value, unordered.
2958 *
2959 * @returns Current *pu8 value
2960 * @param pu8 Pointer to the 8-bit variable to read.
2961 */
2962DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2963{
2964 return *pu8; /* byte reads are atomic on x86 */
2965}
2966
2967
2968/**
2969 * Atomically reads a signed 8-bit value, ordered.
2970 *
2971 * @returns Current *pi8 value
2972 * @param pi8 Pointer to the 8-bit variable to read.
2973 */
2974DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2975{
2976 ASMMemoryFence();
2977 return *pi8; /* byte reads are atomic on x86 */
2978}
2979
2980
2981/**
2982 * Atomically reads a signed 8-bit value, unordered.
2983 *
2984 * @returns Current *pi8 value
2985 * @param pi8 Pointer to the 8-bit variable to read.
2986 */
2987DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2988{
2989 return *pi8; /* byte reads are atomic on x86 */
2990}
2991
2992
2993/**
2994 * Atomically reads an unsigned 16-bit value, ordered.
2995 *
2996 * @returns Current *pu16 value
2997 * @param pu16 Pointer to the 16-bit variable to read.
2998 */
2999DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3000{
3001 ASMMemoryFence();
3002 Assert(!((uintptr_t)pu16 & 1));
3003 return *pu16;
3004}
3005
3006
3007/**
3008 * Atomically reads an unsigned 16-bit value, unordered.
3009 *
3010 * @returns Current *pu16 value
3011 * @param pu16 Pointer to the 16-bit variable to read.
3012 */
3013DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3014{
3015 Assert(!((uintptr_t)pu16 & 1));
3016 return *pu16;
3017}
3018
3019
3020/**
3021 * Atomically reads a signed 16-bit value, ordered.
3022 *
3023 * @returns Current *pi16 value
3024 * @param pi16 Pointer to the 16-bit variable to read.
3025 */
3026DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3027{
3028 ASMMemoryFence();
3029 Assert(!((uintptr_t)pi16 & 1));
3030 return *pi16;
3031}
3032
3033
3034/**
3035 * Atomically reads a signed 16-bit value, unordered.
3036 *
3037 * @returns Current *pi16 value
3038 * @param pi16 Pointer to the 16-bit variable to read.
3039 */
3040DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3041{
3042 Assert(!((uintptr_t)pi16 & 1));
3043 return *pi16;
3044}
3045
3046
3047/**
3048 * Atomically reads an unsigned 32-bit value, ordered.
3049 *
3050 * @returns Current *pu32 value
3051 * @param pu32 Pointer to the 32-bit variable to read.
3052 */
3053DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3054{
3055 ASMMemoryFence();
3056 Assert(!((uintptr_t)pu32 & 3));
3057 return *pu32;
3058}
3059
3060
3061/**
3062 * Atomically reads an unsigned 32-bit value, unordered.
3063 *
3064 * @returns Current *pu32 value
3065 * @param pu32 Pointer to the 32-bit variable to read.
3066 */
3067DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3068{
3069 Assert(!((uintptr_t)pu32 & 3));
3070 return *pu32;
3071}
3072
3073
3074/**
3075 * Atomically reads a signed 32-bit value, ordered.
3076 *
3077 * @returns Current *pi32 value
3078 * @param pi32 Pointer to the 32-bit variable to read.
3079 */
3080DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3081{
3082 ASMMemoryFence();
3083 Assert(!((uintptr_t)pi32 & 3));
3084 return *pi32;
3085}
3086
3087
3088/**
3089 * Atomically reads a signed 32-bit value, unordered.
3090 *
3091 * @returns Current *pi32 value
3092 * @param pi32 Pointer to the 32-bit variable to read.
3093 */
3094DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3095{
3096 Assert(!((uintptr_t)pi32 & 3));
3097 return *pi32;
3098}
3099
3100
3101/**
3102 * Atomically reads an unsigned 64-bit value, ordered.
3103 *
3104 * @returns Current *pu64 value
3105 * @param pu64 Pointer to the 64-bit variable to read.
3106 * The memory pointed to must be writable.
3107 * @remark This will fault if the memory is read-only!
3108 */
3109#if RT_INLINE_ASM_EXTERNAL
3110DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3111#else
3112DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3113{
3114 uint64_t u64;
3115# ifdef RT_ARCH_AMD64
3116# if RT_INLINE_ASM_GNU_STYLE
3117 Assert(!((uintptr_t)pu64 & 7));
3118 __asm__ __volatile__( "mfence\n\t"
3119 "movq %1, %0\n\t"
3120 : "=r" (u64)
3121 : "m" (*pu64));
3122# else
3123 __asm
3124 {
3125 mfence
3126 mov rdx, [pu64]
3127 mov rax, [rdx]
3128 mov [u64], rax
3129 }
3130# endif
3131# else /* !RT_ARCH_AMD64 */
3132# if RT_INLINE_ASM_GNU_STYLE
3133# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3134 uint32_t u32EBX = 0;
3135 Assert(!((uintptr_t)pu64 & 7));
3136 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3137 "lock; cmpxchg8b (%5)\n\t"
3138 "xchgl %%ebx, %3\n\t"
3139 : "=A" (u64),
3140 "=m" (*pu64)
3141 : "0" (0),
3142 "m" (u32EBX),
3143 "c" (0),
3144 "S" (pu64));
3145# else /* !PIC */
3146 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3147 : "=A" (u64),
3148 "=m" (*pu64)
3149 : "0" (0),
3150 "b" (0),
3151 "c" (0));
3152# endif
3153# else
3154 Assert(!((uintptr_t)pu64 & 7));
3155 __asm
3156 {
3157 xor eax, eax
3158 xor edx, edx
3159 mov edi, pu64
3160 xor ecx, ecx
3161 xor ebx, ebx
3162 lock cmpxchg8b [edi]
3163 mov dword ptr [u64], eax
3164 mov dword ptr [u64 + 4], edx
3165 }
3166# endif
3167# endif /* !RT_ARCH_AMD64 */
3168 return u64;
3169}
3170#endif
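
/* Note and usage sketch (illustrative only): on 32-bit hosts the read is done
 * with a lock cmpxchg8b that compares against zero and, when the value really
 * is zero, stores zero back unchanged; that is why the location must be
 * writable even though it is only being read. A typical use is sampling a
 * 64-bit field another CPU updates; pTimer and u64Expire are hypothetical.
 *
 *    uint64_t u64Expire = ASMAtomicReadU64(&pTimer->u64Expire);
 */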
3171
3172
3173/**
3174 * Atomically reads an unsigned 64-bit value, unordered.
3175 *
3176 * @returns Current *pu64 value
3177 * @param pu64 Pointer to the 64-bit variable to read.
3178 * The memory pointed to must be writable.
3179 * @remark This will fault if the memory is read-only!
3180 */
3181#if RT_INLINE_ASM_EXTERNAL
3182DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3183#else
3184DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3185{
3186 uint64_t u64;
3187# ifdef RT_ARCH_AMD64
3188# if RT_INLINE_ASM_GNU_STYLE
3189 Assert(!((uintptr_t)pu64 & 7));
3190 __asm__ __volatile__("movq %1, %0\n\t"
3191 : "=r" (u64)
3192 : "m" (*pu64));
3193# else
3194 __asm
3195 {
3196 mov rdx, [pu64]
3197 mov rax, [rdx]
3198 mov [u64], rax
3199 }
3200# endif
3201# else /* !RT_ARCH_AMD64 */
3202# if RT_INLINE_ASM_GNU_STYLE
3203# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3204 uint32_t u32EBX = 0;
3205 Assert(!((uintptr_t)pu64 & 7));
3206 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3207 "lock; cmpxchg8b (%5)\n\t"
3208 "xchgl %%ebx, %3\n\t"
3209 : "=A" (u64),
3210 "=m" (*pu64)
3211 : "0" (0),
3212 "m" (u32EBX),
3213 "c" (0),
3214 "S" (pu64));
3215# else /* !PIC */
3216 __asm__ __volatile__("cmpxchg8b %1\n\t"
3217 : "=A" (u64),
3218 "=m" (*pu64)
3219 : "0" (0),
3220 "b" (0),
3221 "c" (0));
3222# endif
3223# else
3224 Assert(!((uintptr_t)pu64 & 7));
3225 __asm
3226 {
3227 xor eax, eax
3228 xor edx, edx
3229 mov edi, pu64
3230 xor ecx, ecx
3231 xor ebx, ebx
3232 lock cmpxchg8b [edi]
3233 mov dword ptr [u64], eax
3234 mov dword ptr [u64 + 4], edx
3235 }
3236# endif
3237# endif /* !RT_ARCH_AMD64 */
3238 return u64;
3239}
3240#endif
3241
3242
3243/**
3244 * Atomically reads a signed 64-bit value, ordered.
3245 *
3246 * @returns Current *pi64 value
3247 * @param pi64 Pointer to the 64-bit variable to read.
3248 * The memory pointed to must be writable.
3249 * @remark This will fault if the memory is read-only!
3250 */
3251DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3252{
3253 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3254}
3255
3256
3257/**
3258 * Atomically reads a signed 64-bit value, unordered.
3259 *
3260 * @returns Current *pi64 value
3261 * @param pi64 Pointer to the 64-bit variable to read.
3262 * The memory pointed to must be writable.
3263 * @remark This will fault if the memory is read-only!
3264 */
3265DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3266{
3267 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3268}
3269
3270
3271/**
3272 * Atomically reads a pointer value, ordered.
3273 *
3274 * @returns Current *pv value
3275 * @param ppv Pointer to the pointer variable to read.
3276 */
3277DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3278{
3279#if ARCH_BITS == 32
3280 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3281#elif ARCH_BITS == 64
3282 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3283#else
3284# error "ARCH_BITS is bogus"
3285#endif
3286}
3287
3288
3289/**
3290 * Atomically reads a pointer value, unordered.
3291 *
3292 * @returns Current *pv value
3293 * @param ppv Pointer to the pointer variable to read.
3294 */
3295DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3296{
3297#if ARCH_BITS == 32
3298 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3299#elif ARCH_BITS == 64
3300 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3301#else
3302# error "ARCH_BITS is bogus"
3303#endif
3304}
3305
3306
3307/**
3308 * Atomically reads a boolean value, ordered.
3309 *
3310 * @returns Current *pf value
3311 * @param pf Pointer to the boolean variable to read.
3312 */
3313DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3314{
3315 ASMMemoryFence();
3316 return *pf; /* byte reads are atomic on x86 */
3317}
3318
3319
3320/**
3321 * Atomically reads a boolean value, unordered.
3322 *
3323 * @returns Current *pf value
3324 * @param pf Pointer to the boolean variable to read.
3325 */
3326DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3327{
3328 return *pf; /* byte reads are atomic on x86 */
3329}
3330
3331
3332/**
3333 * Atomically read a value whose size might differ
3334 * between platforms or compilers, ordered.
3335 *
3336 * @param pu Pointer to the variable to update.
3337 * @param puRes Where to store the result.
3338 */
3339#define ASMAtomicReadSize(pu, puRes) \
3340 do { \
3341 switch (sizeof(*(pu))) { \
3342 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3343 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3344 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3345 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3346 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3347 } \
3348 } while (0)
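
/* Usage sketch (illustrative only): reading a field whose width depends on
 * the host architecture into a variable of the same type. uOwner and
 * uCurOwner are hypothetical.
 *
 *    uintptr_t volatile uOwner;
 *    uintptr_t          uCurOwner;
 *    ASMAtomicReadSize(&uOwner, &uCurOwner);
 */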
3349
3350
3351/**
3352 * Atomically read a value whose size might differ
3353 * between platforms or compilers, unordered.
3354 *
3355 * @param pu Pointer to the variable to update.
3356 * @param puRes Where to store the result.
3357 */
3358#define ASMAtomicUoReadSize(pu, puRes) \
3359 do { \
3360 switch (sizeof(*(pu))) { \
3361 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3362 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3363 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3364 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3365            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3366 } \
3367 } while (0)
3368
3369
3370/**
3371 * Atomically writes an unsigned 8-bit value, ordered.
3372 *
3373 * @param pu8 Pointer to the 8-bit variable.
3374 * @param u8 The 8-bit value to assign to *pu8.
3375 */
3376DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3377{
3378 ASMAtomicXchgU8(pu8, u8);
3379}
3380
3381
3382/**
3383 * Atomically writes an unsigned 8-bit value, unordered.
3384 *
3385 * @param pu8 Pointer to the 8-bit variable.
3386 * @param u8 The 8-bit value to assign to *pu8.
3387 */
3388DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3389{
3390 *pu8 = u8; /* byte writes are atomic on x86 */
3391}
3392
3393
3394/**
3395 * Atomically writes a signed 8-bit value, ordered.
3396 *
3397 * @param   pi8     Pointer to the 8-bit variable.
3398 * @param i8 The 8-bit value to assign to *pi8.
3399 */
3400DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3401{
3402 ASMAtomicXchgS8(pi8, i8);
3403}
3404
3405
3406/**
3407 * Atomically writes a signed 8-bit value, unordered.
3408 *
3409 * @param   pi8     Pointer to the 8-bit variable.
3410 * @param i8 The 8-bit value to assign to *pi8.
3411 */
3412DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3413{
3414 *pi8 = i8; /* byte writes are atomic on x86 */
3415}
3416
3417
3418/**
3419 * Atomically writes an unsigned 16-bit value, ordered.
3420 *
3421 * @param pu16 Pointer to the 16-bit variable.
3422 * @param u16 The 16-bit value to assign to *pu16.
3423 */
3424DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3425{
3426 ASMAtomicXchgU16(pu16, u16);
3427}
3428
3429
3430/**
3431 * Atomically writes an unsigned 16-bit value, unordered.
3432 *
3433 * @param pu16 Pointer to the 16-bit variable.
3434 * @param u16 The 16-bit value to assign to *pu16.
3435 */
3436DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3437{
3438 Assert(!((uintptr_t)pu16 & 1));
3439 *pu16 = u16;
3440}
3441
3442
3443/**
3444 * Atomically writes a signed 16-bit value, ordered.
3445 *
3446 * @param   pi16    Pointer to the 16-bit variable.
3447 * @param i16 The 16-bit value to assign to *pi16.
3448 */
3449DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3450{
3451 ASMAtomicXchgS16(pi16, i16);
3452}
3453
3454
3455/**
3456 * Atomically writes a signed 16-bit value, unordered.
3457 *
3458 * @param   pi16    Pointer to the 16-bit variable.
3459 * @param i16 The 16-bit value to assign to *pi16.
3460 */
3461DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3462{
3463 Assert(!((uintptr_t)pi16 & 1));
3464 *pi16 = i16;
3465}
3466
3467
3468/**
3469 * Atomically writes an unsigned 32-bit value, ordered.
3470 *
3471 * @param pu32 Pointer to the 32-bit variable.
3472 * @param u32 The 32-bit value to assign to *pu32.
3473 */
3474DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3475{
3476 ASMAtomicXchgU32(pu32, u32);
3477}
3478
3479
3480/**
3481 * Atomically writes an unsigned 32-bit value, unordered.
3482 *
3483 * @param pu32 Pointer to the 32-bit variable.
3484 * @param u32 The 32-bit value to assign to *pu32.
3485 */
3486DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3487{
3488 Assert(!((uintptr_t)pu32 & 3));
3489 *pu32 = u32;
3490}
3491
3492
3493/**
3494 * Atomically writes a signed 32-bit value, ordered.
3495 *
3496 * @param   pi32    Pointer to the 32-bit variable.
3497 * @param i32 The 32-bit value to assign to *pi32.
3498 */
3499DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3500{
3501 ASMAtomicXchgS32(pi32, i32);
3502}
3503
3504
3505/**
3506 * Atomically writes a signed 32-bit value, unordered.
3507 *
3508 * @param   pi32    Pointer to the 32-bit variable.
3509 * @param i32 The 32-bit value to assign to *pi32.
3510 */
3511DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3512{
3513 Assert(!((uintptr_t)pi32 & 3));
3514 *pi32 = i32;
3515}
3516
3517
3518/**
3519 * Atomically writes an unsigned 64-bit value, ordered.
3520 *
3521 * @param pu64 Pointer to the 64-bit variable.
3522 * @param u64 The 64-bit value to assign to *pu64.
3523 */
3524DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3525{
3526 ASMAtomicXchgU64(pu64, u64);
3527}
3528
3529
3530/**
3531 * Atomically writes an unsigned 64-bit value, unordered.
3532 *
3533 * @param pu64 Pointer to the 64-bit variable.
3534 * @param u64 The 64-bit value to assign to *pu64.
3535 */
3536DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3537{
3538 Assert(!((uintptr_t)pu64 & 7));
3539#if ARCH_BITS == 64
3540 *pu64 = u64;
3541#else
3542 ASMAtomicXchgU64(pu64, u64);
3543#endif
3544}
3545
3546
3547/**
3548 * Atomically writes a signed 64-bit value, ordered.
3549 *
3550 * @param pi64 Pointer to the 64-bit variable.
3551 * @param i64 The 64-bit value to assign to *pi64.
3552 */
3553DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3554{
3555 ASMAtomicXchgS64(pi64, i64);
3556}
3557
3558
3559/**
3560 * Atomically writes a signed 64-bit value, unordered.
3561 *
3562 * @param pi64 Pointer to the 64-bit variable.
3563 * @param i64 The 64-bit value to assign to *pi64.
3564 */
3565DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3566{
3567 Assert(!((uintptr_t)pi64 & 7));
3568#if ARCH_BITS == 64
3569 *pi64 = i64;
3570#else
3571 ASMAtomicXchgS64(pi64, i64);
3572#endif
3573}
3574
3575
3576/**
3577 * Atomically writes a boolean value, ordered.
3578 *
3579 * @param pf Pointer to the boolean variable.
3580 * @param f The boolean value to assign to *pf.
3581 */
3582DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3583{
3584 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3585}
3586
3587
3588/**
3589 * Atomically writes a boolean value, unordered.
3590 *
3591 * @param pf Pointer to the boolean variable.
3592 * @param f The boolean value to assign to *pf.
3593 */
3594DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3595{
3596 *pf = f; /* byte writes are atomic on x86 */
3597}
3598
3599
3600/**
3601 * Atomically writes a pointer value, ordered.
3602 *
3604 * @param   ppv     Pointer to the pointer variable.
3605 * @param   pv      The pointer value to assign to *ppv.
3606 */
3607DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3608{
3609#if ARCH_BITS == 32
3610 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3611#elif ARCH_BITS == 64
3612 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3613#else
3614# error "ARCH_BITS is bogus"
3615#endif
3616}
3617
3618
3619/**
3620 * Atomically writes a pointer value, unordered.
3621 *
3623 * @param   ppv     Pointer to the pointer variable.
3624 * @param   pv      The pointer value to assign to *ppv.
3625 */
3626DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3627{
3628#if ARCH_BITS == 32
3629 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3630#elif ARCH_BITS == 64
3631 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3632#else
3633# error "ARCH_BITS is bogus"
3634#endif
3635}
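
/* Usage sketch (illustrative only): publishing a fully initialized object by
 * storing its address last with the ordered pointer write. The ordered write
 * is an xchg, so the preceding initialization stores cannot be reordered past
 * it and a reader using ASMAtomicReadPtr should never see a half-initialized
 * object. g_pInstance and the fields are hypothetical.
 *
 *    pNew->cRefs        = 1;
 *    pNew->fInitialized = true;
 *    ASMAtomicWritePtr((void * volatile *)&g_pInstance, pNew);
 */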
3636
3637
3638/**
3639 * Atomically write a value whose size might differ
3640 * between platforms or compilers, ordered.
3641 *
3642 * @param pu Pointer to the variable to update.
3643 * @param uNew The value to assign to *pu.
3644 */
3645#define ASMAtomicWriteSize(pu, uNew) \
3646 do { \
3647 switch (sizeof(*(pu))) { \
3648 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3649 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3650 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3651 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3652 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3653 } \
3654 } while (0)
3655
3656/**
3657 * Atomically write a value whose size might differ
3658 * between platforms or compilers, unordered.
3659 *
3660 * @param pu Pointer to the variable to update.
3661 * @param uNew The value to assign to *pu.
3662 */
3663#define ASMAtomicUoWriteSize(pu, uNew) \
3664 do { \
3665 switch (sizeof(*(pu))) { \
3666 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3667 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3668 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3669 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3670            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3671 } \
3672 } while (0)
3673
3674
3675
3676
3677/**
3678 * Invalidate page.
3679 *
3680 * @param pv Address of the page to invalidate.
3681 */
3682#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3683DECLASM(void) ASMInvalidatePage(void *pv);
3684#else
3685DECLINLINE(void) ASMInvalidatePage(void *pv)
3686{
3687# if RT_INLINE_ASM_USES_INTRIN
3688 __invlpg(pv);
3689
3690# elif RT_INLINE_ASM_GNU_STYLE
3691 __asm__ __volatile__("invlpg %0\n\t"
3692 : : "m" (*(uint8_t *)pv));
3693# else
3694 __asm
3695 {
3696# ifdef RT_ARCH_AMD64
3697 mov rax, [pv]
3698 invlpg [rax]
3699# else
3700 mov eax, [pv]
3701 invlpg [eax]
3702# endif
3703 }
3704# endif
3705}
3706#endif
3707
3708
3709#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3710# if PAGE_SIZE != 0x1000
3711# error "PAGE_SIZE is not 0x1000!"
3712# endif
3713#endif
3714
3715/**
3716 * Zeros a 4K memory page.
3717 *
3718 * @param pv Pointer to the memory block. This must be page aligned.
3719 */
3720#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3721DECLASM(void) ASMMemZeroPage(volatile void *pv);
3722# else
3723DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3724{
3725# if RT_INLINE_ASM_USES_INTRIN
3726# ifdef RT_ARCH_AMD64
3727 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3728# else
3729 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3730# endif
3731
3732# elif RT_INLINE_ASM_GNU_STYLE
3733 RTUINTREG uDummy;
3734# ifdef RT_ARCH_AMD64
3735 __asm__ __volatile__ ("rep stosq"
3736 : "=D" (pv),
3737 "=c" (uDummy)
3738 : "0" (pv),
3739 "c" (0x1000 >> 3),
3740 "a" (0)
3741 : "memory");
3742# else
3743 __asm__ __volatile__ ("rep stosl"
3744 : "=D" (pv),
3745 "=c" (uDummy)
3746 : "0" (pv),
3747 "c" (0x1000 >> 2),
3748 "a" (0)
3749 : "memory");
3750# endif
3751# else
3752 __asm
3753 {
3754# ifdef RT_ARCH_AMD64
3755 xor rax, rax
3756 mov ecx, 0200h
3757 mov rdi, [pv]
3758 rep stosq
3759# else
3760 xor eax, eax
3761 mov ecx, 0400h
3762 mov edi, [pv]
3763 rep stosd
3764# endif
3765 }
3766# endif
3767}
3768# endif
3769
3770
3771/**
3772 * Zeros a memory block with a 32-bit aligned size.
3773 *
3774 * @param pv Pointer to the memory block.
3775 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3776 */
3777#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3778DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3779#else
3780DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3781{
3782# if RT_INLINE_ASM_USES_INTRIN
3783 __stosd((unsigned long *)pv, 0, cb >> 2);
3784
3785# elif RT_INLINE_ASM_GNU_STYLE
3786 __asm__ __volatile__ ("rep stosl"
3787 : "=D" (pv),
3788 "=c" (cb)
3789 : "0" (pv),
3790 "1" (cb >> 2),
3791 "a" (0)
3792 : "memory");
3793# else
3794 __asm
3795 {
3796 xor eax, eax
3797# ifdef RT_ARCH_AMD64
3798 mov rcx, [cb]
3799 shr rcx, 2
3800 mov rdi, [pv]
3801# else
3802 mov ecx, [cb]
3803 shr ecx, 2
3804 mov edi, [pv]
3805# endif
3806 rep stosd
3807 }
3808# endif
3809}
3810#endif
3811
3812
3813/**
3814 * Fills a memory block with a 32-bit aligned size.
3815 *
3816 * @param pv Pointer to the memory block.
3817 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3818 * @param u32 The value to fill with.
3819 */
3820#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3821DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3822#else
3823DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3824{
3825# if RT_INLINE_ASM_USES_INTRIN
3826    __stosd((unsigned long *)pv, u32, cb >> 2);
3827
3828# elif RT_INLINE_ASM_GNU_STYLE
3829 __asm__ __volatile__ ("rep stosl"
3830 : "=D" (pv),
3831 "=c" (cb)
3832 : "0" (pv),
3833 "1" (cb >> 2),
3834 "a" (u32)
3835 : "memory");
3836# else
3837 __asm
3838 {
3839# ifdef RT_ARCH_AMD64
3840 mov rcx, [cb]
3841 shr rcx, 2
3842 mov rdi, [pv]
3843# else
3844 mov ecx, [cb]
3845 shr ecx, 2
3846 mov edi, [pv]
3847# endif
3848 mov eax, [u32]
3849 rep stosd
3850 }
3851# endif
3852}
3853#endif
3854
3855
3856/**
3857 * Checks if a memory block is filled with the specified byte.
3858 *
3859 * This is a sort of inverted memchr.
3860 *
3861 * @returns Pointer to the byte which doesn't equal u8.
3862 * @returns NULL if all equal to u8.
3863 *
3864 * @param pv Pointer to the memory block.
3865 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3866 * @param u8 The value it's supposed to be filled with.
3867 */
3868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3869DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3870#else
3871DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3872{
3873/** @todo rewrite this in inline assembly? */
3874 uint8_t const *pb = (uint8_t const *)pv;
3875 for (; cb; cb--, pb++)
3876 if (RT_UNLIKELY(*pb != u8))
3877 return (void *)pb;
3878 return NULL;
3879}
3880#endif
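
/* Usage sketch (illustrative only): verifying that a block really is zeroed;
 * a non-NULL return points at the first offending byte. pvBuf, cbBuf and
 * offBad are hypothetical.
 *
 *    uint8_t *pbBad = (uint8_t *)ASMMemIsAll8(pvBuf, cbBuf, 0);
 *    if (pbBad)
 *        offBad = (size_t)(pbBad - (uint8_t *)pvBuf);
 */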
3881
3882
3883/**
3884 * Checks if a memory block is filled with the specified 32-bit value.
3885 *
3886 * This is a sort of inverted memchr.
3887 *
3888 * @returns Pointer to the first value which doesn't equal u32.
3889 * @returns NULL if all equal to u32.
3890 *
3891 * @param pv Pointer to the memory block.
3892 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3893 * @param u32 The value it's supposed to be filled with.
3894 */
3895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3896DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
3897#else
3898DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3899{
3900/** @todo rewrite this in inline assembly? */
3901 uint32_t const *pu32 = (uint32_t const *)pv;
3902 for (; cb; cb -= 4, pu32++)
3903 if (RT_UNLIKELY(*pu32 != u32))
3904 return (uint32_t *)pu32;
3905 return NULL;
3906}
3907#endif
3908
3909
3910/**
3911 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3912 *
3913 * @returns u32F1 * u32F2.
3914 */
3915#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3916DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3917#else
3918DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3919{
3920# ifdef RT_ARCH_AMD64
3921 return (uint64_t)u32F1 * u32F2;
3922# else /* !RT_ARCH_AMD64 */
3923 uint64_t u64;
3924# if RT_INLINE_ASM_GNU_STYLE
3925 __asm__ __volatile__("mull %%edx"
3926 : "=A" (u64)
3927 : "a" (u32F2), "d" (u32F1));
3928# else
3929 __asm
3930 {
3931 mov edx, [u32F1]
3932 mov eax, [u32F2]
3933 mul edx
3934 mov dword ptr [u64], eax
3935 mov dword ptr [u64 + 4], edx
3936 }
3937# endif
3938 return u64;
3939# endif /* !RT_ARCH_AMD64 */
3940}
3941#endif
3942
3943
3944/**
3945 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3946 *
3947 * @returns i32F1 * i32F2.
3948 */
3949#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3950DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3951#else
3952DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3953{
3954# ifdef RT_ARCH_AMD64
3955 return (int64_t)i32F1 * i32F2;
3956# else /* !RT_ARCH_AMD64 */
3957 int64_t i64;
3958# if RT_INLINE_ASM_GNU_STYLE
3959 __asm__ __volatile__("imull %%edx"
3960 : "=A" (i64)
3961 : "a" (i32F2), "d" (i32F1));
3962# else
3963 __asm
3964 {
3965 mov edx, [i32F1]
3966 mov eax, [i32F2]
3967 imul edx
3968 mov dword ptr [i64], eax
3969 mov dword ptr [i64 + 4], edx
3970 }
3971# endif
3972 return i64;
3973# endif /* !RT_ARCH_AMD64 */
3974}
3975#endif
3976
3977
3978/**
3979 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3980 *
3981 * @returns u64 / u32.
3982 */
3983#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3984DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3985#else
3986DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3987{
3988# ifdef RT_ARCH_AMD64
3989 return (uint32_t)(u64 / u32);
3990# else /* !RT_ARCH_AMD64 */
3991# if RT_INLINE_ASM_GNU_STYLE
3992 RTUINTREG uDummy;
3993 __asm__ __volatile__("divl %3"
3994 : "=a" (u32), "=d"(uDummy)
3995 : "A" (u64), "r" (u32));
3996# else
3997 __asm
3998 {
3999 mov eax, dword ptr [u64]
4000 mov edx, dword ptr [u64 + 4]
4001 mov ecx, [u32]
4002 div ecx
4003 mov [u32], eax
4004 }
4005# endif
4006 return u32;
4007# endif /* !RT_ARCH_AMD64 */
4008}
4009#endif
4010
4011
4012/**
4013 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4014 *
4015 * @returns i64 / i32.
4016 */
4017#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4018DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4019#else
4020DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4021{
4022# ifdef RT_ARCH_AMD64
4023 return (int32_t)(i64 / i32);
4024# else /* !RT_ARCH_AMD64 */
4025# if RT_INLINE_ASM_GNU_STYLE
4026 RTUINTREG iDummy;
4027 __asm__ __volatile__("idivl %3"
4028 : "=a" (i32), "=d"(iDummy)
4029 : "A" (i64), "r" (i32));
4030# else
4031 __asm
4032 {
4033 mov eax, dword ptr [i64]
4034 mov edx, dword ptr [i64 + 4]
4035 mov ecx, [i32]
4036 idiv ecx
4037 mov [i32], eax
4038 }
4039# endif
4040 return i32;
4041# endif /* !RT_ARCH_AMD64 */
4042}
4043#endif
4044
4045
4046/**
4047 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer,
4048 * using a 96-bit intermediate result.
4049 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4050 * __udivdi3 and __umoddi3 even if this inline function is not used.
4051 *
4052 * @returns (u64A * u32B) / u32C.
4053 * @param u64A The 64-bit value.
4054 * @param   u32B    The 32-bit value to multiply by A.
4055 * @param u32C The 32-bit value to divide A*B by.
4056 */
4057#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4058DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4059#else
4060DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4061{
4062# if RT_INLINE_ASM_GNU_STYLE
4063# ifdef RT_ARCH_AMD64
4064 uint64_t u64Result, u64Spill;
4065 __asm__ __volatile__("mulq %2\n\t"
4066 "divq %3\n\t"
4067 : "=a" (u64Result),
4068 "=d" (u64Spill)
4069 : "r" ((uint64_t)u32B),
4070 "r" ((uint64_t)u32C),
4071 "0" (u64A),
4072 "1" (0));
4073 return u64Result;
4074# else
4075 uint32_t u32Dummy;
4076 uint64_t u64Result;
4077 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4078 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4079 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4080 eax = u64A.hi */
4081 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4082 edx = u32C */
4083 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4084 edx = u32B */
4085 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4086 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4087 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4088 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4089 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4090 edx = u64Hi % u32C */
4091 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4092 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4093 "divl %%ecx \n\t" /* u64Result.lo */
4094 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4095 : "=A"(u64Result), "=c"(u32Dummy),
4096 "=S"(u32Dummy), "=D"(u32Dummy)
4097 : "a"((uint32_t)u64A),
4098 "S"((uint32_t)(u64A >> 32)),
4099 "c"(u32B),
4100 "D"(u32C));
4101 return u64Result;
4102# endif
4103# else
4104 RTUINT64U u;
4105 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4106 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4107 u64Hi += (u64Lo >> 32);
4108 u.s.Hi = (uint32_t)(u64Hi / u32C);
4109 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4110 return u.u;
4111# endif
4112}
4113#endif
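
/* Worked example (illustrative only): with u64A = 2^40, u32B = 2^30 and
 * u32C = 2^20 the intermediate product is 2^70, which does not fit in 64
 * bits, yet the exact result 2^50 is returned. The plain C expression
 * (u64A * u32B) / u32C would silently wrap before the division.
 *
 *    uint64_t u64 = ASMMultU64ByU32DivByU32(UINT64_C(1) << 40, 1U << 30, 1U << 20);
 *    Assert(u64 == UINT64_C(1) << 50);
 */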
4114
4115
4116/**
4117 * Probes a byte pointer for read access.
4118 *
4119 * While the function will fault if the byte is not read accessible,
4120 * the idea is to do this in a safe place like before acquiring locks
4121 * and such like.
4122 *
4123 * Also, this function guarantees that an eager compiler is not going
4124 * to optimize the probing away.
4125 *
4126 * @param pvByte Pointer to the byte.
4127 */
4128#if RT_INLINE_ASM_EXTERNAL
4129DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4130#else
4131DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4132{
4133 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4134 uint8_t u8;
4135# if RT_INLINE_ASM_GNU_STYLE
4136 __asm__ __volatile__("movb (%1), %0\n\t"
4137 : "=r" (u8)
4138 : "r" (pvByte));
4139# else
4140 __asm
4141 {
4142# ifdef RT_ARCH_AMD64
4143 mov rax, [pvByte]
4144 mov al, [rax]
4145# else
4146 mov eax, [pvByte]
4147 mov al, [eax]
4148# endif
4149 mov [u8], al
4150 }
4151# endif
4152 return u8;
4153}
4154#endif
4155
4156/**
4157 * Probes a buffer for read access page by page.
4158 *
4159 * While the function will fault if the buffer is not fully read
4160 * accessible, the idea is to do this in a safe place like before
4161 * acquiring locks and such like.
4162 *
4163 * Also, this function guarantees that an eager compiler is not going
4164 * to optimize the probing away.
4165 *
4166 * @param pvBuf Pointer to the buffer.
4167 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4168 */
4169DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4170{
4171 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4172 /* the first byte */
4173 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4174 ASMProbeReadByte(pu8);
4175
4176    /* the pages in between. */
4177 while (cbBuf > /*PAGE_SIZE*/0x1000)
4178 {
4179 ASMProbeReadByte(pu8);
4180 cbBuf -= /*PAGE_SIZE*/0x1000;
4181 pu8 += /*PAGE_SIZE*/0x1000;
4182 }
4183
4184 /* the last byte */
4185 ASMProbeReadByte(pu8 + cbBuf - 1);
4186}
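
/* Usage sketch (illustrative only): touching the buffer page by page before
 * entering a critical section, so that any page fault is taken while no locks
 * are held. The lock helpers and variable names are hypothetical.
 *
 *    ASMProbeReadBuffer(pvUserBuf, cbUserBuf);
 *    rtExampleLock(pThis);
 *    ...use the buffer...
 *    rtExampleUnlock(pThis);
 */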
4187
4188
4189/** @def ASMBreakpoint
4190 * Debugger Breakpoint.
4191 * @remark In the gnu world we add a nop instruction after the int3 to
4192 * force gdb to remain at the int3 source line.
4193 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4194 * @internal
4195 */
4196#if RT_INLINE_ASM_GNU_STYLE
4197# ifndef __L4ENV__
4198# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4199# else
4200# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4201# endif
4202#else
4203# define ASMBreakpoint() __debugbreak()
4204#endif
4205
4206
4207
4208/** @defgroup grp_inline_bits Bit Operations
4209 * @{
4210 */
4211
4212
4213/**
4214 * Sets a bit in a bitmap.
4215 *
4216 * @param pvBitmap Pointer to the bitmap.
4217 * @param iBit The bit to set.
4218 */
4219#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4220DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4221#else
4222DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4223{
4224# if RT_INLINE_ASM_USES_INTRIN
4225 _bittestandset((long *)pvBitmap, iBit);
4226
4227# elif RT_INLINE_ASM_GNU_STYLE
4228 __asm__ __volatile__ ("btsl %1, %0"
4229 : "=m" (*(volatile long *)pvBitmap)
4230 : "Ir" (iBit)
4231 : "memory");
4232# else
4233 __asm
4234 {
4235# ifdef RT_ARCH_AMD64
4236 mov rax, [pvBitmap]
4237 mov edx, [iBit]
4238 bts [rax], edx
4239# else
4240 mov eax, [pvBitmap]
4241 mov edx, [iBit]
4242 bts [eax], edx
4243# endif
4244 }
4245# endif
4246}
4247#endif
4248
4249
4250/**
4251 * Atomically sets a bit in a bitmap, ordered.
4252 *
4253 * @param pvBitmap Pointer to the bitmap.
4254 * @param iBit The bit to set.
4255 */
4256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4257DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4258#else
4259DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4260{
4261# if RT_INLINE_ASM_USES_INTRIN
4262 _interlockedbittestandset((long *)pvBitmap, iBit);
4263# elif RT_INLINE_ASM_GNU_STYLE
4264 __asm__ __volatile__ ("lock; btsl %1, %0"
4265 : "=m" (*(volatile long *)pvBitmap)
4266 : "Ir" (iBit)
4267 : "memory");
4268# else
4269 __asm
4270 {
4271# ifdef RT_ARCH_AMD64
4272 mov rax, [pvBitmap]
4273 mov edx, [iBit]
4274 lock bts [rax], edx
4275# else
4276 mov eax, [pvBitmap]
4277 mov edx, [iBit]
4278 lock bts [eax], edx
4279# endif
4280 }
4281# endif
4282}
4283#endif
4284
4285
4286/**
4287 * Clears a bit in a bitmap.
4288 *
4289 * @param pvBitmap Pointer to the bitmap.
4290 * @param iBit The bit to clear.
4291 */
4292#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4293DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4294#else
4295DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4296{
4297# if RT_INLINE_ASM_USES_INTRIN
4298 _bittestandreset((long *)pvBitmap, iBit);
4299
4300# elif RT_INLINE_ASM_GNU_STYLE
4301 __asm__ __volatile__ ("btrl %1, %0"
4302 : "=m" (*(volatile long *)pvBitmap)
4303 : "Ir" (iBit)
4304 : "memory");
4305# else
4306 __asm
4307 {
4308# ifdef RT_ARCH_AMD64
4309 mov rax, [pvBitmap]
4310 mov edx, [iBit]
4311 btr [rax], edx
4312# else
4313 mov eax, [pvBitmap]
4314 mov edx, [iBit]
4315 btr [eax], edx
4316# endif
4317 }
4318# endif
4319}
4320#endif
4321
4322
4323/**
4324 * Atomically clears a bit in a bitmap, ordered.
4325 *
4326 * @param pvBitmap Pointer to the bitmap.
4327 * @param   iBit        The bit to clear.
4328 * @remark No memory barrier, take care on smp.
4329 */
4330#if RT_INLINE_ASM_EXTERNAL
4331DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4332#else
4333DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4334{
4335# if RT_INLINE_ASM_GNU_STYLE
4336 __asm__ __volatile__ ("lock; btrl %1, %0"
4337 : "=m" (*(volatile long *)pvBitmap)
4338 : "Ir" (iBit)
4339 : "memory");
4340# else
4341 __asm
4342 {
4343# ifdef RT_ARCH_AMD64
4344 mov rax, [pvBitmap]
4345 mov edx, [iBit]
4346 lock btr [rax], edx
4347# else
4348 mov eax, [pvBitmap]
4349 mov edx, [iBit]
4350 lock btr [eax], edx
4351# endif
4352 }
4353# endif
4354}
4355#endif
4356
4357
4358/**
4359 * Toggles a bit in a bitmap.
4360 *
4361 * @param pvBitmap Pointer to the bitmap.
4362 * @param iBit The bit to toggle.
4363 */
4364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4365DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4366#else
4367DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4368{
4369# if RT_INLINE_ASM_USES_INTRIN
4370 _bittestandcomplement((long *)pvBitmap, iBit);
4371# elif RT_INLINE_ASM_GNU_STYLE
4372 __asm__ __volatile__ ("btcl %1, %0"
4373 : "=m" (*(volatile long *)pvBitmap)
4374 : "Ir" (iBit)
4375 : "memory");
4376# else
4377 __asm
4378 {
4379# ifdef RT_ARCH_AMD64
4380 mov rax, [pvBitmap]
4381 mov edx, [iBit]
4382 btc [rax], edx
4383# else
4384 mov eax, [pvBitmap]
4385 mov edx, [iBit]
4386 btc [eax], edx
4387# endif
4388 }
4389# endif
4390}
4391#endif
4392
4393
4394/**
4395 * Atomically toggles a bit in a bitmap, ordered.
4396 *
4397 * @param pvBitmap Pointer to the bitmap.
4398 * @param   iBit        The bit to toggle.
4399 */
4400#if RT_INLINE_ASM_EXTERNAL
4401DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4402#else
4403DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4404{
4405# if RT_INLINE_ASM_GNU_STYLE
4406 __asm__ __volatile__ ("lock; btcl %1, %0"
4407 : "=m" (*(volatile long *)pvBitmap)
4408 : "Ir" (iBit)
4409 : "memory");
4410# else
4411 __asm
4412 {
4413# ifdef RT_ARCH_AMD64
4414 mov rax, [pvBitmap]
4415 mov edx, [iBit]
4416 lock btc [rax], edx
4417# else
4418 mov eax, [pvBitmap]
4419 mov edx, [iBit]
4420 lock btc [eax], edx
4421# endif
4422 }
4423# endif
4424}
4425#endif
4426
4427
4428/**
4429 * Tests and sets a bit in a bitmap.
4430 *
4431 * @returns true if the bit was set.
4432 * @returns false if the bit was clear.
4433 * @param pvBitmap Pointer to the bitmap.
4434 * @param iBit The bit to test and set.
4435 */
4436#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4437DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4438#else
4439DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4440{
4441 union { bool f; uint32_t u32; uint8_t u8; } rc;
4442# if RT_INLINE_ASM_USES_INTRIN
4443 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4444
4445# elif RT_INLINE_ASM_GNU_STYLE
4446 __asm__ __volatile__ ("btsl %2, %1\n\t"
4447 "setc %b0\n\t"
4448 "andl $1, %0\n\t"
4449 : "=q" (rc.u32),
4450 "=m" (*(volatile long *)pvBitmap)
4451 : "Ir" (iBit)
4452 : "memory");
4453# else
4454 __asm
4455 {
4456 mov edx, [iBit]
4457# ifdef RT_ARCH_AMD64
4458 mov rax, [pvBitmap]
4459 bts [rax], edx
4460# else
4461 mov eax, [pvBitmap]
4462 bts [eax], edx
4463# endif
4464 setc al
4465 and eax, 1
4466 mov [rc.u32], eax
4467 }
4468# endif
4469 return rc.f;
4470}
4471#endif
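
/*
 * Illustrative usage sketch (editor's addition): de-duplicating indices on a
 * bitmap only the current thread touches, where the cheaper non-atomic form is
 * sufficient. au32Seen, idx and ProcessFirstOccurrence are hypothetical.
 *
 *     uint32_t au32Seen[1024 / 32] = {0};
 *     if (!ASMBitTestAndSet(&au32Seen[0], idx))
 *         ProcessFirstOccurrence(idx);          // the bit was clear, so idx is new
 */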
4472
4473
4474/**
4475 * Atomically tests and sets a bit in a bitmap, ordered.
4476 *
4477 * @returns true if the bit was set.
4478 * @returns false if the bit was clear.
4479 * @param pvBitmap Pointer to the bitmap.
4480 * @param iBit The bit to test and set.
4481 */
4482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4483DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4484#else
4485DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4486{
4487 union { bool f; uint32_t u32; uint8_t u8; } rc;
4488# if RT_INLINE_ASM_USES_INTRIN
4489 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4490# elif RT_INLINE_ASM_GNU_STYLE
4491 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4492 "setc %b0\n\t"
4493 "andl $1, %0\n\t"
4494 : "=q" (rc.u32),
4495 "=m" (*(volatile long *)pvBitmap)
4496 : "Ir" (iBit)
4497 : "memory");
4498# else
4499 __asm
4500 {
4501 mov edx, [iBit]
4502# ifdef RT_ARCH_AMD64
4503 mov rax, [pvBitmap]
4504 lock bts [rax], edx
4505# else
4506 mov eax, [pvBitmap]
4507 lock bts [eax], edx
4508# endif
4509 setc al
4510 and eax, 1
4511 mov [rc.u32], eax
4512 }
4513# endif
4514 return rc.f;
4515}
4516#endif
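
/*
 * Illustrative usage sketch (editor's addition): claiming a free slot in a
 * bitmap shared between threads. Only the caller that sees false (the bit was
 * clear before the locked bts) owns the slot. g_bmSlots is hypothetical.
 *
 *     static uint32_t g_bmSlots[256 / 32];      // 256 slots, set bit = taken
 *     int iSlot = ASMBitFirstClear(&g_bmSlots[0], 256);
 *     while (iSlot >= 0 && ASMAtomicBitTestAndSet(&g_bmSlots[0], iSlot))
 *         iSlot = ASMBitNextClear(&g_bmSlots[0], 256, iSlot);  // lost the race, try the next free bit
 *     // iSlot >= 0 here means this thread owns slot iSlot.
 */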
4517
4518
4519/**
4520 * Tests and clears a bit in a bitmap.
4521 *
4522 * @returns true if the bit was set.
4523 * @returns false if the bit was clear.
4524 * @param pvBitmap Pointer to the bitmap.
4525 * @param iBit The bit to test and clear.
4526 */
4527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4528DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4529#else
4530DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4531{
4532 union { bool f; uint32_t u32; uint8_t u8; } rc;
4533# if RT_INLINE_ASM_USES_INTRIN
4534 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4535
4536# elif RT_INLINE_ASM_GNU_STYLE
4537 __asm__ __volatile__ ("btrl %2, %1\n\t"
4538 "setc %b0\n\t"
4539 "andl $1, %0\n\t"
4540 : "=q" (rc.u32),
4541 "=m" (*(volatile long *)pvBitmap)
4542 : "Ir" (iBit)
4543 : "memory");
4544# else
4545 __asm
4546 {
4547 mov edx, [iBit]
4548# ifdef RT_ARCH_AMD64
4549 mov rax, [pvBitmap]
4550 btr [rax], edx
4551# else
4552 mov eax, [pvBitmap]
4553 btr [eax], edx
4554# endif
4555 setc al
4556 and eax, 1
4557 mov [rc.u32], eax
4558 }
4559# endif
4560 return rc.f;
4561}
4562#endif
4563
4564
4565/**
4566 * Atomically tests and clears a bit in a bitmap, ordered.
4567 *
4568 * @returns true if the bit was set.
4569 * @returns false if the bit was clear.
4570 * @param pvBitmap Pointer to the bitmap.
4571 * @param iBit The bit to test and clear.
4573 */
4574#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4575DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4576#else
4577DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4578{
4579 union { bool f; uint32_t u32; uint8_t u8; } rc;
4580# if RT_INLINE_ASM_USES_INTRIN
4581 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4582
4583# elif RT_INLINE_ASM_GNU_STYLE
4584 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4585 "setc %b0\n\t"
4586 "andl $1, %0\n\t"
4587 : "=q" (rc.u32),
4588 "=m" (*(volatile long *)pvBitmap)
4589 : "Ir" (iBit)
4590 : "memory");
4591# else
4592 __asm
4593 {
4594 mov edx, [iBit]
4595# ifdef RT_ARCH_AMD64
4596 mov rax, [pvBitmap]
4597 lock btr [rax], edx
4598# else
4599 mov eax, [pvBitmap]
4600 lock btr [eax], edx
4601# endif
4602 setc al
4603 and eax, 1
4604 mov [rc.u32], eax
4605 }
4606# endif
4607 return rc.f;
4608}
4609#endif
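
/*
 * Illustrative usage sketch (editor's addition): consuming a "work pending"
 * flag set by a producer; the return value tells the consumer whether there
 * was anything to pick up. g_bmPending, idxQueue and DrainQueue are
 * hypothetical.
 *
 *     if (ASMAtomicBitTestAndClear(&g_bmPending[0], idxQueue))
 *         DrainQueue(idxQueue);                 // the bit was set, so work was queued
 */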
4610
4611
4612/**
4613 * Tests and toggles a bit in a bitmap.
4614 *
4615 * @returns true if the bit was set.
4616 * @returns false if the bit was clear.
4617 * @param pvBitmap Pointer to the bitmap.
4618 * @param iBit The bit to test and toggle.
4619 */
4620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4621DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4622#else
4623DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4624{
4625 union { bool f; uint32_t u32; uint8_t u8; } rc;
4626# if RT_INLINE_ASM_USES_INTRIN
4627 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4628
4629# elif RT_INLINE_ASM_GNU_STYLE
4630 __asm__ __volatile__ ("btcl %2, %1\n\t"
4631 "setc %b0\n\t"
4632 "andl $1, %0\n\t"
4633 : "=q" (rc.u32),
4634 "=m" (*(volatile long *)pvBitmap)
4635 : "Ir" (iBit)
4636 : "memory");
4637# else
4638 __asm
4639 {
4640 mov edx, [iBit]
4641# ifdef RT_ARCH_AMD64
4642 mov rax, [pvBitmap]
4643 btc [rax], edx
4644# else
4645 mov eax, [pvBitmap]
4646 btc [eax], edx
4647# endif
4648 setc al
4649 and eax, 1
4650 mov [rc.u32], eax
4651 }
4652# endif
4653 return rc.f;
4654}
4655#endif
4656
4657
4658/**
4659 * Atomically tests and toggles a bit in a bitmap, ordered.
4660 *
4661 * @returns true if the bit was set.
4662 * @returns false if the bit was clear.
4663 * @param pvBitmap Pointer to the bitmap.
4664 * @param iBit The bit to test and toggle.
4665 */
4666#if RT_INLINE_ASM_EXTERNAL
4667DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4668#else
4669DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4670{
4671 union { bool f; uint32_t u32; uint8_t u8; } rc;
4672# if RT_INLINE_ASM_GNU_STYLE
4673 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4674 "setc %b0\n\t"
4675 "andl $1, %0\n\t"
4676 : "=q" (rc.u32),
4677 "=m" (*(volatile long *)pvBitmap)
4678 : "Ir" (iBit)
4679 : "memory");
4680# else
4681 __asm
4682 {
4683 mov edx, [iBit]
4684# ifdef RT_ARCH_AMD64
4685 mov rax, [pvBitmap]
4686 lock btc [rax], edx
4687# else
4688 mov eax, [pvBitmap]
4689 lock btc [eax], edx
4690# endif
4691 setc al
4692 and eax, 1
4693 mov [rc.u32], eax
4694 }
4695# endif
4696 return rc.f;
4697}
4698#endif
4699
4700
4701/**
4702 * Tests if a bit in a bitmap is set.
4703 *
4704 * @returns true if the bit is set.
4705 * @returns false if the bit is clear.
4706 * @param pvBitmap Pointer to the bitmap.
4707 * @param iBit The bit to test.
4708 */
4709#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4710DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4711#else
4712DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4713{
4714 union { bool f; uint32_t u32; uint8_t u8; } rc;
4715# if RT_INLINE_ASM_USES_INTRIN
4716 rc.u32 = _bittest((long *)pvBitmap, iBit);
4717# elif RT_INLINE_ASM_GNU_STYLE
4718
4719 __asm__ __volatile__ ("btl %2, %1\n\t"
4720 "setc %b0\n\t"
4721 "andl $1, %0\n\t"
4722 : "=q" (rc.u32),
4723 "=m" (*(volatile long *)pvBitmap)
4724 : "Ir" (iBit)
4725 : "memory");
4726# else
4727 __asm
4728 {
4729 mov edx, [iBit]
4730# ifdef RT_ARCH_AMD64
4731 mov rax, [pvBitmap]
4732 bt [rax], edx
4733# else
4734 mov eax, [pvBitmap]
4735 bt [eax], edx
4736# endif
4737 setc al
4738 and eax, 1
4739 mov [rc.u32], eax
4740 }
4741# endif
4742 return rc.f;
4743}
4744#endif
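
/*
 * Illustrative usage sketch (editor's addition): a plain, unordered query of a
 * bitmap that is only written during init. g_bmFeatures, FEATURE_BIT_FOO and
 * UseFooPath are hypothetical.
 *
 *     if (ASMBitTest(&g_bmFeatures[0], FEATURE_BIT_FOO))
 *         UseFooPath();
 */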
4745
4746
4747/**
4748 * Clears a bit range within a bitmap.
4749 *
4750 * @param pvBitmap Pointer to the bitmap.
4751 * @param iBitStart The first bit to clear.
4752 * @param iBitEnd The first bit not to clear.
4753 */
4754DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4755{
4756 if (iBitStart < iBitEnd)
4757 {
4758 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4759 int iStart = iBitStart & ~31;
4760 int iEnd = iBitEnd & ~31;
4761 if (iStart == iEnd)
4762 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4763 else
4764 {
4765 /* bits in first dword. */
4766 if (iBitStart & 31)
4767 {
4768 *pu32 &= (1 << (iBitStart & 31)) - 1;
4769 pu32++;
4770 iBitStart = iStart + 32;
4771 }
4772
4773 /* whole dword. */
4774 if (iBitStart != iEnd)
4775 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4776
4777 /* bits in last dword. */
4778 if (iBitEnd & 31)
4779 {
4780 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4781 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4782 }
4783 }
4784 }
4785}
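
/*
 * Worked example (editor's addition): ASMBitClearRange(pvBitmap, 5, 69) masks
 * off bits 5..31 of dword 0, zeroes dword 1 with ASMMemZero32, and masks off
 * bits 0..4 of dword 2; bit 69 itself is untouched because iBitEnd is
 * exclusive.
 *
 *     uint32_t au32Map[4];
 *     memset((void *)au32Map, 0xff, sizeof(au32Map));   // all 128 bits set
 *     ASMBitClearRange(&au32Map[0], 5, 69);
 *     // au32Map[0] == 0x0000001f, au32Map[1] == 0, au32Map[2] == 0xffffffe0
 */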
4786
4787
4788/**
4789 * Finds the first clear bit in a bitmap.
4790 *
4791 * @returns Index of the first zero bit.
4792 * @returns -1 if no clear bit was found.
4793 * @param pvBitmap Pointer to the bitmap.
4794 * @param cBits The number of bits in the bitmap. Multiple of 32.
4795 */
4796#if RT_INLINE_ASM_EXTERNAL
4797DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4798#else
4799DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4800{
4801 if (cBits)
4802 {
4803 int32_t iBit;
4804# if RT_INLINE_ASM_GNU_STYLE
4805 RTCCUINTREG uEAX, uECX, uEDI;
4806 cBits = RT_ALIGN_32(cBits, 32);
4807 __asm__ __volatile__("repe; scasl\n\t"
4808 "je 1f\n\t"
4809# ifdef RT_ARCH_AMD64
4810 "lea -4(%%rdi), %%rdi\n\t"
4811 "xorl (%%rdi), %%eax\n\t"
4812 "subq %5, %%rdi\n\t"
4813# else
4814 "lea -4(%%edi), %%edi\n\t"
4815 "xorl (%%edi), %%eax\n\t"
4816 "subl %5, %%edi\n\t"
4817# endif
4818 "shll $3, %%edi\n\t"
4819 "bsfl %%eax, %%edx\n\t"
4820 "addl %%edi, %%edx\n\t"
4821 "1:\t\n"
4822 : "=d" (iBit),
4823 "=&c" (uECX),
4824 "=&D" (uEDI),
4825 "=&a" (uEAX)
4826 : "0" (0xffffffff),
4827 "mr" (pvBitmap),
4828 "1" (cBits >> 5),
4829 "2" (pvBitmap),
4830 "3" (0xffffffff));
4831# else
4832 cBits = RT_ALIGN_32(cBits, 32);
4833 __asm
4834 {
4835# ifdef RT_ARCH_AMD64
4836 mov rdi, [pvBitmap]
4837 mov rbx, rdi
4838# else
4839 mov edi, [pvBitmap]
4840 mov ebx, edi
4841# endif
4842 mov edx, 0ffffffffh
4843 mov eax, edx
4844 mov ecx, [cBits]
4845 shr ecx, 5
4846 repe scasd
4847 je done
4848
4849# ifdef RT_ARCH_AMD64
4850 lea rdi, [rdi - 4]
4851 xor eax, [rdi]
4852 sub rdi, rbx
4853# else
4854 lea edi, [edi - 4]
4855 xor eax, [edi]
4856 sub edi, ebx
4857# endif
4858 shl edi, 3
4859 bsf edx, eax
4860 add edx, edi
4861 done:
4862 mov [iBit], edx
4863 }
4864# endif
4865 return iBit;
4866 }
4867 return -1;
4868}
4869#endif
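
/*
 * Illustrative usage sketch (editor's addition): locating the first free entry
 * in an allocation bitmap; cBits must be a multiple of 32 as documented above.
 * g_bmAlloc is hypothetical and only used single-threaded here.
 *
 *     int iFree = ASMBitFirstClear(&g_bmAlloc[0], 4096);
 *     if (iFree >= 0)
 *         ASMBitSet(&g_bmAlloc[0], iFree);      // use ASMAtomicBitTestAndSet when racing
 */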
4870
4871
4872/**
4873 * Finds the next clear bit in a bitmap.
4874 *
4875 * @returns Index of the next clear bit.
4876 * @returns -1 if no clear bit was found.
4877 * @param pvBitmap Pointer to the bitmap.
4878 * @param cBits The number of bits in the bitmap. Multiple of 32.
4879 * @param iBitPrev The bit returned from the last search.
4880 * The search will start at iBitPrev + 1.
4881 */
4882#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4883DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4884#else
4885DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4886{
4887 int iBit = ++iBitPrev & 31;
4888 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4889 cBits -= iBitPrev & ~31;
4890 if (iBit)
4891 {
4892 /* inspect the first dword. */
4893 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4894# if RT_INLINE_ASM_USES_INTRIN
4895 unsigned long ulBit = 0;
4896 if (_BitScanForward(&ulBit, u32))
4897 return ulBit + iBitPrev;
4898 iBit = -1;
4899# else
4900# if RT_INLINE_ASM_GNU_STYLE
4901 __asm__ __volatile__("bsf %1, %0\n\t"
4902 "jnz 1f\n\t"
4903 "movl $-1, %0\n\t"
4904 "1:\n\t"
4905 : "=r" (iBit)
4906 : "r" (u32));
4907# else
4908 __asm
4909 {
4910 mov edx, [u32]
4911 bsf eax, edx
4912 jnz done
4913 mov eax, 0ffffffffh
4914 done:
4915 mov [iBit], eax
4916 }
4917# endif
4918 if (iBit >= 0)
4919 return iBit + iBitPrev;
4920# endif
4921 /* Search the rest of the bitmap, if there is anything. */
4922 if (cBits > 32)
4923 {
4924 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4925 if (iBit >= 0)
4926 return iBit + (iBitPrev & ~31) + 32;
4927 }
4928 }
4929 else
4930 {
4931 /* Search the rest of the bitmap. */
4932 iBit = ASMBitFirstClear(pvBitmap, cBits);
4933 if (iBit >= 0)
4934 return iBit + (iBitPrev & ~31);
4935 }
4936 return iBit;
4937}
4938#endif
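
/*
 * Illustrative usage sketch (editor's addition): walking every clear bit by
 * chaining ASMBitFirstClear and ASMBitNextClear. cBits is assumed to be a
 * multiple of 32; VisitFreeSlot is a hypothetical callback.
 *
 *     for (int iBit = ASMBitFirstClear(pvBitmap, cBits);
 *          iBit >= 0;
 *          iBit = ASMBitNextClear(pvBitmap, cBits, iBit))
 *         VisitFreeSlot(iBit);
 */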
4939
4940
4941/**
4942 * Finds the first set bit in a bitmap.
4943 *
4944 * @returns Index of the first set bit.
4945 * @returns -1 if no set bit was found.
4946 * @param pvBitmap Pointer to the bitmap.
4947 * @param cBits The number of bits in the bitmap. Multiple of 32.
4948 */
4949#if RT_INLINE_ASM_EXTERNAL
4950DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4951#else
4952DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4953{
4954 if (cBits)
4955 {
4956 int32_t iBit;
4957# if RT_INLINE_ASM_GNU_STYLE
4958 RTCCUINTREG uEAX, uECX, uEDI;
4959 cBits = RT_ALIGN_32(cBits, 32);
4960 __asm__ __volatile__("repe; scasl\n\t"
4961 "je 1f\n\t"
4962# ifdef RT_ARCH_AMD64
4963 "lea -4(%%rdi), %%rdi\n\t"
4964 "movl (%%rdi), %%eax\n\t"
4965 "subq %5, %%rdi\n\t"
4966# else
4967 "lea -4(%%edi), %%edi\n\t"
4968 "movl (%%edi), %%eax\n\t"
4969 "subl %5, %%edi\n\t"
4970# endif
4971 "shll $3, %%edi\n\t"
4972 "bsfl %%eax, %%edx\n\t"
4973 "addl %%edi, %%edx\n\t"
4974 "1:\t\n"
4975 : "=d" (iBit),
4976 "=&c" (uECX),
4977 "=&D" (uEDI),
4978 "=&a" (uEAX)
4979 : "0" (0xffffffff),
4980 "mr" (pvBitmap),
4981 "1" (cBits >> 5),
4982 "2" (pvBitmap),
4983 "3" (0));
4984# else
4985 cBits = RT_ALIGN_32(cBits, 32);
4986 __asm
4987 {
4988# ifdef RT_ARCH_AMD64
4989 mov rdi, [pvBitmap]
4990 mov rbx, rdi
4991# else
4992 mov edi, [pvBitmap]
4993 mov ebx, edi
4994# endif
4995 mov edx, 0ffffffffh
4996 xor eax, eax
4997 mov ecx, [cBits]
4998 shr ecx, 5
4999 repe scasd
5000 je done
5001# ifdef RT_ARCH_AMD64
5002 lea rdi, [rdi - 4]
5003 mov eax, [rdi]
5004 sub rdi, rbx
5005# else
5006 lea edi, [edi - 4]
5007 mov eax, [edi]
5008 sub edi, ebx
5009# endif
5010 shl edi, 3
5011 bsf edx, eax
5012 add edx, edi
5013 done:
5014 mov [iBit], edx
5015 }
5016# endif
5017 return iBit;
5018 }
5019 return -1;
5020}
5021#endif
5022
5023
5024/**
5025 * Finds the next set bit in a bitmap.
5026 *
5027 * @returns Index of the next set bit.
5028 * @returns -1 if no set bit was found.
5029 * @param pvBitmap Pointer to the bitmap.
5030 * @param cBits The number of bits in the bitmap. Multiple of 32.
5031 * @param iBitPrev The bit returned from the last search.
5032 * The search will start at iBitPrev + 1.
5033 */
5034#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5035DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5036#else
5037DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5038{
5039 int iBit = ++iBitPrev & 31;
5040 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5041 cBits -= iBitPrev & ~31;
5042 if (iBit)
5043 {
5044 /* inspect the first dword. */
5045 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5046# if RT_INLINE_ASM_USES_INTRIN
5047 unsigned long ulBit = 0;
5048 if (_BitScanForward(&ulBit, u32))
5049 return ulBit + iBitPrev;
5050 iBit = -1;
5051# else
5052# if RT_INLINE_ASM_GNU_STYLE
5053 __asm__ __volatile__("bsf %1, %0\n\t"
5054 "jnz 1f\n\t"
5055 "movl $-1, %0\n\t"
5056 "1:\n\t"
5057 : "=r" (iBit)
5058 : "r" (u32));
5059# else
5060 __asm
5061 {
5062 mov edx, u32
5063 bsf eax, edx
5064 jnz done
5065 mov eax, 0ffffffffh
5066 done:
5067 mov [iBit], eax
5068 }
5069# endif
5070 if (iBit >= 0)
5071 return iBit + iBitPrev;
5072# endif
5073 /* Search the rest of the bitmap, if there is anything. */
5074 if (cBits > 32)
5075 {
5076 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5077 if (iBit >= 0)
5078 return iBit + (iBitPrev & ~31) + 32;
5079 }
5080
5081 }
5082 else
5083 {
5084 /* Search the rest of the bitmap. */
5085 iBit = ASMBitFirstSet(pvBitmap, cBits);
5086 if (iBit >= 0)
5087 return iBit + (iBitPrev & ~31);
5088 }
5089 return iBit;
5090}
5091#endif
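
/*
 * Illustrative usage sketch (editor's addition): enumerating set bits, e.g.
 * flushing the pages marked dirty in a bitmap. FlushPage is a hypothetical
 * callback; cBits is a multiple of 32.
 *
 *     for (int iBit = ASMBitFirstSet(pvBitmap, cBits);
 *          iBit >= 0;
 *          iBit = ASMBitNextSet(pvBitmap, cBits, iBit))
 *         FlushPage(iBit);
 */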
5092
5093
5094/**
5095 * Finds the first bit which is set in the given 32-bit integer.
5096 * Bits are numbered from 1 (least significant) to 32.
5097 *
5098 * @returns index [1..32] of the first set bit.
5099 * @returns 0 if all bits are cleared.
5100 * @param u32 Integer to search for set bits.
5101 * @remark Similar to ffs() in BSD.
5102 */
5103DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5104{
5105# if RT_INLINE_ASM_USES_INTRIN
5106 unsigned long iBit;
5107 if (_BitScanForward(&iBit, u32))
5108 iBit++;
5109 else
5110 iBit = 0;
5111# elif RT_INLINE_ASM_GNU_STYLE
5112 uint32_t iBit;
5113 __asm__ __volatile__("bsf %1, %0\n\t"
5114 "jnz 1f\n\t"
5115 "xorl %0, %0\n\t"
5116 "jmp 2f\n"
5117 "1:\n\t"
5118 "incl %0\n"
5119 "2:\n\t"
5120 : "=r" (iBit)
5121 : "rm" (u32));
5122# else
5123 uint32_t iBit;
5124 _asm
5125 {
5126 bsf eax, [u32]
5127 jnz found
5128 xor eax, eax
5129 jmp done
5130 found:
5131 inc eax
5132 done:
5133 mov [iBit], eax
5134 }
5135# endif
5136 return iBit;
5137}
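
/*
 * Illustrative usage sketch (editor's addition): picking the lowest pending
 * source from an interrupt-style mask, ffs() fashion. Note the 1-based return
 * value; ServiceSource is hypothetical.
 *
 *     uint32_t uPending = 0x00000048;               // bits 3 and 6 set
 *     unsigned iSrc = ASMBitFirstSetU32(uPending);  // returns 4 (bit 3, 1-based)
 *     if (iSrc)
 *         ServiceSource(iSrc - 1);                  // back to a 0-based bit number
 */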
5138
5139
5140/**
5141 * Finds the first bit which is set in the given 32-bit integer.
5142 * Bits are numbered from 1 (least significant) to 32.
5143 *
5144 * @returns index [1..32] of the first set bit.
5145 * @returns 0 if all bits are cleared.
5146 * @param i32 Integer to search for set bits.
5147 * @remark Similar to ffs() in BSD.
5148 */
5149DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5150{
5151 return ASMBitFirstSetU32((uint32_t)i32);
5152}
5153
5154
5155/**
5156 * Finds the last bit which is set in the given 32-bit integer.
5157 * Bits are numbered from 1 (least significant) to 32.
5158 *
5159 * @returns index [1..32] of the last set bit.
5160 * @returns 0 if all bits are cleared.
5161 * @param u32 Integer to search for set bits.
5162 * @remark Similar to fls() in BSD.
5163 */
5164DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5165{
5166# if RT_INLINE_ASM_USES_INTRIN
5167 unsigned long iBit;
5168 if (_BitScanReverse(&iBit, u32))
5169 iBit++;
5170 else
5171 iBit = 0;
5172# elif RT_INLINE_ASM_GNU_STYLE
5173 uint32_t iBit;
5174 __asm__ __volatile__("bsrl %1, %0\n\t"
5175 "jnz 1f\n\t"
5176 "xorl %0, %0\n\t"
5177 "jmp 2f\n"
5178 "1:\n\t"
5179 "incl %0\n"
5180 "2:\n\t"
5181 : "=r" (iBit)
5182 : "rm" (u32));
5183# else
5184 uint32_t iBit;
5185 _asm
5186 {
5187 bsr eax, [u32]
5188 jnz found
5189 xor eax, eax
5190 jmp done
5191 found:
5192 inc eax
5193 done:
5194 mov [iBit], eax
5195 }
5196# endif
5197 return iBit;
5198}
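
/*
 * Illustrative usage sketch (editor's addition): using the 1-based index of
 * the most significant set bit as a cheap integer log2, e.g. for rounding a
 * count up to a power of two. cEntries is hypothetical.
 *
 *     uint32_t cEntries = 1000;
 *     unsigned iMsb = ASMBitLastSetU32(cEntries);   // 10, since bit 9 is the highest set bit
 *     uint32_t cRounded = 1U << iMsb;               // 1024; an exact power of two would be doubled
 */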
5199
5200
5201/**
5202 * Finds the last bit which is set in the given 32-bit integer.
5203 * Bits are numbered from 1 (least significant) to 32.
5204 *
5205 * @returns index [1..32] of the last set bit.
5206 * @returns 0 if all bits are cleared.
5207 * @param i32 Integer to search for set bits.
5208 * @remark Similar to fls() in BSD.
5209 */
5210DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5211{
5212 return ASMBitLastSetU32((uint32_t)i32);
5213}
5214
5215
5216/**
5217 * Reverse the byte order of the given 32-bit integer.
5218 * @param u32 Integer
5219 */
5220DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5221{
5222#if RT_INLINE_ASM_USES_INTRIN
5223 u32 = _byteswap_ulong(u32);
5224#elif RT_INLINE_ASM_GNU_STYLE
5225 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5226#else
5227 _asm
5228 {
5229 mov eax, [u32]
5230 bswap eax
5231 mov [u32], eax
5232 }
5233#endif
5234 return u32;
5235}
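
/*
 * Illustrative usage sketch (editor's addition): converting between
 * little-endian host order and big-endian wire order on x86, where a single
 * bswap does the job.
 *
 *     uint32_t uHost = 0x12345678;
 *     uint32_t uWire = ASMByteSwapU32(uHost);       // 0x78563412
 *     Assert(ASMByteSwapU32(uWire) == uHost);       // swapping twice restores the original
 */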
5236
5237/** @} */
5238
5239
5240/** @} */
5241#endif
5242