VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 7633

Last change on this file since 7633 was 7633, checked in by vboxsync, 17 years ago

ASMMemIsAllU32

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 132.3 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo #include <iprt/param.h> for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using _MSC_VER >= 1400 and its intrinsics.
35 * Otherwise defined as 0.
36 */
37
38#ifdef _MSC_VER
39# if _MSC_VER >= 1400
40# define RT_INLINE_ASM_USES_INTRIN 1
41# include <intrin.h>
42 /* Emit the intrinsics at all optimization levels. */
43# pragma intrinsic(_ReadWriteBarrier)
44# pragma intrinsic(__cpuid)
45# pragma intrinsic(_enable)
46# pragma intrinsic(_disable)
47# pragma intrinsic(__rdtsc)
48# pragma intrinsic(__readmsr)
49# pragma intrinsic(__writemsr)
50# pragma intrinsic(__outbyte)
51# pragma intrinsic(__outword)
52# pragma intrinsic(__outdword)
53# pragma intrinsic(__inbyte)
54# pragma intrinsic(__inword)
55# pragma intrinsic(__indword)
56# pragma intrinsic(__invlpg)
57# pragma intrinsic(__stosd)
58# pragma intrinsic(__stosw)
59# pragma intrinsic(__stosb)
60# pragma intrinsic(__readcr0)
61# pragma intrinsic(__readcr2)
62# pragma intrinsic(__readcr3)
63# pragma intrinsic(__readcr4)
64# pragma intrinsic(__writecr0)
65# pragma intrinsic(__writecr3)
66# pragma intrinsic(__writecr4)
67# pragma intrinsic(_BitScanForward)
68# pragma intrinsic(_BitScanReverse)
69# pragma intrinsic(_bittest)
70# pragma intrinsic(_bittestandset)
71# pragma intrinsic(_bittestandreset)
72# pragma intrinsic(_bittestandcomplement)
73# pragma intrinsic(_byteswap_ushort)
74# pragma intrinsic(_byteswap_ulong)
75# pragma intrinsic(_interlockedbittestandset)
76# pragma intrinsic(_interlockedbittestandreset)
77# pragma intrinsic(_InterlockedAnd)
78# pragma intrinsic(_InterlockedOr)
79# pragma intrinsic(_InterlockedIncrement)
80# pragma intrinsic(_InterlockedDecrement)
81# pragma intrinsic(_InterlockedExchange)
82# pragma intrinsic(_InterlockedExchangeAdd)
83# pragma intrinsic(_InterlockedCompareExchange)
84# pragma intrinsic(_InterlockedCompareExchange64)
85# ifdef RT_ARCH_AMD64
86# pragma intrinsic(__stosq)
87# pragma intrinsic(__readcr8)
88# pragma intrinsic(__writecr8)
89# pragma intrinsic(_byteswap_uint64)
90# pragma intrinsic(_InterlockedExchange64)
91# endif
92# endif
93#endif
94#ifndef RT_INLINE_ASM_USES_INTRIN
95# define RT_INLINE_ASM_USES_INTRIN 0
96#endif
97
98
99
100/** @defgroup grp_asm ASM - Assembly Routines
101 * @ingroup grp_rt
102 *
103 * @remarks The difference between ordered and unordered atomic operations is that
104 * the former complete all outstanding reads and writes before continuing,
105 * while the latter make no promises about the ordering. Ordered
106 * operations do not, it seems, make any 100% promise as to whether
107 * the operation itself completes before any subsequent memory access.
108 * (Please correct if wrong.) A small usage sketch follows this comment block.
109 *
110 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
111 * are unordered (note the Uo).
112 *
113 * @{
114 */
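/* Illustrative usage sketch: a trivial spin lock built only from the ordered exchange
 * and compare-and-exchange operations declared further down in this file. The names
 * g_fLock, ExampleAcquire and ExampleRelease are made up for illustration.
 *
 *      static volatile uint32_t g_fLock = 0;
 *
 *      void ExampleAcquire(void)
 *      {
 *          while (!ASMAtomicCmpXchgU32(&g_fLock, 1, 0))
 *              ;   // spin until this thread is the one flipping 0 -> 1
 *      }
 *
 *      void ExampleRelease(void)
 *      {
 *          ASMAtomicXchgU32(&g_fLock, 0);   // ordered: prior stores are visible before the lock clears
 *      }
 */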
115
116/** @def RT_INLINE_ASM_EXTERNAL
117 * Defined as 1 if the compiler does not support inline assembly.
118 * The ASM* functions will then be implemented in an external .asm file.
119 *
120 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
121 * inline assembly in their AMD64 compiler.
122 */
123#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
124# define RT_INLINE_ASM_EXTERNAL 1
125#else
126# define RT_INLINE_ASM_EXTERNAL 0
127#endif
128
129/** @def RT_INLINE_ASM_GNU_STYLE
130 * Defined as 1 if the compiler understands GNU style inline assembly.
131 */
132#if defined(_MSC_VER)
133# define RT_INLINE_ASM_GNU_STYLE 0
134#else
135# define RT_INLINE_ASM_GNU_STYLE 1
136#endif
137
138
139/** @todo find a more proper place for this structure? */
140#pragma pack(1)
141/** IDTR */
142typedef struct RTIDTR
143{
144 /** Size of the IDT. */
145 uint16_t cbIdt;
146 /** Address of the IDT. */
147 uintptr_t pIdt;
148} RTIDTR, *PRTIDTR;
149#pragma pack()
150
151#pragma pack(1)
152/** GDTR */
153typedef struct RTGDTR
154{
155 /** Size of the GDT. */
156 uint16_t cbGdt;
157 /** Address of the GDT. */
158 uintptr_t pGdt;
159} RTGDTR, *PRTGDTR;
160#pragma pack()
161
162
163/** @def ASMReturnAddress
164 * Gets the return address of the current (or calling if you like) function or method.
165 */
166#ifdef _MSC_VER
167# ifdef __cplusplus
168extern "C"
169# endif
170void * _ReturnAddress(void);
171# pragma intrinsic(_ReturnAddress)
172# define ASMReturnAddress() _ReturnAddress()
173#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
174# define ASMReturnAddress() __builtin_return_address(0)
175#else
176# error "Unsupported compiler."
177#endif
178
179
180/**
181 * Gets the content of the IDTR CPU register.
182 * @param pIdtr Where to store the IDTR contents.
183 */
184#if RT_INLINE_ASM_EXTERNAL
185DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
186#else
187DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
188{
189# if RT_INLINE_ASM_GNU_STYLE
190 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
191# else
192 __asm
193 {
194# ifdef RT_ARCH_AMD64
195 mov rax, [pIdtr]
196 sidt [rax]
197# else
198 mov eax, [pIdtr]
199 sidt [eax]
200# endif
201 }
202# endif
203}
204#endif
205
206
207/**
208 * Sets the content of the IDTR CPU register.
209 * @param pIdtr Where to load the IDTR contents from
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
213#else
214DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 lidt [rax]
224# else
225 mov eax, [pIdtr]
226 lidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Gets the content of the GDTR CPU register.
236 * @param pGdtr Where to store the GDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
240#else
241DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pGdtr]
250 sgdt [rax]
251# else
252 mov eax, [pGdtr]
253 sgdt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260/**
261 * Get the cs register.
262 * @returns cs.
263 */
264#if RT_INLINE_ASM_EXTERNAL
265DECLASM(RTSEL) ASMGetCS(void);
266#else
267DECLINLINE(RTSEL) ASMGetCS(void)
268{
269 RTSEL SelCS;
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
272# else
273 __asm
274 {
275 mov ax, cs
276 mov [SelCS], ax
277 }
278# endif
279 return SelCS;
280}
281#endif
282
283
284/**
285 * Get the DS register.
286 * @returns DS.
287 */
288#if RT_INLINE_ASM_EXTERNAL
289DECLASM(RTSEL) ASMGetDS(void);
290#else
291DECLINLINE(RTSEL) ASMGetDS(void)
292{
293 RTSEL SelDS;
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
296# else
297 __asm
298 {
299 mov ax, ds
300 mov [SelDS], ax
301 }
302# endif
303 return SelDS;
304}
305#endif
306
307
308/**
309 * Get the ES register.
310 * @returns ES.
311 */
312#if RT_INLINE_ASM_EXTERNAL
313DECLASM(RTSEL) ASMGetES(void);
314#else
315DECLINLINE(RTSEL) ASMGetES(void)
316{
317 RTSEL SelES;
318# if RT_INLINE_ASM_GNU_STYLE
319 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
320# else
321 __asm
322 {
323 mov ax, es
324 mov [SelES], ax
325 }
326# endif
327 return SelES;
328}
329#endif
330
331
332/**
333 * Get the FS register.
334 * @returns FS.
335 */
336#if RT_INLINE_ASM_EXTERNAL
337DECLASM(RTSEL) ASMGetFS(void);
338#else
339DECLINLINE(RTSEL) ASMGetFS(void)
340{
341 RTSEL SelFS;
342# if RT_INLINE_ASM_GNU_STYLE
343 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
344# else
345 __asm
346 {
347 mov ax, fs
348 mov [SelFS], ax
349 }
350# endif
351 return SelFS;
352}
353#endif
354
355
356/**
357 * Get the GS register.
358 * @returns GS.
359 */
360#if RT_INLINE_ASM_EXTERNAL
361DECLASM(RTSEL) ASMGetGS(void);
362#else
363DECLINLINE(RTSEL) ASMGetGS(void)
364{
365 RTSEL SelGS;
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
368# else
369 __asm
370 {
371 mov ax, gs
372 mov [SelGS], ax
373 }
374# endif
375 return SelGS;
376}
377#endif
378
379
380/**
381 * Get the SS register.
382 * @returns SS.
383 */
384#if RT_INLINE_ASM_EXTERNAL
385DECLASM(RTSEL) ASMGetSS(void);
386#else
387DECLINLINE(RTSEL) ASMGetSS(void)
388{
389 RTSEL SelSS;
390# if RT_INLINE_ASM_GNU_STYLE
391 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
392# else
393 __asm
394 {
395 mov ax, ss
396 mov [SelSS], ax
397 }
398# endif
399 return SelSS;
400}
401#endif
402
403
404/**
405 * Get the TR register.
406 * @returns TR.
407 */
408#if RT_INLINE_ASM_EXTERNAL
409DECLASM(RTSEL) ASMGetTR(void);
410#else
411DECLINLINE(RTSEL) ASMGetTR(void)
412{
413 RTSEL SelTR;
414# if RT_INLINE_ASM_GNU_STYLE
415 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
416# else
417 __asm
418 {
419 str ax
420 mov [SelTR], ax
421 }
422# endif
423 return SelTR;
424}
425#endif
426
427
428/**
429 * Get the [RE]FLAGS register.
430 * @returns [RE]FLAGS.
431 */
432#if RT_INLINE_ASM_EXTERNAL
433DECLASM(RTCCUINTREG) ASMGetFlags(void);
434#else
435DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
436{
437 RTCCUINTREG uFlags;
438# if RT_INLINE_ASM_GNU_STYLE
439# ifdef RT_ARCH_AMD64
440 __asm__ __volatile__("pushfq\n\t"
441 "popq %0\n\t"
442 : "=g" (uFlags));
443# else
444 __asm__ __volatile__("pushfl\n\t"
445 "popl %0\n\t"
446 : "=g" (uFlags));
447# endif
448# else
449 __asm
450 {
451# ifdef RT_ARCH_AMD64
452 pushfq
453 pop [uFlags]
454# else
455 pushfd
456 pop [uFlags]
457# endif
458 }
459# endif
460 return uFlags;
461}
462#endif
463
464
465/**
466 * Set the [RE]FLAGS register.
467 * @param uFlags The new [RE]FLAGS value.
468 */
469#if RT_INLINE_ASM_EXTERNAL
470DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
471#else
472DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
473{
474# if RT_INLINE_ASM_GNU_STYLE
475# ifdef RT_ARCH_AMD64
476 __asm__ __volatile__("pushq %0\n\t"
477 "popfq\n\t"
478 : : "g" (uFlags));
479# else
480 __asm__ __volatile__("pushl %0\n\t"
481 "popfl\n\t"
482 : : "g" (uFlags));
483# endif
484# else
485 __asm
486 {
487# ifdef RT_ARCH_AMD64
488 push [uFlags]
489 popfq
490# else
491 push [uFlags]
492 popfd
493# endif
494 }
495# endif
496}
497#endif
498
499
500/**
501 * Gets the content of the CPU timestamp counter register.
502 *
503 * @returns TSC.
504 */
505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
506DECLASM(uint64_t) ASMReadTSC(void);
507#else
508DECLINLINE(uint64_t) ASMReadTSC(void)
509{
510 RTUINT64U u;
511# if RT_INLINE_ASM_GNU_STYLE
512 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
513# else
514# if RT_INLINE_ASM_USES_INTRIN
515 u.u = __rdtsc();
516# else
517 __asm
518 {
519 rdtsc
520 mov [u.s.Lo], eax
521 mov [u.s.Hi], edx
522 }
523# endif
524# endif
525 return u.u;
526}
527#endif
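/* Illustrative usage sketch: measuring an interval in TSC ticks with two reads.
 * DoSomethingInteresting() is a hypothetical placeholder.
 *
 *      uint64_t const uTscStart = ASMReadTSC();
 *      DoSomethingInteresting();
 *      uint64_t const cTicks    = ASMReadTSC() - uTscStart;   // elapsed TSC ticks
 */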
528
529
530/**
531 * Performs the cpuid instruction returning all registers.
532 *
533 * @param uOperator CPUID operation (eax).
534 * @param pvEAX Where to store eax.
535 * @param pvEBX Where to store ebx.
536 * @param pvECX Where to store ecx.
537 * @param pvEDX Where to store edx.
538 * @remark We're using void pointers to ease the use of special bitfield structures and such.
539 */
540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
541DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
542#else
543DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
544{
545# if RT_INLINE_ASM_GNU_STYLE
546# ifdef RT_ARCH_AMD64
547 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
548 __asm__ ("cpuid\n\t"
549 : "=a" (uRAX),
550 "=b" (uRBX),
551 "=c" (uRCX),
552 "=d" (uRDX)
553 : "0" (uOperator));
554 *(uint32_t *)pvEAX = (uint32_t)uRAX;
555 *(uint32_t *)pvEBX = (uint32_t)uRBX;
556 *(uint32_t *)pvECX = (uint32_t)uRCX;
557 *(uint32_t *)pvEDX = (uint32_t)uRDX;
558# else
559 __asm__ ("xchgl %%ebx, %1\n\t"
560 "cpuid\n\t"
561 "xchgl %%ebx, %1\n\t"
562 : "=a" (*(uint32_t *)pvEAX),
563 "=r" (*(uint32_t *)pvEBX),
564 "=c" (*(uint32_t *)pvECX),
565 "=d" (*(uint32_t *)pvEDX)
566 : "0" (uOperator));
567# endif
568
569# elif RT_INLINE_ASM_USES_INTRIN
570 int aInfo[4];
571 __cpuid(aInfo, uOperator);
572 *(uint32_t *)pvEAX = aInfo[0];
573 *(uint32_t *)pvEBX = aInfo[1];
574 *(uint32_t *)pvECX = aInfo[2];
575 *(uint32_t *)pvEDX = aInfo[3];
576
577# else
578 uint32_t uEAX;
579 uint32_t uEBX;
580 uint32_t uECX;
581 uint32_t uEDX;
582 __asm
583 {
584 push ebx
585 mov eax, [uOperator]
586 cpuid
587 mov [uEAX], eax
588 mov [uEBX], ebx
589 mov [uECX], ecx
590 mov [uEDX], edx
591 pop ebx
592 }
593 *(uint32_t *)pvEAX = uEAX;
594 *(uint32_t *)pvEBX = uEBX;
595 *(uint32_t *)pvECX = uECX;
596 *(uint32_t *)pvEDX = uEDX;
597# endif
598}
599#endif
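/* Illustrative usage sketch: fetching the 12-character CPU vendor string, which CPUID
 * leaf 0 returns in EBX, EDX and ECX (in that order); the Vendor union is made up.
 *
 *      uint32_t uMaxLeaf;
 *      union { uint32_t au32[3]; char sz[13]; } Vendor;
 *      ASMCpuId(0, &uMaxLeaf, &Vendor.au32[0], &Vendor.au32[2], &Vendor.au32[1]);
 *      Vendor.sz[12] = '\0';   // e.g. "GenuineIntel" or "AuthenticAMD"
 */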
600
601
602/**
603 * Performs the cpuid instruction returning all registers.
604 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
605 *
606 * @param uOperator CPUID operation (eax).
607 * @param uIdxECX The ECX index (cpuid subfunction).
608 * @param pvEAX Where to store eax.
609 * @param pvEBX Where to store ebx.
610 * @param pvECX Where to store ecx.
611 * @param pvEDX Where to store edx.
612 * @remark We're using void pointers to ease the use of special bitfield structures and such.
613 */
614#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
615DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
616#else
617DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
618{
619# if RT_INLINE_ASM_GNU_STYLE
620# ifdef RT_ARCH_AMD64
621 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
622 __asm__ ("cpuid\n\t"
623 : "=a" (uRAX),
624 "=b" (uRBX),
625 "=c" (uRCX),
626 "=d" (uRDX)
627 : "0" (uOperator),
628 "2" (uIdxECX));
629 *(uint32_t *)pvEAX = (uint32_t)uRAX;
630 *(uint32_t *)pvEBX = (uint32_t)uRBX;
631 *(uint32_t *)pvECX = (uint32_t)uRCX;
632 *(uint32_t *)pvEDX = (uint32_t)uRDX;
633# else
634 __asm__ ("xchgl %%ebx, %1\n\t"
635 "cpuid\n\t"
636 "xchgl %%ebx, %1\n\t"
637 : "=a" (*(uint32_t *)pvEAX),
638 "=r" (*(uint32_t *)pvEBX),
639 "=c" (*(uint32_t *)pvECX),
640 "=d" (*(uint32_t *)pvEDX)
641 : "0" (uOperator),
642 "2" (uIdxECX));
643# endif
644
645# elif RT_INLINE_ASM_USES_INTRIN
646 int aInfo[4];
647 /** @todo Need an intrinsic that takes the ECX index; __cpuid ignores uIdxECX here. */
648 __cpuid(aInfo, uOperator);
649 *(uint32_t *)pvEAX = aInfo[0];
650 *(uint32_t *)pvEBX = aInfo[1];
651 *(uint32_t *)pvECX = aInfo[2];
652 *(uint32_t *)pvEDX = aInfo[3];
653
654# else
655 uint32_t uEAX;
656 uint32_t uEBX;
657 uint32_t uECX;
658 uint32_t uEDX;
659 __asm
660 {
661 push ebx
662 mov eax, [uOperator]
663 mov ecx, [uIdxECX]
664 cpuid
665 mov [uEAX], eax
666 mov [uEBX], ebx
667 mov [uECX], ecx
668 mov [uEDX], edx
669 pop ebx
670 }
671 *(uint32_t *)pvEAX = uEAX;
672 *(uint32_t *)pvEBX = uEBX;
673 *(uint32_t *)pvECX = uECX;
674 *(uint32_t *)pvEDX = uEDX;
675# endif
676}
677#endif
678
679
680/**
681 * Performs the cpuid instruction returning ecx and edx.
682 *
683 * @param uOperator CPUID operation (eax).
684 * @param pvECX Where to store ecx.
685 * @param pvEDX Where to store edx.
686 * @remark We're using void pointers to ease the use of special bitfield structures and such.
687 */
688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
689DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
690#else
691DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
692{
693 uint32_t uEBX;
694 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
695}
696#endif
697
698
699/**
700 * Performs the cpuid instruction returning edx.
701 *
702 * @param uOperator CPUID operation (eax).
703 * @returns EDX after cpuid operation.
704 */
705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
706DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
707#else
708DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
709{
710 RTCCUINTREG xDX;
711# if RT_INLINE_ASM_GNU_STYLE
712# ifdef RT_ARCH_AMD64
713 RTCCUINTREG uSpill;
714 __asm__ ("cpuid"
715 : "=a" (uSpill),
716 "=d" (xDX)
717 : "0" (uOperator)
718 : "rbx", "rcx");
719# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
720 __asm__ ("push %%ebx\n\t"
721 "cpuid\n\t"
722 "pop %%ebx\n\t"
723 : "=a" (uOperator),
724 "=d" (xDX)
725 : "0" (uOperator)
726 : "ecx");
727# else
728 __asm__ ("cpuid"
729 : "=a" (uOperator),
730 "=d" (xDX)
731 : "0" (uOperator)
732 : "ebx", "ecx");
733# endif
734
735# elif RT_INLINE_ASM_USES_INTRIN
736 int aInfo[4];
737 __cpuid(aInfo, uOperator);
738 xDX = aInfo[3];
739
740# else
741 __asm
742 {
743 push ebx
744 mov eax, [uOperator]
745 cpuid
746 mov [xDX], edx
747 pop ebx
748 }
749# endif
750 return (uint32_t)xDX;
751}
752#endif
753
754
755/**
756 * Performs the cpuid instruction returning ecx.
757 *
758 * @param uOperator CPUID operation (eax).
759 * @returns ECX after cpuid operation.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
763#else
764DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
765{
766 RTCCUINTREG xCX;
767# if RT_INLINE_ASM_GNU_STYLE
768# ifdef RT_ARCH_AMD64
769 RTCCUINTREG uSpill;
770 __asm__ ("cpuid"
771 : "=a" (uSpill),
772 "=c" (xCX)
773 : "0" (uOperator)
774 : "rbx", "rdx");
775# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
776 __asm__ ("push %%ebx\n\t"
777 "cpuid\n\t"
778 "pop %%ebx\n\t"
779 : "=a" (uOperator),
780 "=c" (xCX)
781 : "0" (uOperator)
782 : "edx");
783# else
784 __asm__ ("cpuid"
785 : "=a" (uOperator),
786 "=c" (xCX)
787 : "0" (uOperator)
788 : "ebx", "edx");
789
790# endif
791
792# elif RT_INLINE_ASM_USES_INTRIN
793 int aInfo[4];
794 __cpuid(aInfo, uOperator);
795 xCX = aInfo[2];
796
797# else
798 __asm
799 {
800 push ebx
801 mov eax, [uOperator]
802 cpuid
803 mov [xCX], ecx
804 pop ebx
805 }
806# endif
807 return (uint32_t)xCX;
808}
809#endif
810
811
812/**
813 * Checks if the current CPU supports CPUID.
814 *
815 * @returns true if CPUID is supported.
816 */
817DECLINLINE(bool) ASMHasCpuId(void)
818{
819#ifdef RT_ARCH_AMD64
820 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
821#else /* !RT_ARCH_AMD64 */
822 bool fRet = false;
823# if RT_INLINE_ASM_GNU_STYLE
824 uint32_t u1;
825 uint32_t u2;
826 __asm__ ("pushf\n\t"
827 "pop %1\n\t"
828 "mov %1, %2\n\t"
829 "xorl $0x200000, %1\n\t"
830 "push %1\n\t"
831 "popf\n\t"
832 "pushf\n\t"
833 "pop %1\n\t"
834 "cmpl %1, %2\n\t"
835 "setne %0\n\t"
836 "push %2\n\t"
837 "popf\n\t"
838 : "=m" (fRet), "=r" (u1), "=r" (u2));
839# else
840 __asm
841 {
842 pushfd
843 pop eax
844 mov ebx, eax
845 xor eax, 0200000h
846 push eax
847 popfd
848 pushfd
849 pop eax
850 cmp eax, ebx
851 setne fRet
852 push ebx
853 popfd
854 }
855# endif
856 return fRet;
857#endif /* !RT_ARCH_AMD64 */
858}
859
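/* Illustrative usage sketch: checking a CPUID feature bit, guarded by ASMHasCpuId.
 * Bit 26 of leaf 1 EDX is the SSE2 flag; fHasSse2 is a made-up variable name.
 *
 *      bool fHasSse2 = false;
 *      if (ASMHasCpuId())
 *          fHasSse2 = !!(ASMCpuId_EDX(1) & RT_BIT(26));
 */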
860
861/**
862 * Gets the APIC ID of the current CPU.
863 *
864 * @returns the APIC ID.
865 */
866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
867DECLASM(uint8_t) ASMGetApicId(void);
868#else
869DECLINLINE(uint8_t) ASMGetApicId(void)
870{
871 RTCCUINTREG xBX;
872# if RT_INLINE_ASM_GNU_STYLE
873# ifdef RT_ARCH_AMD64
874 RTCCUINTREG uSpill;
875 __asm__ ("cpuid"
876 : "=a" (uSpill),
877 "=b" (xBX)
878 : "0" (1)
879 : "rcx", "rdx");
880# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
881 RTCCUINTREG uSpill;
882 __asm__ ("mov %%ebx,%1\n\t"
883 "cpuid\n\t"
884 "xchgl %%ebx,%1\n\t"
885 : "=a" (uSpill),
886 "=r" (xBX)
887 : "0" (1)
888 : "ecx", "edx");
889# else
890 RTCCUINTREG uSpill;
891 __asm__ ("cpuid"
892 : "=a" (uSpill),
893 "=b" (xBX)
894 : "0" (1)
895 : "ecx", "edx");
896# endif
897
898# elif RT_INLINE_ASM_USES_INTRIN
899 int aInfo[4];
900 __cpuid(aInfo, 1);
901 xBX = aInfo[1];
902
903# else
904 __asm
905 {
906 push ebx
907 mov eax, 1
908 cpuid
909 mov [xBX], ebx
910 pop ebx
911 }
912# endif
913 return (uint8_t)(xBX >> 24);
914}
915#endif
916
917/**
918 * Get cr0.
919 * @returns cr0.
920 */
921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
922DECLASM(RTCCUINTREG) ASMGetCR0(void);
923#else
924DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
925{
926 RTCCUINTREG uCR0;
927# if RT_INLINE_ASM_USES_INTRIN
928 uCR0 = __readcr0();
929
930# elif RT_INLINE_ASM_GNU_STYLE
931# ifdef RT_ARCH_AMD64
932 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
933# else
934 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
935# endif
936# else
937 __asm
938 {
939# ifdef RT_ARCH_AMD64
940 mov rax, cr0
941 mov [uCR0], rax
942# else
943 mov eax, cr0
944 mov [uCR0], eax
945# endif
946 }
947# endif
948 return uCR0;
949}
950#endif
951
952
953/**
954 * Sets the CR0 register.
955 * @param uCR0 The new CR0 value.
956 */
957#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
958DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
959#else
960DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
961{
962# if RT_INLINE_ASM_USES_INTRIN
963 __writecr0(uCR0);
964
965# elif RT_INLINE_ASM_GNU_STYLE
966# ifdef RT_ARCH_AMD64
967 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
968# else
969 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
970# endif
971# else
972 __asm
973 {
974# ifdef RT_ARCH_AMD64
975 mov rax, [uCR0]
976 mov cr0, rax
977# else
978 mov eax, [uCR0]
979 mov cr0, eax
980# endif
981 }
982# endif
983}
984#endif
985
986
987/**
988 * Get cr2.
989 * @returns cr2.
990 */
991#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
992DECLASM(RTCCUINTREG) ASMGetCR2(void);
993#else
994DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
995{
996 RTCCUINTREG uCR2;
997# if RT_INLINE_ASM_USES_INTRIN
998 uCR2 = __readcr2();
999
1000# elif RT_INLINE_ASM_GNU_STYLE
1001# ifdef RT_ARCH_AMD64
1002 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1003# else
1004 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1005# endif
1006# else
1007 __asm
1008 {
1009# ifdef RT_ARCH_AMD64
1010 mov rax, cr2
1011 mov [uCR2], rax
1012# else
1013 mov eax, cr2
1014 mov [uCR2], eax
1015# endif
1016 }
1017# endif
1018 return uCR2;
1019}
1020#endif
1021
1022
1023/**
1024 * Sets the CR2 register.
1025 * @param uCR2 The new CR2 value.
1026 */
1027#if RT_INLINE_ASM_EXTERNAL
1028DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1029#else
1030DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1031{
1032# if RT_INLINE_ASM_GNU_STYLE
1033# ifdef RT_ARCH_AMD64
1034 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1035# else
1036 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1037# endif
1038# else
1039 __asm
1040 {
1041# ifdef RT_ARCH_AMD64
1042 mov rax, [uCR2]
1043 mov cr2, rax
1044# else
1045 mov eax, [uCR2]
1046 mov cr2, eax
1047# endif
1048 }
1049# endif
1050}
1051#endif
1052
1053
1054/**
1055 * Get cr3.
1056 * @returns cr3.
1057 */
1058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1059DECLASM(RTCCUINTREG) ASMGetCR3(void);
1060#else
1061DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1062{
1063 RTCCUINTREG uCR3;
1064# if RT_INLINE_ASM_USES_INTRIN
1065 uCR3 = __readcr3();
1066
1067# elif RT_INLINE_ASM_GNU_STYLE
1068# ifdef RT_ARCH_AMD64
1069 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1070# else
1071 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1072# endif
1073# else
1074 __asm
1075 {
1076# ifdef RT_ARCH_AMD64
1077 mov rax, cr3
1078 mov [uCR3], rax
1079# else
1080 mov eax, cr3
1081 mov [uCR3], eax
1082# endif
1083 }
1084# endif
1085 return uCR3;
1086}
1087#endif
1088
1089
1090/**
1091 * Sets the CR3 register.
1092 *
1093 * @param uCR3 New CR3 value.
1094 */
1095#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1096DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1097#else
1098DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1099{
1100# if RT_INLINE_ASM_USES_INTRIN
1101 __writecr3(uCR3);
1102
1103# elif RT_INLINE_ASM_GNU_STYLE
1104# ifdef RT_ARCH_AMD64
1105 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1106# else
1107 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1108# endif
1109# else
1110 __asm
1111 {
1112# ifdef RT_ARCH_AMD64
1113 mov rax, [uCR3]
1114 mov cr3, rax
1115# else
1116 mov eax, [uCR3]
1117 mov cr3, eax
1118# endif
1119 }
1120# endif
1121}
1122#endif
1123
1124
1125/**
1126 * Reloads the CR3 register.
1127 */
1128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1129DECLASM(void) ASMReloadCR3(void);
1130#else
1131DECLINLINE(void) ASMReloadCR3(void)
1132{
1133# if RT_INLINE_ASM_USES_INTRIN
1134 __writecr3(__readcr3());
1135
1136# elif RT_INLINE_ASM_GNU_STYLE
1137 RTCCUINTREG u;
1138# ifdef RT_ARCH_AMD64
1139 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1140 "movq %0, %%cr3\n\t"
1141 : "=r" (u));
1142# else
1143 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1144 "movl %0, %%cr3\n\t"
1145 : "=r" (u));
1146# endif
1147# else
1148 __asm
1149 {
1150# ifdef RT_ARCH_AMD64
1151 mov rax, cr3
1152 mov cr3, rax
1153# else
1154 mov eax, cr3
1155 mov cr3, eax
1156# endif
1157 }
1158# endif
1159}
1160#endif
1161
1162
1163/**
1164 * Get cr4.
1165 * @returns cr4.
1166 */
1167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1168DECLASM(RTCCUINTREG) ASMGetCR4(void);
1169#else
1170DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1171{
1172 RTCCUINTREG uCR4;
1173# if RT_INLINE_ASM_USES_INTRIN
1174 uCR4 = __readcr4();
1175
1176# elif RT_INLINE_ASM_GNU_STYLE
1177# ifdef RT_ARCH_AMD64
1178 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1179# else
1180 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1181# endif
1182# else
1183 __asm
1184 {
1185# ifdef RT_ARCH_AMD64
1186 mov rax, cr4
1187 mov [uCR4], rax
1188# else
1189 push eax /* just in case */
1190 /*mov eax, cr4*/
1191 _emit 0x0f
1192 _emit 0x20
1193 _emit 0xe0
1194 mov [uCR4], eax
1195 pop eax
1196# endif
1197 }
1198# endif
1199 return uCR4;
1200}
1201#endif
1202
1203
1204/**
1205 * Sets the CR4 register.
1206 *
1207 * @param uCR4 New CR4 value.
1208 */
1209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1210DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1211#else
1212DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1213{
1214# if RT_INLINE_ASM_USES_INTRIN
1215 __writecr4(uCR4);
1216
1217# elif RT_INLINE_ASM_GNU_STYLE
1218# ifdef RT_ARCH_AMD64
1219 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1220# else
1221 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1222# endif
1223# else
1224 __asm
1225 {
1226# ifdef RT_ARCH_AMD64
1227 mov rax, [uCR4]
1228 mov cr4, rax
1229# else
1230 mov eax, [uCR4]
1231 _emit 0x0F
1232 _emit 0x22
1233 _emit 0xE0 /* mov cr4, eax */
1234# endif
1235 }
1236# endif
1237}
1238#endif
1239
1240
1241/**
1242 * Get cr8.
1243 * @returns cr8.
1244 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1245 */
1246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1247DECLASM(RTCCUINTREG) ASMGetCR8(void);
1248#else
1249DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1250{
1251# ifdef RT_ARCH_AMD64
1252 RTCCUINTREG uCR8;
1253# if RT_INLINE_ASM_USES_INTRIN
1254 uCR8 = __readcr8();
1255
1256# elif RT_INLINE_ASM_GNU_STYLE
1257 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1258# else
1259 __asm
1260 {
1261 mov rax, cr8
1262 mov [uCR8], rax
1263 }
1264# endif
1265 return uCR8;
1266# else /* !RT_ARCH_AMD64 */
1267 return 0;
1268# endif /* !RT_ARCH_AMD64 */
1269}
1270#endif
1271
1272
1273/**
1274 * Enables interrupts (EFLAGS.IF).
1275 */
1276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1277DECLASM(void) ASMIntEnable(void);
1278#else
1279DECLINLINE(void) ASMIntEnable(void)
1280{
1281# if RT_INLINE_ASM_GNU_STYLE
1282 __asm("sti\n");
1283# elif RT_INLINE_ASM_USES_INTRIN
1284 _enable();
1285# else
1286 __asm sti
1287# endif
1288}
1289#endif
1290
1291
1292/**
1293 * Disables interrupts (!EFLAGS.IF).
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(void) ASMIntDisable(void);
1297#else
1298DECLINLINE(void) ASMIntDisable(void)
1299{
1300# if RT_INLINE_ASM_GNU_STYLE
1301 __asm("cli\n");
1302# elif RT_INLINE_ASM_USES_INTRIN
1303 _disable();
1304# else
1305 __asm cli
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Disables interrupts and returns previous xFLAGS.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1316#else
1317DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1318{
1319 RTCCUINTREG xFlags;
1320# if RT_INLINE_ASM_GNU_STYLE
1321# ifdef RT_ARCH_AMD64
1322 __asm__ __volatile__("pushfq\n\t"
1323 "cli\n\t"
1324 "popq %0\n\t"
1325 : "=rm" (xFlags));
1326# else
1327 __asm__ __volatile__("pushfl\n\t"
1328 "cli\n\t"
1329 "popl %0\n\t"
1330 : "=rm" (xFlags));
1331# endif
1332# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1333 xFlags = ASMGetFlags();
1334 _disable();
1335# else
1336 __asm {
1337 pushfd
1338 cli
1339 pop [xFlags]
1340 }
1341# endif
1342 return xFlags;
1343}
1344#endif
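/* Illustrative usage sketch: the usual save / disable / restore pattern built from
 * ASMIntDisableFlags and ASMSetFlags; DoSomethingCritical() is a hypothetical placeholder.
 *
 *      RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();   // cli + return the old [RE]FLAGS
 *      DoSomethingCritical();
 *      ASMSetFlags(fSavedFlags);                               // restore the previous IF state
 */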
1345
1346
1347/**
1348 * Reads a machine specific register.
1349 *
1350 * @returns Register content.
1351 * @param uRegister Register to read.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1355#else
1356DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1357{
1358 RTUINT64U u;
1359# if RT_INLINE_ASM_GNU_STYLE
1360 __asm__ ("rdmsr\n\t"
1361 : "=a" (u.s.Lo),
1362 "=d" (u.s.Hi)
1363 : "c" (uRegister));
1364
1365# elif RT_INLINE_ASM_USES_INTRIN
1366 u.u = __readmsr(uRegister);
1367
1368# else
1369 __asm
1370 {
1371 mov ecx, [uRegister]
1372 rdmsr
1373 mov [u.s.Lo], eax
1374 mov [u.s.Hi], edx
1375 }
1376# endif
1377
1378 return u.u;
1379}
1380#endif
1381
1382
1383/**
1384 * Writes a machine specific register.
1385 *
1386 * @returns Register content.
1387 * @param uRegister Register to write to.
1388 * @param u64Val Value to write.
1389 */
1390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1391DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1392#else
1393DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1394{
1395 RTUINT64U u;
1396
1397 u.u = u64Val;
1398# if RT_INLINE_ASM_GNU_STYLE
1399 __asm__ __volatile__("wrmsr\n\t"
1400 ::"a" (u.s.Lo),
1401 "d" (u.s.Hi),
1402 "c" (uRegister));
1403
1404# elif RT_INLINE_ASM_USES_INTRIN
1405 __writemsr(uRegister, u.u);
1406
1407# else
1408 __asm
1409 {
1410 mov ecx, [uRegister]
1411 mov edx, [u.s.Hi]
1412 mov eax, [u.s.Lo]
1413 wrmsr
1414 }
1415# endif
1416}
1417#endif
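/* Illustrative usage sketch: read-modify-write of a machine specific register. The
 * register index and the bit being set are made up; real MSR numbers and bit layouts
 * come from the CPU vendor manuals.
 *
 *      uint32_t const uMsrHypothetical = 0x12345678;        // made-up MSR index
 *      uint64_t       u64              = ASMRdMsr(uMsrHypothetical);
 *      u64 |= RT_BIT(0);                                     // set a (made-up) control bit
 *      ASMWrMsr(uMsrHypothetical, u64);
 */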
1418
1419
1420/**
1421 * Reads low part of a machine specific register.
1422 *
1423 * @returns Register content.
1424 * @param uRegister Register to read.
1425 */
1426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1427DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1428#else
1429DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1430{
1431 uint32_t u32;
1432# if RT_INLINE_ASM_GNU_STYLE
1433 __asm__ ("rdmsr\n\t"
1434 : "=a" (u32)
1435 : "c" (uRegister)
1436 : "edx");
1437
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 u32 = (uint32_t)__readmsr(uRegister);
1440
1441#else
1442 __asm
1443 {
1444 mov ecx, [uRegister]
1445 rdmsr
1446 mov [u32], eax
1447 }
1448# endif
1449
1450 return u32;
1451}
1452#endif
1453
1454
1455/**
1456 * Reads high part of a machine specific register.
1457 *
1458 * @returns Register content.
1459 * @param uRegister Register to read.
1460 */
1461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1462DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1463#else
1464DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1465{
1466 uint32_t u32;
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm__ ("rdmsr\n\t"
1469 : "=d" (u32)
1470 : "c" (uRegister)
1471 : "eax");
1472
1473# elif RT_INLINE_ASM_USES_INTRIN
1474 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1475
1476# else
1477 __asm
1478 {
1479 mov ecx, [uRegister]
1480 rdmsr
1481 mov [u32], edx
1482 }
1483# endif
1484
1485 return u32;
1486}
1487#endif
1488
1489
1490/**
1491 * Gets dr7.
1492 *
1493 * @returns dr7.
1494 */
1495#if RT_INLINE_ASM_EXTERNAL
1496DECLASM(RTCCUINTREG) ASMGetDR7(void);
1497#else
1498DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1499{
1500 RTCCUINTREG uDR7;
1501# if RT_INLINE_ASM_GNU_STYLE
1502# ifdef RT_ARCH_AMD64
1503 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1504# else
1505 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1506# endif
1507# else
1508 __asm
1509 {
1510# ifdef RT_ARCH_AMD64
1511 mov rax, dr7
1512 mov [uDR7], rax
1513# else
1514 mov eax, dr7
1515 mov [uDR7], eax
1516# endif
1517 }
1518# endif
1519 return uDR7;
1520}
1521#endif
1522
1523
1524/**
1525 * Gets dr6.
1526 *
1527 * @returns dr6.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL
1530DECLASM(RTCCUINTREG) ASMGetDR6(void);
1531#else
1532DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1533{
1534 RTCCUINTREG uDR6;
1535# if RT_INLINE_ASM_GNU_STYLE
1536# ifdef RT_ARCH_AMD64
1537 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1538# else
1539 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1540# endif
1541# else
1542 __asm
1543 {
1544# ifdef RT_ARCH_AMD64
1545 mov rax, dr6
1546 mov [uDR6], rax
1547# else
1548 mov eax, dr6
1549 mov [uDR6], eax
1550# endif
1551 }
1552# endif
1553 return uDR6;
1554}
1555#endif
1556
1557
1558/**
1559 * Reads and clears DR6.
1560 *
1561 * @returns DR6.
1562 */
1563#if RT_INLINE_ASM_EXTERNAL
1564DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1565#else
1566DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1567{
1568 RTCCUINTREG uDR6;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1571# ifdef RT_ARCH_AMD64
1572 __asm__ ("movq %%dr6, %0\n\t"
1573 "movq %1, %%dr6\n\t"
1574 : "=r" (uDR6)
1575 : "r" (uNewValue));
1576# else
1577 __asm__ ("movl %%dr6, %0\n\t"
1578 "movl %1, %%dr6\n\t"
1579 : "=r" (uDR6)
1580 : "r" (uNewValue));
1581# endif
1582# else
1583 __asm
1584 {
1585# ifdef RT_ARCH_AMD64
1586 mov rax, dr6
1587 mov [uDR6], rax
1588 mov rcx, rax
1589 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1590 mov dr6, rcx
1591# else
1592 mov eax, dr6
1593 mov [uDR6], eax
1594 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1595 mov dr6, ecx
1596# endif
1597 }
1598# endif
1599 return uDR6;
1600}
1601#endif
1602
1603
1604/**
1605 * Compiler memory barrier.
1606 *
1607 * Ensures that the compiler does not keep any cached (register/temporary stack) memory
1608 * values and does not delay any outstanding writes past this point.
1609 *
1610 * This function must be used if non-volatile data is modified by a
1611 * device or the VMM. Typical cases are port access, MMIO access,
1612 * trapping instruction, etc.
1613 */
1614#if RT_INLINE_ASM_GNU_STYLE
1615# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1616#elif RT_INLINE_ASM_USES_INTRIN
1617# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1618#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1619DECLINLINE(void) ASMCompilerBarrier(void)
1620{
1621 __asm
1622 {
1623 }
1624}
1625#endif
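/* Illustrative usage sketch: forcing the compiler to re-read a non-volatile flag that a
 * device or another context updates behind its back; g_fDeviceDone is a made-up flag.
 *
 *      extern uint32_t g_fDeviceDone;
 *      while (!g_fDeviceDone)
 *          ASMCompilerBarrier();   // discard cached copies so the flag is re-read each iteration
 */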
1626
1627
1628/**
1629 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1630 *
1631 * @param Port I/O port to write to.
1632 * @param u8 8-bit integer to write.
1633 */
1634#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1635DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1636#else
1637DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1638{
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("outb %b1, %w0\n\t"
1641 :: "Nd" (Port),
1642 "a" (u8));
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 __outbyte(Port, u8);
1646
1647# else
1648 __asm
1649 {
1650 mov dx, [Port]
1651 mov al, [u8]
1652 out dx, al
1653 }
1654# endif
1655}
1656#endif
1657
1658
1659/**
1660 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1661 *
1662 * @returns 8-bit integer.
1663 * @param Port I/O port to read from.
1664 */
1665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1666DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1667#else
1668DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1669{
1670 uint8_t u8;
1671# if RT_INLINE_ASM_GNU_STYLE
1672 __asm__ __volatile__("inb %w1, %b0\n\t"
1673 : "=a" (u8)
1674 : "Nd" (Port));
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 u8 = __inbyte(Port);
1678
1679# else
1680 __asm
1681 {
1682 mov dx, [Port]
1683 in al, dx
1684 mov [u8], al
1685 }
1686# endif
1687 return u8;
1688}
1689#endif
1690
1691
1692/**
1693 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1694 *
1695 * @param Port I/O port to write to.
1696 * @param u16 16-bit integer to write.
1697 */
1698#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1699DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1700#else
1701DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1702{
1703# if RT_INLINE_ASM_GNU_STYLE
1704 __asm__ __volatile__("outw %w1, %w0\n\t"
1705 :: "Nd" (Port),
1706 "a" (u16));
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 __outword(Port, u16);
1710
1711# else
1712 __asm
1713 {
1714 mov dx, [Port]
1715 mov ax, [u16]
1716 out dx, ax
1717 }
1718# endif
1719}
1720#endif
1721
1722
1723/**
1724 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1725 *
1726 * @returns 16-bit integer.
1727 * @param Port I/O port to read from.
1728 */
1729#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1730DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1731#else
1732DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1733{
1734 uint16_t u16;
1735# if RT_INLINE_ASM_GNU_STYLE
1736 __asm__ __volatile__("inw %w1, %w0\n\t"
1737 : "=a" (u16)
1738 : "Nd" (Port));
1739
1740# elif RT_INLINE_ASM_USES_INTRIN
1741 u16 = __inword(Port);
1742
1743# else
1744 __asm
1745 {
1746 mov dx, [Port]
1747 in ax, dx
1748 mov [u16], ax
1749 }
1750# endif
1751 return u16;
1752}
1753#endif
1754
1755
1756/**
1757 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to write to.
1760 * @param u32 32-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1764#else
1765DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outl %1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u32));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outdword(Port, u32);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov eax, [u32]
1780 out dx, eax
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 32-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1795#else
1796DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1797{
1798 uint32_t u32;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inl %w1, %0\n\t"
1801 : "=a" (u32)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u32 = __indword(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in eax, dx
1812 mov [u32], eax
1813 }
1814# endif
1815 return u32;
1816}
1817#endif
1818
1819/** @todo string i/o */
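/* Illustrative usage sketch: reading a byte via the traditional PC/AT CMOS index+data
 * port pair. The port numbers 0x70/0x71 and register 0x0A are the classic values;
 * adjust them to whatever hardware is actually being poked.
 *
 *      ASMOutU8(0x70, 0x0A);                    // select CMOS status register A
 *      uint8_t const bStatusA = ASMInU8(0x71);  // read it through the data port
 */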
1820
1821
1822/**
1823 * Atomically Exchange an unsigned 8-bit value, ordered.
1824 *
1825 * @returns Current *pu8 value
1826 * @param pu8 Pointer to the 8-bit variable to update.
1827 * @param u8 The 8-bit value to assign to *pu8.
1828 */
1829#if RT_INLINE_ASM_EXTERNAL
1830DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1831#else
1832DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1833{
1834# if RT_INLINE_ASM_GNU_STYLE
1835 __asm__ __volatile__("xchgb %0, %1\n\t"
1836 : "=m" (*pu8),
1837 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1838 : "1" (u8));
1839# else
1840 __asm
1841 {
1842# ifdef RT_ARCH_AMD64
1843 mov rdx, [pu8]
1844 mov al, [u8]
1845 xchg [rdx], al
1846 mov [u8], al
1847# else
1848 mov edx, [pu8]
1849 mov al, [u8]
1850 xchg [edx], al
1851 mov [u8], al
1852# endif
1853 }
1854# endif
1855 return u8;
1856}
1857#endif
1858
1859
1860/**
1861 * Atomically Exchange a signed 8-bit value, ordered.
1862 *
1863 * @returns Current *pi8 value
1864 * @param pi8 Pointer to the 8-bit variable to update.
1865 * @param i8 The 8-bit value to assign to *pi8.
1866 */
1867DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1868{
1869 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1870}
1871
1872
1873/**
1874 * Atomically Exchange a bool value, ordered.
1875 *
1876 * @returns Current *pf value
1877 * @param pf Pointer to the 8-bit variable to update.
1878 * @param f The 8-bit value to assign to *pf.
1879 */
1880DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1881{
1882#ifdef _MSC_VER
1883 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1884#else
1885 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1886#endif
1887}
1888
1889
1890/**
1891 * Atomically Exchange an unsigned 16-bit value, ordered.
1892 *
1893 * @returns Current *pu16 value
1894 * @param pu16 Pointer to the 16-bit variable to update.
1895 * @param u16 The 16-bit value to assign to *pu16.
1896 */
1897#if RT_INLINE_ASM_EXTERNAL
1898DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1899#else
1900DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1901{
1902# if RT_INLINE_ASM_GNU_STYLE
1903 __asm__ __volatile__("xchgw %0, %1\n\t"
1904 : "=m" (*pu16),
1905 "=r" (u16)
1906 : "1" (u16));
1907# else
1908 __asm
1909 {
1910# ifdef RT_ARCH_AMD64
1911 mov rdx, [pu16]
1912 mov ax, [u16]
1913 xchg [rdx], ax
1914 mov [u16], ax
1915# else
1916 mov edx, [pu16]
1917 mov ax, [u16]
1918 xchg [edx], ax
1919 mov [u16], ax
1920# endif
1921 }
1922# endif
1923 return u16;
1924}
1925#endif
1926
1927
1928/**
1929 * Atomically Exchange a signed 16-bit value, ordered.
1930 *
1931 * @returns Current *pi16 value
1932 * @param pi16 Pointer to the 16-bit variable to update.
1933 * @param i16 The 16-bit value to assign to *pi16.
1934 */
1935DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1936{
1937 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1938}
1939
1940
1941/**
1942 * Atomically Exchange an unsigned 32-bit value, ordered.
1943 *
1944 * @returns Current *pu32 value
1945 * @param pu32 Pointer to the 32-bit variable to update.
1946 * @param u32 The 32-bit value to assign to *pu32.
1947 */
1948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1949DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1950#else
1951DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1952{
1953# if RT_INLINE_ASM_GNU_STYLE
1954 __asm__ __volatile__("xchgl %0, %1\n\t"
1955 : "=m" (*pu32),
1956 "=r" (u32)
1957 : "1" (u32));
1958
1959# elif RT_INLINE_ASM_USES_INTRIN
1960 u32 = _InterlockedExchange((long *)pu32, u32);
1961
1962# else
1963 __asm
1964 {
1965# ifdef RT_ARCH_AMD64
1966 mov rdx, [pu32]
1967 mov eax, u32
1968 xchg [rdx], eax
1969 mov [u32], eax
1970# else
1971 mov edx, [pu32]
1972 mov eax, u32
1973 xchg [edx], eax
1974 mov [u32], eax
1975# endif
1976 }
1977# endif
1978 return u32;
1979}
1980#endif
1981
1982
1983/**
1984 * Atomically Exchange a signed 32-bit value, ordered.
1985 *
1986 * @returns Current *pi32 value
1987 * @param pi32 Pointer to the 32-bit variable to update.
1988 * @param i32 The 32-bit value to assign to *pi32.
1989 */
1990DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1991{
1992 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1993}
1994
1995
1996/**
1997 * Atomically Exchange an unsigned 64-bit value, ordered.
1998 *
1999 * @returns Current *pu64 value
2000 * @param pu64 Pointer to the 64-bit variable to update.
2001 * @param u64 The 64-bit value to assign to *pu64.
2002 */
2003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2004DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2005#else
2006DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2007{
2008# if defined(RT_ARCH_AMD64)
2009# if RT_INLINE_ASM_USES_INTRIN
2010 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2011
2012# elif RT_INLINE_ASM_GNU_STYLE
2013 __asm__ __volatile__("xchgq %0, %1\n\t"
2014 : "=m" (*pu64),
2015 "=r" (u64)
2016 : "1" (u64));
2017# else
2018 __asm
2019 {
2020 mov rdx, [pu64]
2021 mov rax, [u64]
2022 xchg [rdx], rax
2023 mov [u64], rax
2024 }
2025# endif
2026# else /* !RT_ARCH_AMD64 */
2027# if RT_INLINE_ASM_GNU_STYLE
2028# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2029 uint32_t u32 = (uint32_t)u64;
2030 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2031 "xchgl %%ebx, %3\n\t"
2032 "1:\n\t"
2033 "lock; cmpxchg8b (%5)\n\t"
2034 "jnz 1b\n\t"
2035 "xchgl %%ebx, %3\n\t"
2036 /*"xchgl %%esi, %5\n\t"*/
2037 : "=A" (u64),
2038 "=m" (*pu64)
2039 : "0" (*pu64),
2040 "m" ( u32 ),
2041 "c" ( (uint32_t)(u64 >> 32) ),
2042 "S" (pu64) );
2043# else /* !PIC */
2044 __asm__ __volatile__("1:\n\t"
2045 "lock; cmpxchg8b %1\n\t"
2046 "jnz 1b\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (*pu64),
2050 "b" ( (uint32_t)u64 ),
2051 "c" ( (uint32_t)(u64 >> 32) ));
2052# endif
2053# else
2054 __asm
2055 {
2056 mov ebx, dword ptr [u64]
2057 mov ecx, dword ptr [u64 + 4]
2058 mov edi, pu64
2059 mov eax, dword ptr [edi]
2060 mov edx, dword ptr [edi + 4]
2061 retry:
2062 lock cmpxchg8b [edi]
2063 jnz retry
2064 mov dword ptr [u64], eax
2065 mov dword ptr [u64 + 4], edx
2066 }
2067# endif
2068# endif /* !RT_ARCH_AMD64 */
2069 return u64;
2070}
2071#endif
2072
2073
2074/**
2075 * Atomically Exchange a signed 64-bit value, ordered.
2076 *
2077 * @returns Current *pi64 value
2078 * @param pi64 Pointer to the 64-bit variable to update.
2079 * @param i64 The 64-bit value to assign to *pi64.
2080 */
2081DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2082{
2083 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2084}
2085
2086
2087#ifdef RT_ARCH_AMD64
2088/**
2089 * Atomically Exchange an unsigned 128-bit value, ordered.
2090 *
2091 * @returns Current *pu128.
2092 * @param pu128 Pointer to the 128-bit variable to update.
2093 * @param u128 The 128-bit value to assign to *pu128.
2094 *
2095 * @remark We cannot really assume that any hardware supports this. Nor do I have
2096 * GAS support for it. So, for the time being we'll BREAK the atomic
2097 * bit of this function and use two 64-bit exchanges instead.
2098 */
2099# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2100DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2101# else
2102DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2103{
2104 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2105 {
2106 /** @todo this is clumsy code */
2107 RTUINT128U u128Ret;
2108 u128Ret.u = u128;
2109 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2110 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2111 return u128Ret.u;
2112 }
2113#if 0 /* later? */
2114 else
2115 {
2116# if RT_INLINE_ASM_GNU_STYLE
2117 __asm__ __volatile__("1:\n\t"
2118 "lock; cmpxchg8b %1\n\t"
2119 "jnz 1b\n\t"
2120 : "=A" (u128),
2121 "=m" (*pu128)
2122 : "0" (*pu128),
2123 "b" ( (uint64_t)u128 ),
2124 "c" ( (uint64_t)(u128 >> 64) ));
2125# else
2126 __asm
2127 {
2128 mov rbx, dword ptr [u128]
2129 mov rcx, dword ptr [u128 + 8]
2130 mov rdi, pu128
2131 mov rax, dword ptr [rdi]
2132 mov rdx, dword ptr [rdi + 8]
2133 retry:
2134 lock cmpxchg16b [rdi]
2135 jnz retry
2136 mov dword ptr [u128], rax
2137 mov dword ptr [u128 + 8], rdx
2138 }
2139# endif
2140 }
2141 return u128;
2142#endif
2143}
2144# endif
2145#endif /* RT_ARCH_AMD64 */
2146
2147
2148/**
2149 * Atomically Exchange a value whose size might differ
2150 * between platforms or compilers, ordered.
2151 *
2152 * @param pu Pointer to the variable to update.
2153 * @param uNew The value to assign to *pu.
2154 */
2155#define ASMAtomicXchgSize(pu, uNew) \
2156 do { \
2157 switch (sizeof(*(pu))) { \
2158 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2159 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2160 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2161 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2162 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2163 } \
2164 } while (0)
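/* Illustrative usage sketch: ASMAtomicXchgSize picks the right fixed-size exchange from
 * sizeof(*(pu)), which is handy for types whose width differs between 32-bit and 64-bit
 * builds; g_cbUsed is a made-up variable.
 *
 *      static volatile size_t g_cbUsed;
 *      ASMAtomicXchgSize(&g_cbUsed, 0);   // expands to the 32-bit or 64-bit exchange as appropriate
 */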
2165
2166
2167/**
2168 * Atomically Exchange a pointer value, ordered.
2169 *
2170 * @returns Current *ppv value
2171 * @param ppv Pointer to the pointer variable to update.
2172 * @param pv The pointer value to assign to *ppv.
2173 */
2174DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2175{
2176#if ARCH_BITS == 32
2177 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2178#elif ARCH_BITS == 64
2179 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2180#else
2181# error "ARCH_BITS is bogus"
2182#endif
2183}
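/* Illustrative usage sketch: atomically detaching a shared buffer pointer so that exactly
 * one caller ends up owning (and freeing) it. g_pvSharedBuf is a made-up variable and
 * RTMemFree comes from iprt/mem.h, which is not included by this header.
 *
 *      static void * volatile g_pvSharedBuf;
 *
 *      void *pvMine = ASMAtomicXchgPtr(&g_pvSharedBuf, NULL);
 *      if (pvMine)
 *          RTMemFree(pvMine);   // only the caller that got the non-NULL value frees it
 */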
2184
2185
2186/**
2187 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2188 *
2189 * @returns true if xchg was done.
2190 * @returns false if xchg wasn't done.
2191 *
2192 * @param pu32 Pointer to the value to update.
2193 * @param u32New The new value to assign to *pu32.
2194 * @param u32Old The old value to compare *pu32 with.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2198#else
2199DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 uint8_t u8Ret;
2203 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2204 "setz %1\n\t"
2205 : "=m" (*pu32),
2206 "=qm" (u8Ret)
2207 : "r" (u32New),
2208 "a" (u32Old));
2209 return (bool)u8Ret;
2210
2211# elif RT_INLINE_ASM_USES_INTRIN
2212 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2213
2214# else
2215 uint32_t u32Ret;
2216 __asm
2217 {
2218# ifdef RT_ARCH_AMD64
2219 mov rdx, [pu32]
2220# else
2221 mov edx, [pu32]
2222# endif
2223 mov eax, [u32Old]
2224 mov ecx, [u32New]
2225# ifdef RT_ARCH_AMD64
2226 lock cmpxchg [rdx], ecx
2227# else
2228 lock cmpxchg [edx], ecx
2229# endif
2230 setz al
2231 movzx eax, al
2232 mov [u32Ret], eax
2233 }
2234 return !!u32Ret;
2235# endif
2236}
2237#endif
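/* Illustrative usage sketch: a compare-and-exchange retry loop that only bumps a counter
 * while it is below a cap; g_cUsers and ExampleTryAddUser are made-up names.
 *
 *      static volatile uint32_t g_cUsers;
 *
 *      bool ExampleTryAddUser(uint32_t cMaxUsers)
 *      {
 *          for (;;)
 *          {
 *              uint32_t const cOld = g_cUsers;
 *              if (cOld >= cMaxUsers)
 *                  return false;                                // full, give up
 *              if (ASMAtomicCmpXchgU32(&g_cUsers, cOld + 1, cOld))
 *                  return true;                                 // we won the race
 *              // someone else changed the counter first; reload and retry
 *          }
 *      }
 */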
2238
2239
2240/**
2241 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2242 *
2243 * @returns true if xchg was done.
2244 * @returns false if xchg wasn't done.
2245 *
2246 * @param pi32 Pointer to the value to update.
2247 * @param i32New The new value to assign to *pi32.
2248 * @param i32Old The old value to compare *pi32 with.
2249 */
2250DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2251{
2252 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2253}
2254
2255
2256/**
2257 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2258 *
2259 * @returns true if xchg was done.
2260 * @returns false if xchg wasn't done.
2261 *
2262 * @param pu64 Pointer to the 64-bit variable to update.
2263 * @param u64New The 64-bit value to assign to *pu64.
2264 * @param u64Old The value to compare with.
2265 */
2266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2267DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2268#else
2269DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2270{
2271# if RT_INLINE_ASM_USES_INTRIN
2272 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2273
2274# elif defined(RT_ARCH_AMD64)
2275# if RT_INLINE_ASM_GNU_STYLE
2276 uint8_t u8Ret;
2277 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2278 "setz %1\n\t"
2279 : "=m" (*pu64),
2280 "=qm" (u8Ret)
2281 : "r" (u64New),
2282 "a" (u64Old));
2283 return (bool)u8Ret;
2284# else
2285 bool fRet;
2286 __asm
2287 {
2288 mov rdx, [pu64]
2289 mov rax, [u64Old]
2290 mov rcx, [u64New]
2291 lock cmpxchg [rdx], rcx
2292 setz al
2293 mov [fRet], al
2294 }
2295 return fRet;
2296# endif
2297# else /* !RT_ARCH_AMD64 */
2298 uint32_t u32Ret;
2299# if RT_INLINE_ASM_GNU_STYLE
2300# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2301 uint32_t u32 = (uint32_t)u64New;
2302 uint32_t u32Spill;
2303 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2304 "lock; cmpxchg8b (%6)\n\t"
2305 "setz %%al\n\t"
2306 "xchgl %%ebx, %4\n\t"
2307 "movzbl %%al, %%eax\n\t"
2308 : "=a" (u32Ret),
2309 "=d" (u32Spill),
2310 "=m" (*pu64)
2311 : "A" (u64Old),
2312 "m" ( u32 ),
2313 "c" ( (uint32_t)(u64New >> 32) ),
2314 "S" (pu64) );
2315# else /* !PIC */
2316 uint32_t u32Spill;
2317 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2318 "setz %%al\n\t"
2319 "movzbl %%al, %%eax\n\t"
2320 : "=a" (u32Ret),
2321 "=d" (u32Spill),
2322 "=m" (*pu64)
2323 : "A" (u64Old),
2324 "b" ( (uint32_t)u64New ),
2325 "c" ( (uint32_t)(u64New >> 32) ));
2326# endif
2327 return (bool)u32Ret;
2328# else
2329 __asm
2330 {
2331 mov ebx, dword ptr [u64New]
2332 mov ecx, dword ptr [u64New + 4]
2333 mov edi, [pu64]
2334 mov eax, dword ptr [u64Old]
2335 mov edx, dword ptr [u64Old + 4]
2336 lock cmpxchg8b [edi]
2337 setz al
2338 movzx eax, al
2339 mov dword ptr [u32Ret], eax
2340 }
2341 return !!u32Ret;
2342# endif
2343# endif /* !RT_ARCH_AMD64 */
2344}
2345#endif
2346
2347
2348/**
2349 * Atomically Compare and exchange a signed 64-bit value, ordered.
2350 *
2351 * @returns true if xchg was done.
2352 * @returns false if xchg wasn't done.
2353 *
2354 * @param pi64 Pointer to the 64-bit variable to update.
2355 * @param i64 The 64-bit value to assign to *pi64.
2356 * @param i64Old The value to compare with.
2357 */
2358DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2359{
2360 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2361}
2362
2363
2364/** @def ASMAtomicCmpXchgSize
2365 * Atomically Compare and Exchange a value whose size might differ
2366 * between platforms or compilers, ordered.
2367 *
2368 * @param pu Pointer to the value to update.
2369 * @param uNew The new value to assign to *pu.
2370 * @param uOld The old value to compare *pu with.
2371 * @param fRc Where to store the result.
2372 */
2373#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2374 do { \
2375 switch (sizeof(*(pu))) { \
2376 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2377 break; \
2378 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2379 break; \
2380 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2381 (fRc) = false; \
2382 break; \
2383 } \
2384 } while (0)
2385
2386
2387/**
2388 * Atomically Compare and Exchange a pointer value, ordered.
2389 *
2390 * @returns true if xchg was done.
2391 * @returns false if xchg wasn't done.
2392 *
2393 * @param ppv Pointer to the value to update.
2394 * @param pvNew The new value to assign to *ppv.
2395 * @param pvOld The old value to compare *ppv with.
2396 */
2397DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2398{
2399#if ARCH_BITS == 32
2400 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2401#elif ARCH_BITS == 64
2402 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2403#else
2404# error "ARCH_BITS is bogus"
2405#endif
2406}
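
/* Usage sketch: a minimal lock-free LIFO push built on ASMAtomicCmpXchgPtr
 * (illustrative only; the EXAMPLENODE type and examplePush helper are made up
 * for this example, not part of IPRT). The head is re-read and the exchange is
 * retried until no other thread has changed it in between:
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *      void examplePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
 *      {
 *          void *pvHead;
 *          do
 *          {
 *              pvHead = ASMAtomicReadPtr((void * volatile *)ppHead);
 *              pNode->pNext = (EXAMPLENODE *)pvHead;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvHead));
 *      }
 */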
2407
2408
2409/**
2410 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2411 * passing back the old value, ordered.
2412 *
2413 * @returns true if xchg was done.
2414 * @returns false if xchg wasn't done.
2415 *
2416 * @param pu32 Pointer to the value to update.
2417 * @param u32New The new value to assign to *pu32.
2418 * @param u32Old The old value to compare *pu32 with.
2419 * @param pu32Old Pointer to store the old value at.
2420 */
2421#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2422DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2423#else
2424DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2425{
2426# if RT_INLINE_ASM_GNU_STYLE
2427 uint8_t u8Ret;
2428 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2429 "setz %1\n\t"
2430 : "=m" (*pu32),
2431 "=qm" (u8Ret),
2432 "=a" (*pu32Old)
2433 : "r" (u32New),
2434 "a" (u32Old));
2435 return (bool)u8Ret;
2436
2437# elif RT_INLINE_ASM_USES_INTRIN
2438 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2439
2440# else
2441 uint32_t u32Ret;
2442 __asm
2443 {
2444# ifdef RT_ARCH_AMD64
2445 mov rdx, [pu32]
2446# else
2447 mov edx, [pu32]
2448# endif
2449 mov eax, [u32Old]
2450 mov ecx, [u32New]
2451# ifdef RT_ARCH_AMD64
2452 lock cmpxchg [rdx], ecx
2453 mov rdx, [pu32Old]
2454 mov [rdx], eax
2455# else
2456 lock cmpxchg [edx], ecx
2457 mov edx, [pu32Old]
2458 mov [edx], eax
2459# endif
2460 setz al
2461 movzx eax, al
2462 mov [u32Ret], eax
2463 }
2464 return !!u32Ret;
2465# endif
2466}
2467#endif
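
/* Usage sketch: the Ex variant hands back the value actually found in the
 * variable, which saves the explicit re-read in compare-and-swap retry loops
 * (illustrative only; exampleSetMaxU32 is made up for this example):
 *
 *      void exampleSetMaxU32(volatile uint32_t *pu32, uint32_t u32New)
 *      {
 *          uint32_t u32Old = ASMAtomicReadU32(pu32);
 *          while (   u32New > u32Old
 *                 && !ASMAtomicCmpXchgExU32(pu32, u32New, u32Old, &u32Old))
 *              ;   -- a failed exchange has already refreshed u32Old, just retry
 *      }
 */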
2468
2469
2470/**
2471 * Atomically Compare and Exchange a signed 32-bit value, additionally
2472 * passing back the old value, ordered.
2473 *
2474 * @returns true if xchg was done.
2475 * @returns false if xchg wasn't done.
2476 *
2477 * @param pi32 Pointer to the value to update.
2478 * @param i32New The new value to assign to *pi32.
2479 * @param i32Old The old value to compare *pi32 with.
2480 * @param pi32Old Pointer to store the old value at.
2481 */
2482DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2483{
2484 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2485}
2486
2487
2488/**
2489 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2490 * passing back old value, ordered.
2491 *
2492 * @returns true if xchg was done.
2493 * @returns false if xchg wasn't done.
2494 *
2495 * @param pu64 Pointer to the 64-bit variable to update.
2496 * @param u64New The 64-bit value to assign to *pu64.
2497 * @param u64Old The value to compare with.
2498 * @param pu64Old Pointer to store the old value at.
2499 */
2500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2501DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2502#else
2503DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2504{
2505# if RT_INLINE_ASM_USES_INTRIN
2506 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2507
2508# elif defined(RT_ARCH_AMD64)
2509# if RT_INLINE_ASM_GNU_STYLE
2510 uint8_t u8Ret;
2511 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2512 "setz %1\n\t"
2513 : "=m" (*pu64),
2514 "=qm" (u8Ret),
2515 "=a" (*pu64Old)
2516 : "r" (u64New),
2517 "a" (u64Old));
2518 return (bool)u8Ret;
2519# else
2520 bool fRet;
2521 __asm
2522 {
2523 mov rdx, [pu64]
2524 mov rax, [u64Old]
2525 mov rcx, [u64New]
2526 lock cmpxchg [rdx], rcx
2527 mov rdx, [pu64Old]
2528 mov [rdx], rax
2529 setz al
2530 mov [fRet], al
2531 }
2532 return fRet;
2533# endif
2534# else /* !RT_ARCH_AMD64 */
2535# if RT_INLINE_ASM_GNU_STYLE
2536 uint64_t u64Ret;
2537# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2538 /* NB: this code uses a memory clobber description, because the clean
2539 * solution with an output value for *pu64 makes gcc run out of registers.
2540 * This will cause suboptimal code, and anyone with a better solution is
2541 * welcome to improve this. */
2542 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2543 "lock; cmpxchg8b %3\n\t"
2544 "xchgl %%ebx, %1\n\t"
2545 : "=A" (u64Ret)
2546 : "DS" ((uint32_t)u64New),
2547 "c" ((uint32_t)(u64New >> 32)),
2548 "m" (*pu64),
2549 "0" (u64Old)
2550 : "memory" );
2551# else /* !PIC */
2552 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2553 : "=A" (u64Ret),
2554 "=m" (*pu64)
2555 : "b" ((uint32_t)u64New),
2556 "c" ((uint32_t)(u64New >> 32)),
2557 "m" (*pu64),
2558 "0" (u64Old));
2559# endif
2560 *pu64Old = u64Ret;
2561 return u64Ret == u64Old;
2562# else
2563 uint32_t u32Ret;
2564 __asm
2565 {
2566 mov ebx, dword ptr [u64New]
2567 mov ecx, dword ptr [u64New + 4]
2568 mov edi, [pu64]
2569 mov eax, dword ptr [u64Old]
2570 mov edx, dword ptr [u64Old + 4]
2571 lock cmpxchg8b [edi]
2572 mov ebx, [pu64Old]
2573 mov [ebx], eax
2574 setz al
2575 movzx eax, al
2576 add ebx, 4
2577 mov [ebx], edx
2578 mov dword ptr [u32Ret], eax
2579 }
2580 return !!u32Ret;
2581# endif
2582# endif /* !RT_ARCH_AMD64 */
2583}
2584#endif
2585
2586
2587/**
2588 * Atomically Compare and exchange a signed 64-bit value, additionally
2589 * passing back old value, ordered.
2590 *
2591 * @returns true if xchg was done.
2592 * @returns false if xchg wasn't done.
2593 *
2594 * @param pi64 Pointer to the 64-bit variable to update.
2595 * @param i64 The 64-bit value to assign to *pi64.
2596 * @param i64Old The value to compare with.
2597 * @param pi64Old Pointer to store the old value at.
2598 */
2599DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2600{
2601 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2602}
2603
2604
2605/** @def ASMAtomicCmpXchgExSize
2606 * Atomically Compare and Exchange a value whose size might differ
2607 * between platforms or compilers. Additionally passes back old value.
2608 *
2609 * @param pu Pointer to the value to update.
2610 * @param uNew The new value to assign to *pu.
2611 * @param uOld The old value to compare *pu with.
2612 * @param fRc Where to store the result.
2613 * @param uOldVal Where to store the old value.
2614 */
2615#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2616 do { \
2617 switch (sizeof(*(pu))) { \
2618 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2619 break; \
2620 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2621 break; \
2622 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2623 (fRc) = false; \
2624 (uOldVal) = 0; \
2625 break; \
2626 } \
2627 } while (0)
2628
2629
2630/**
2631 * Atomically Compare and Exchange a pointer value, additionally
2632 * passing back old value, ordered.
2633 *
2634 * @returns true if xchg was done.
2635 * @returns false if xchg wasn't done.
2636 *
2637 * @param ppv Pointer to the value to update.
2638 * @param pvNew The new value to assign to *ppv.
2639 * @param pvOld The old value to compare *ppv with.
2640 * @param ppvOld Pointer to store the old value at.
2641 */
2642DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2643{
2644#if ARCH_BITS == 32
2645 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2646#elif ARCH_BITS == 64
2647 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2648#else
2649# error "ARCH_BITS is bogus"
2650#endif
2651}
2652
2653
2654/**
2655 * Atomically exchanges and adds to a 32-bit value, ordered.
2656 *
2657 * @returns The old value.
2658 * @param pu32 Pointer to the value.
2659 * @param u32 Number to add.
2660 */
2661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2662DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2663#else
2664DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2665{
2666# if RT_INLINE_ASM_USES_INTRIN
2667 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2668 return u32;
2669
2670# elif RT_INLINE_ASM_GNU_STYLE
2671 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2672 : "=r" (u32),
2673 "=m" (*pu32)
2674 : "0" (u32)
2675 : "memory");
2676 return u32;
2677# else
2678 __asm
2679 {
2680 mov eax, [u32]
2681# ifdef RT_ARCH_AMD64
2682 mov rdx, [pu32]
2683 lock xadd [rdx], eax
2684# else
2685 mov edx, [pu32]
2686 lock xadd [edx], eax
2687# endif
2688 mov [u32], eax
2689 }
2690 return u32;
2691# endif
2692}
2693#endif
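
/* Usage sketch: because ASMAtomicAddU32 returns the value *before* the addition
 * it is a fetch-and-add, which is handy for handing out unique increasing
 * indexes (illustrative only; g_iNextExampleIndex is made up for this example):
 *
 *      static uint32_t volatile g_iNextExampleIndex = 0;
 *
 *      uint32_t exampleGrabIndex(void)
 *      {
 *          return ASMAtomicAddU32(&g_iNextExampleIndex, 1);   -- first caller gets 0
 *      }
 */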
2694
2695
2696/**
2697 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2698 *
2699 * @returns The old value.
2700 * @param pi32 Pointer to the value.
2701 * @param i32 Number to add.
2702 */
2703DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2704{
2705 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2706}
2707
2708
2709/**
2710 * Atomically increment a 32-bit value, ordered.
2711 *
2712 * @returns The new value.
2713 * @param pu32 Pointer to the value to increment.
2714 */
2715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2716DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2717#else
2718DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2719{
2720 uint32_t u32;
2721# if RT_INLINE_ASM_USES_INTRIN
2722 u32 = _InterlockedIncrement((long *)pu32);
2723 return u32;
2724
2725# elif RT_INLINE_ASM_GNU_STYLE
2726 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2727 : "=r" (u32),
2728 "=m" (*pu32)
2729 : "0" (1)
2730 : "memory");
2731 return u32+1;
2732# else
2733 __asm
2734 {
2735 mov eax, 1
2736# ifdef RT_ARCH_AMD64
2737 mov rdx, [pu32]
2738 lock xadd [rdx], eax
2739# else
2740 mov edx, [pu32]
2741 lock xadd [edx], eax
2742# endif
2743 mov u32, eax
2744 }
2745 return u32+1;
2746# endif
2747}
2748#endif
2749
2750
2751/**
2752 * Atomically increment a signed 32-bit value, ordered.
2753 *
2754 * @returns The new value.
2755 * @param pi32 Pointer to the value to increment.
2756 */
2757DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2758{
2759 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2760}
2761
2762
2763/**
2764 * Atomically decrement an unsigned 32-bit value, ordered.
2765 *
2766 * @returns The new value.
2767 * @param pu32 Pointer to the value to decrement.
2768 */
2769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2770DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2771#else
2772DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2773{
2774 uint32_t u32;
2775# if RT_INLINE_ASM_USES_INTRIN
2776 u32 = _InterlockedDecrement((long *)pu32);
2777 return u32;
2778
2779# elif RT_INLINE_ASM_GNU_STYLE
2780 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2781 : "=r" (u32),
2782 "=m" (*pu32)
2783 : "0" (-1)
2784 : "memory");
2785 return u32-1;
2786# else
2787 __asm
2788 {
2789 mov eax, -1
2790# ifdef RT_ARCH_AMD64
2791 mov rdx, [pu32]
2792 lock xadd [rdx], eax
2793# else
2794 mov edx, [pu32]
2795 lock xadd [edx], eax
2796# endif
2797 mov u32, eax
2798 }
2799 return u32-1;
2800# endif
2801}
2802#endif
2803
2804
2805/**
2806 * Atomically decrement a signed 32-bit value, ordered.
2807 *
2808 * @returns The new value.
2809 * @param pi32 Pointer to the value to decrement.
2810 */
2811DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2812{
2813 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2814}
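
/* Usage sketch: ASMAtomicIncU32 / ASMAtomicDecU32 return the *new* value, which
 * is exactly what reference counting needs (illustrative only; the EXAMPLEOBJ
 * type, its cRefs member and exampleDestroy are made up for this example):
 *
 *      void exampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      void exampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              exampleDestroy(pObj);   -- the last reference is gone
 *      }
 */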
2815
2816
2817/**
2818 * Atomically Or an unsigned 32-bit value, ordered.
2819 *
2820 * @param pu32 Pointer to the variable to OR u32 with.
2821 * @param u32 The value to OR *pu32 with.
2822 */
2823#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2824DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2825#else
2826DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2827{
2828# if RT_INLINE_ASM_USES_INTRIN
2829 _InterlockedOr((long volatile *)pu32, (long)u32);
2830
2831# elif RT_INLINE_ASM_GNU_STYLE
2832 __asm__ __volatile__("lock; orl %1, %0\n\t"
2833 : "=m" (*pu32)
2834 : "ir" (u32));
2835# else
2836 __asm
2837 {
2838 mov eax, [u32]
2839# ifdef RT_ARCH_AMD64
2840 mov rdx, [pu32]
2841 lock or [rdx], eax
2842# else
2843 mov edx, [pu32]
2844 lock or [edx], eax
2845# endif
2846 }
2847# endif
2848}
2849#endif
2850
2851
2852/**
2853 * Atomically Or a signed 32-bit value, ordered.
2854 *
2855 * @param pi32 Pointer to the variable to OR i32 with.
2856 * @param i32 The value to OR *pi32 with.
2857 */
2858DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2859{
2860 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2861}
2862
2863
2864/**
2865 * Atomically And an unsigned 32-bit value, ordered.
2866 *
2867 * @param pu32 Pointer to the variable to AND u32 with.
2868 * @param u32 The value to AND *pu32 with.
2869 */
2870#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2871DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2872#else
2873DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2874{
2875# if RT_INLINE_ASM_USES_INTRIN
2876 _InterlockedAnd((long volatile *)pu32, u32);
2877
2878# elif RT_INLINE_ASM_GNU_STYLE
2879 __asm__ __volatile__("lock; andl %1, %0\n\t"
2880 : "=m" (*pu32)
2881 : "ir" (u32));
2882# else
2883 __asm
2884 {
2885 mov eax, [u32]
2886# ifdef RT_ARCH_AMD64
2887 mov rdx, [pu32]
2888 lock and [rdx], eax
2889# else
2890 mov edx, [pu32]
2891 lock and [edx], eax
2892# endif
2893 }
2894# endif
2895}
2896#endif
2897
2898
2899/**
2900 * Atomically And a signed 32-bit value, ordered.
2901 *
2902 * @param pi32 Pointer to the variable to AND i32 with.
2903 * @param i32 The value to AND *pi32 with.
2904 */
2905DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2906{
2907 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2908}
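
/* Usage sketch: the atomic OR / AND pair is typically used on flag fields shared
 * between threads, OR to set bits and AND with the inverted mask to clear them
 * (illustrative only; the flag name and fFlags member are made up):
 *
 *      #define EXAMPLE_F_SHUTDOWN  UINT32_C(0x00000001)
 *
 *      ASMAtomicOrU32(&pThis->fFlags, EXAMPLE_F_SHUTDOWN);      -- request shutdown
 *      ...
 *      ASMAtomicAndU32(&pThis->fFlags, ~EXAMPLE_F_SHUTDOWN);    -- acknowledge and clear
 */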
2909
2910
2911/**
2912 * Memory fence, waits for any pending writes and reads to complete.
2913 */
2914DECLINLINE(void) ASMMemoryFence(void)
2915{
2916 /** @todo use mfence? check if all cpus we care for support it. */
2917 uint32_t volatile u32;
2918 ASMAtomicXchgU32(&u32, 0);
2919}
2920
2921
2922/**
2923 * Write fence, waits for any pending writes to complete.
2924 */
2925DECLINLINE(void) ASMWriteFence(void)
2926{
2927 /** @todo use sfence? check if all cpus we care for support it. */
2928 ASMMemoryFence();
2929}
2930
2931
2932/**
2933 * Read fence, waits for any pending reads to complete.
2934 */
2935DECLINLINE(void) ASMReadFence(void)
2936{
2937 /** @todo use lfence? check if all cpus we care for support it. */
2938 ASMMemoryFence();
2939}
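
/* Usage sketch: a typical producer/consumer hand-off using the fences above
 * (illustrative only; g_ExampleData and g_fExampleReady are made up for this
 * example). The producer makes the payload globally visible before the flag,
 * the consumer makes sure it does not read the payload before the flag:
 *
 *      producer:
 *          g_ExampleData.u64Payload = u64New;
 *          ASMWriteFence();                       -- payload before the flag
 *          g_fExampleReady = true;
 *
 *      consumer:
 *          if (g_fExampleReady)
 *          {
 *              ASMReadFence();                    -- flag before the payload
 *              u64 = g_ExampleData.u64Payload;
 *          }
 */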
2940
2941
2942/**
2943 * Atomically reads an unsigned 8-bit value, ordered.
2944 *
2945 * @returns Current *pu8 value
2946 * @param pu8 Pointer to the 8-bit variable to read.
2947 */
2948DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2949{
2950 ASMMemoryFence();
2951 return *pu8; /* byte reads are atomic on x86 */
2952}
2953
2954
2955/**
2956 * Atomically reads an unsigned 8-bit value, unordered.
2957 *
2958 * @returns Current *pu8 value
2959 * @param pu8 Pointer to the 8-bit variable to read.
2960 */
2961DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2962{
2963 return *pu8; /* byte reads are atomic on x86 */
2964}
2965
2966
2967/**
2968 * Atomically reads a signed 8-bit value, ordered.
2969 *
2970 * @returns Current *pi8 value
2971 * @param pi8 Pointer to the 8-bit variable to read.
2972 */
2973DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2974{
2975 ASMMemoryFence();
2976 return *pi8; /* byte reads are atomic on x86 */
2977}
2978
2979
2980/**
2981 * Atomically reads a signed 8-bit value, unordered.
2982 *
2983 * @returns Current *pi8 value
2984 * @param pi8 Pointer to the 8-bit variable to read.
2985 */
2986DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2987{
2988 return *pi8; /* byte reads are atomic on x86 */
2989}
2990
2991
2992/**
2993 * Atomically reads an unsigned 16-bit value, ordered.
2994 *
2995 * @returns Current *pu16 value
2996 * @param pu16 Pointer to the 16-bit variable to read.
2997 */
2998DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
2999{
3000 ASMMemoryFence();
3001 Assert(!((uintptr_t)pu16 & 1));
3002 return *pu16;
3003}
3004
3005
3006/**
3007 * Atomically reads an unsigned 16-bit value, unordered.
3008 *
3009 * @returns Current *pu16 value
3010 * @param pu16 Pointer to the 16-bit variable to read.
3011 */
3012DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3013{
3014 Assert(!((uintptr_t)pu16 & 1));
3015 return *pu16;
3016}
3017
3018
3019/**
3020 * Atomically reads a signed 16-bit value, ordered.
3021 *
3022 * @returns Current *pi16 value
3023 * @param pi16 Pointer to the 16-bit variable to read.
3024 */
3025DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3026{
3027 ASMMemoryFence();
3028 Assert(!((uintptr_t)pi16 & 1));
3029 return *pi16;
3030}
3031
3032
3033/**
3034 * Atomically reads a signed 16-bit value, unordered.
3035 *
3036 * @returns Current *pi16 value
3037 * @param pi16 Pointer to the 16-bit variable to read.
3038 */
3039DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3040{
3041 Assert(!((uintptr_t)pi16 & 1));
3042 return *pi16;
3043}
3044
3045
3046/**
3047 * Atomically reads an unsigned 32-bit value, ordered.
3048 *
3049 * @returns Current *pu32 value
3050 * @param pu32 Pointer to the 32-bit variable to read.
3051 */
3052DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3053{
3054 ASMMemoryFence();
3055 Assert(!((uintptr_t)pu32 & 3));
3056 return *pu32;
3057}
3058
3059
3060/**
3061 * Atomically reads an unsigned 32-bit value, unordered.
3062 *
3063 * @returns Current *pu32 value
3064 * @param pu32 Pointer to the 32-bit variable to read.
3065 */
3066DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3067{
3068 Assert(!((uintptr_t)pu32 & 3));
3069 return *pu32;
3070}
3071
3072
3073/**
3074 * Atomically reads a signed 32-bit value, ordered.
3075 *
3076 * @returns Current *pi32 value
3077 * @param pi32 Pointer to the 32-bit variable to read.
3078 */
3079DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3080{
3081 ASMMemoryFence();
3082 Assert(!((uintptr_t)pi32 & 3));
3083 return *pi32;
3084}
3085
3086
3087/**
3088 * Atomically reads a signed 32-bit value, unordered.
3089 *
3090 * @returns Current *pi32 value
3091 * @param pi32 Pointer to the 32-bit variable to read.
3092 */
3093DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3094{
3095 Assert(!((uintptr_t)pi32 & 3));
3096 return *pi32;
3097}
3098
3099
3100/**
3101 * Atomically reads an unsigned 64-bit value, ordered.
3102 *
3103 * @returns Current *pu64 value
3104 * @param pu64 Pointer to the 64-bit variable to read.
3105 * The memory pointed to must be writable.
3106 * @remark This will fault if the memory is read-only!
3107 */
3108#if RT_INLINE_ASM_EXTERNAL
3109DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3110#else
3111DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3112{
3113 uint64_t u64;
3114# ifdef RT_ARCH_AMD64
3115# if RT_INLINE_ASM_GNU_STYLE
3116 Assert(!((uintptr_t)pu64 & 7));
3117 __asm__ __volatile__( "mfence\n\t"
3118 "movq %1, %0\n\t"
3119 : "=r" (u64)
3120 : "m" (*pu64));
3121# else
3122 __asm
3123 {
3124 mfence
3125 mov rdx, [pu64]
3126 mov rax, [rdx]
3127 mov [u64], rax
3128 }
3129# endif
3130# else /* !RT_ARCH_AMD64 */
3131# if RT_INLINE_ASM_GNU_STYLE
3132# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3133 uint32_t u32EBX = 0;
3134 Assert(!((uintptr_t)pu64 & 7));
3135 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3136 "lock; cmpxchg8b (%5)\n\t"
3137 "xchgl %%ebx, %3\n\t"
3138 : "=A" (u64),
3139 "=m" (*pu64)
3140 : "0" (0),
3141 "m" (u32EBX),
3142 "c" (0),
3143 "S" (pu64));
3144# else /* !PIC */
3145 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3146 : "=A" (u64),
3147 "=m" (*pu64)
3148 : "0" (0),
3149 "b" (0),
3150 "c" (0));
3151# endif
3152# else
3153 Assert(!((uintptr_t)pu64 & 7));
3154 __asm
3155 {
3156 xor eax, eax
3157 xor edx, edx
3158 mov edi, pu64
3159 xor ecx, ecx
3160 xor ebx, ebx
3161 lock cmpxchg8b [edi]
3162 mov dword ptr [u64], eax
3163 mov dword ptr [u64 + 4], edx
3164 }
3165# endif
3166# endif /* !RT_ARCH_AMD64 */
3167 return u64;
3168}
3169#endif
3170
3171
3172/**
3173 * Atomically reads an unsigned 64-bit value, unordered.
3174 *
3175 * @returns Current *pu64 value
3176 * @param pu64 Pointer to the 64-bit variable to read.
3177 * The memory pointed to must be writable.
3178 * @remark This will fault if the memory is read-only!
3179 */
3180#if RT_INLINE_ASM_EXTERNAL
3181DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3182#else
3183DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3184{
3185 uint64_t u64;
3186# ifdef RT_ARCH_AMD64
3187# if RT_INLINE_ASM_GNU_STYLE
3188 Assert(!((uintptr_t)pu64 & 7));
3189 __asm__ __volatile__("movq %1, %0\n\t"
3190 : "=r" (u64)
3191 : "m" (*pu64));
3192# else
3193 __asm
3194 {
3195 mov rdx, [pu64]
3196 mov rax, [rdx]
3197 mov [u64], rax
3198 }
3199# endif
3200# else /* !RT_ARCH_AMD64 */
3201# if RT_INLINE_ASM_GNU_STYLE
3202# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3203 uint32_t u32EBX = 0;
3204 Assert(!((uintptr_t)pu64 & 7));
3205 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3206 "lock; cmpxchg8b (%5)\n\t"
3207 "xchgl %%ebx, %3\n\t"
3208 : "=A" (u64),
3209 "=m" (*pu64)
3210 : "0" (0),
3211 "m" (u32EBX),
3212 "c" (0),
3213 "S" (pu64));
3214# else /* !PIC */
3215 __asm__ __volatile__("cmpxchg8b %1\n\t"
3216 : "=A" (u64),
3217 "=m" (*pu64)
3218 : "0" (0),
3219 "b" (0),
3220 "c" (0));
3221# endif
3222# else
3223 Assert(!((uintptr_t)pu64 & 7));
3224 __asm
3225 {
3226 xor eax, eax
3227 xor edx, edx
3228 mov edi, pu64
3229 xor ecx, ecx
3230 xor ebx, ebx
3231 lock cmpxchg8b [edi]
3232 mov dword ptr [u64], eax
3233 mov dword ptr [u64 + 4], edx
3234 }
3235# endif
3236# endif /* !RT_ARCH_AMD64 */
3237 return u64;
3238}
3239#endif
3240
3241
3242/**
3243 * Atomically reads a signed 64-bit value, ordered.
3244 *
3245 * @returns Current *pi64 value
3246 * @param pi64 Pointer to the 64-bit variable to read.
3247 * The memory pointed to must be writable.
3248 * @remark This will fault if the memory is read-only!
3249 */
3250DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3251{
3252 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3253}
3254
3255
3256/**
3257 * Atomically reads a signed 64-bit value, unordered.
3258 *
3259 * @returns Current *pi64 value
3260 * @param pi64 Pointer to the 64-bit variable to read.
3261 * The memory pointed to must be writable.
3262 * @remark This will fault if the memory is read-only!
3263 */
3264DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3265{
3266 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3267}
3268
3269
3270/**
3271 * Atomically reads a pointer value, ordered.
3272 *
3273 * @returns Current *pv value
3274 * @param ppv Pointer to the pointer variable to read.
3275 */
3276DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3277{
3278#if ARCH_BITS == 32
3279 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3280#elif ARCH_BITS == 64
3281 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3282#else
3283# error "ARCH_BITS is bogus"
3284#endif
3285}
3286
3287
3288/**
3289 * Atomically reads a pointer value, unordered.
3290 *
3291 * @returns Current *pv value
3292 * @param ppv Pointer to the pointer variable to read.
3293 */
3294DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3295{
3296#if ARCH_BITS == 32
3297 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3298#elif ARCH_BITS == 64
3299 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3300#else
3301# error "ARCH_BITS is bogus"
3302#endif
3303}
3304
3305
3306/**
3307 * Atomically reads a boolean value, ordered.
3308 *
3309 * @returns Current *pf value
3310 * @param pf Pointer to the boolean variable to read.
3311 */
3312DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3313{
3314 ASMMemoryFence();
3315 return *pf; /* byte reads are atomic on x86 */
3316}
3317
3318
3319/**
3320 * Atomically reads a boolean value, unordered.
3321 *
3322 * @returns Current *pf value
3323 * @param pf Pointer to the boolean variable to read.
3324 */
3325DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3326{
3327 return *pf; /* byte reads are atomic on x86 */
3328}
3329
3330
3331/**
3332 * Atomically read a value whose size might differ
3333 * between platforms or compilers, ordered.
3334 *
3335 * @param pu Pointer to the variable to read.
3336 * @param puRes Where to store the result.
3337 */
3338#define ASMAtomicReadSize(pu, puRes) \
3339 do { \
3340 switch (sizeof(*(pu))) { \
3341 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3342 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3343 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3344 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3345 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3346 } \
3347 } while (0)
3348
3349
3350/**
3351 * Atomically read a value whose size might differ
3352 * between platforms or compilers, unordered.
3353 *
3354 * @param pu Pointer to the variable to read.
3355 * @param puRes Where to store the result.
3356 */
3357#define ASMAtomicUoReadSize(pu, puRes) \
3358 do { \
3359 switch (sizeof(*(pu))) { \
3360 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3361 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3362 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3363 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3364 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3365 } \
3366 } while (0)
3367
3368
3369/**
3370 * Atomically writes an unsigned 8-bit value, ordered.
3371 *
3372 * @param pu8 Pointer to the 8-bit variable.
3373 * @param u8 The 8-bit value to assign to *pu8.
3374 */
3375DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3376{
3377 ASMAtomicXchgU8(pu8, u8);
3378}
3379
3380
3381/**
3382 * Atomically writes an unsigned 8-bit value, unordered.
3383 *
3384 * @param pu8 Pointer to the 8-bit variable.
3385 * @param u8 The 8-bit value to assign to *pu8.
3386 */
3387DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3388{
3389 *pu8 = u8; /* byte writes are atomic on x86 */
3390}
3391
3392
3393/**
3394 * Atomically writes a signed 8-bit value, ordered.
3395 *
3396 * @param pi8 Pointer to the 8-bit variable.
3397 * @param i8 The 8-bit value to assign to *pi8.
3398 */
3399DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3400{
3401 ASMAtomicXchgS8(pi8, i8);
3402}
3403
3404
3405/**
3406 * Atomically writes a signed 8-bit value, unordered.
3407 *
3408 * @param pi8 Pointer to the 8-bit variable.
3409 * @param i8 The 8-bit value to assign to *pi8.
3410 */
3411DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3412{
3413 *pi8 = i8; /* byte writes are atomic on x86 */
3414}
3415
3416
3417/**
3418 * Atomically writes an unsigned 16-bit value, ordered.
3419 *
3420 * @param pu16 Pointer to the 16-bit variable.
3421 * @param u16 The 16-bit value to assign to *pu16.
3422 */
3423DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3424{
3425 ASMAtomicXchgU16(pu16, u16);
3426}
3427
3428
3429/**
3430 * Atomically writes an unsigned 16-bit value, unordered.
3431 *
3432 * @param pu16 Pointer to the 16-bit variable.
3433 * @param u16 The 16-bit value to assign to *pu16.
3434 */
3435DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3436{
3437 Assert(!((uintptr_t)pu16 & 1));
3438 *pu16 = u16;
3439}
3440
3441
3442/**
3443 * Atomically writes a signed 16-bit value, ordered.
3444 *
3445 * @param pi16 Pointer to the 16-bit variable.
3446 * @param i16 The 16-bit value to assign to *pi16.
3447 */
3448DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3449{
3450 ASMAtomicXchgS16(pi16, i16);
3451}
3452
3453
3454/**
3455 * Atomically writes a signed 16-bit value, unordered.
3456 *
3457 * @param pi16 Pointer to the 16-bit variable.
3458 * @param i16 The 16-bit value to assign to *pi16.
3459 */
3460DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3461{
3462 Assert(!((uintptr_t)pi16 & 1));
3463 *pi16 = i16;
3464}
3465
3466
3467/**
3468 * Atomically writes an unsigned 32-bit value, ordered.
3469 *
3470 * @param pu32 Pointer to the 32-bit variable.
3471 * @param u32 The 32-bit value to assign to *pu32.
3472 */
3473DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3474{
3475 ASMAtomicXchgU32(pu32, u32);
3476}
3477
3478
3479/**
3480 * Atomically writes an unsigned 32-bit value, unordered.
3481 *
3482 * @param pu32 Pointer to the 32-bit variable.
3483 * @param u32 The 32-bit value to assign to *pu32.
3484 */
3485DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3486{
3487 Assert(!((uintptr_t)pu32 & 3));
3488 *pu32 = u32;
3489}
3490
3491
3492/**
3493 * Atomically writes a signed 32-bit value, ordered.
3494 *
3495 * @param pi32 Pointer to the 32-bit variable.
3496 * @param i32 The 32-bit value to assign to *pi32.
3497 */
3498DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3499{
3500 ASMAtomicXchgS32(pi32, i32);
3501}
3502
3503
3504/**
3505 * Atomically writes a signed 32-bit value, unordered.
3506 *
3507 * @param pi32 Pointer to the 32-bit variable.
3508 * @param i32 The 32-bit value to assign to *pi32.
3509 */
3510DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3511{
3512 Assert(!((uintptr_t)pi32 & 3));
3513 *pi32 = i32;
3514}
3515
3516
3517/**
3518 * Atomically writes an unsigned 64-bit value, ordered.
3519 *
3520 * @param pu64 Pointer to the 64-bit variable.
3521 * @param u64 The 64-bit value to assign to *pu64.
3522 */
3523DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3524{
3525 ASMAtomicXchgU64(pu64, u64);
3526}
3527
3528
3529/**
3530 * Atomically writes an unsigned 64-bit value, unordered.
3531 *
3532 * @param pu64 Pointer to the 64-bit variable.
3533 * @param u64 The 64-bit value to assign to *pu64.
3534 */
3535DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3536{
3537 Assert(!((uintptr_t)pu64 & 7));
3538#if ARCH_BITS == 64
3539 *pu64 = u64;
3540#else
3541 ASMAtomicXchgU64(pu64, u64);
3542#endif
3543}
3544
3545
3546/**
3547 * Atomically writes a signed 64-bit value, ordered.
3548 *
3549 * @param pi64 Pointer to the 64-bit variable.
3550 * @param i64 The 64-bit value to assign to *pi64.
3551 */
3552DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3553{
3554 ASMAtomicXchgS64(pi64, i64);
3555}
3556
3557
3558/**
3559 * Atomically writes a signed 64-bit value, unordered.
3560 *
3561 * @param pi64 Pointer to the 64-bit variable.
3562 * @param i64 The 64-bit value to assign to *pi64.
3563 */
3564DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3565{
3566 Assert(!((uintptr_t)pi64 & 7));
3567#if ARCH_BITS == 64
3568 *pi64 = i64;
3569#else
3570 ASMAtomicXchgS64(pi64, i64);
3571#endif
3572}
3573
3574
3575/**
3576 * Atomically writes a boolean value, ordered.
3577 *
3578 * @param pf Pointer to the boolean variable.
3579 * @param f The boolean value to assign to *pf.
3580 */
3581DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3582{
3583 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3584}
3585
3586
3587/**
3588 * Atomically writes a boolean value, unordered.
3589 *
3590 * @param pf Pointer to the boolean variable.
3591 * @param f The boolean value to assign to *pf.
3592 */
3593DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3594{
3595 *pf = f; /* byte writes are atomic on x86 */
3596}
3597
3598
3599/**
3600 * Atomically writes a pointer value, ordered.
3601 *
3603 * @param ppv Pointer to the pointer variable.
3604 * @param pv The pointer value to assign to *ppv.
3605 */
3606DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3607{
3608#if ARCH_BITS == 32
3609 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3610#elif ARCH_BITS == 64
3611 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3612#else
3613# error "ARCH_BITS is bogus"
3614#endif
3615}
3616
3617
3618/**
3619 * Atomically writes a pointer value, unordered.
3620 *
3622 * @param ppv Pointer to the pointer variable.
3623 * @param pv The pointer value to assign to *ppv.
3624 */
3625DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3626{
3627#if ARCH_BITS == 32
3628 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3629#elif ARCH_BITS == 64
3630 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3631#else
3632# error "ARCH_BITS is bogus"
3633#endif
3634}
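
/* Usage sketch: publishing an immutable structure through a shared pointer
 * (illustrative only; EXAMPLECFG, g_pExampleCfg and exampleBuildConfig are made
 * up for this example). The ordered write makes the structure contents visible
 * before the pointer, the ordered read on the consumer side pairs with it:
 *
 *      static EXAMPLECFG * volatile g_pExampleCfg;
 *
 *      writer:
 *          EXAMPLECFG *pNew = exampleBuildConfig();
 *          ASMAtomicWritePtr((void * volatile *)&g_pExampleCfg, pNew);
 *
 *      reader:
 *          EXAMPLECFG *pCfg = (EXAMPLECFG *)ASMAtomicReadPtr((void * volatile *)&g_pExampleCfg);
 */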
3635
3636
3637/**
3638 * Atomically write a value whose size might differ
3639 * between platforms or compilers, ordered.
3640 *
3641 * @param pu Pointer to the variable to update.
3642 * @param uNew The value to assign to *pu.
3643 */
3644#define ASMAtomicWriteSize(pu, uNew) \
3645 do { \
3646 switch (sizeof(*(pu))) { \
3647 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3648 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3649 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3650 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3651 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3652 } \
3653 } while (0)
3654
3655/**
3656 * Atomically write a value whose size might differ
3657 * between platforms or compilers, unordered.
3658 *
3659 * @param pu Pointer to the variable to update.
3660 * @param uNew The value to assign to *pu.
3661 */
3662#define ASMAtomicUoWriteSize(pu, uNew) \
3663 do { \
3664 switch (sizeof(*(pu))) { \
3665 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3666 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3667 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3668 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3669 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3670 } \
3671 } while (0)
3672
3673
3674
3675
3676/**
3677 * Invalidate page.
3678 *
3679 * @param pv Address of the page to invalidate.
3680 */
3681#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3682DECLASM(void) ASMInvalidatePage(void *pv);
3683#else
3684DECLINLINE(void) ASMInvalidatePage(void *pv)
3685{
3686# if RT_INLINE_ASM_USES_INTRIN
3687 __invlpg(pv);
3688
3689# elif RT_INLINE_ASM_GNU_STYLE
3690 __asm__ __volatile__("invlpg %0\n\t"
3691 : : "m" (*(uint8_t *)pv));
3692# else
3693 __asm
3694 {
3695# ifdef RT_ARCH_AMD64
3696 mov rax, [pv]
3697 invlpg [rax]
3698# else
3699 mov eax, [pv]
3700 invlpg [eax]
3701# endif
3702 }
3703# endif
3704}
3705#endif
3706
3707
3708#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3709# if PAGE_SIZE != 0x1000
3710# error "PAGE_SIZE is not 0x1000!"
3711# endif
3712#endif
3713
3714/**
3715 * Zeros a 4K memory page.
3716 *
3717 * @param pv Pointer to the memory block. This must be page aligned.
3718 */
3719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3720DECLASM(void) ASMMemZeroPage(volatile void *pv);
3721# else
3722DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3723{
3724# if RT_INLINE_ASM_USES_INTRIN
3725# ifdef RT_ARCH_AMD64
3726 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3727# else
3728 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3729# endif
3730
3731# elif RT_INLINE_ASM_GNU_STYLE
3732 RTUINTREG uDummy;
3733# ifdef RT_ARCH_AMD64
3734 __asm__ __volatile__ ("rep stosq"
3735 : "=D" (pv),
3736 "=c" (uDummy)
3737 : "0" (pv),
3738 "c" (0x1000 >> 3),
3739 "a" (0)
3740 : "memory");
3741# else
3742 __asm__ __volatile__ ("rep stosl"
3743 : "=D" (pv),
3744 "=c" (uDummy)
3745 : "0" (pv),
3746 "c" (0x1000 >> 2),
3747 "a" (0)
3748 : "memory");
3749# endif
3750# else
3751 __asm
3752 {
3753# ifdef RT_ARCH_AMD64
3754 xor rax, rax
3755 mov ecx, 0200h
3756 mov rdi, [pv]
3757 rep stosq
3758# else
3759 xor eax, eax
3760 mov ecx, 0400h
3761 mov edi, [pv]
3762 rep stosd
3763# endif
3764 }
3765# endif
3766}
3767# endif
3768
3769
3770/**
3771 * Zeros a memory block with a 32-bit aligned size.
3772 *
3773 * @param pv Pointer to the memory block.
3774 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3775 */
3776#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3777DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3778#else
3779DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3780{
3781# if RT_INLINE_ASM_USES_INTRIN
3782 __stosd((unsigned long *)pv, 0, cb >> 2);
3783
3784# elif RT_INLINE_ASM_GNU_STYLE
3785 __asm__ __volatile__ ("rep stosl"
3786 : "=D" (pv),
3787 "=c" (cb)
3788 : "0" (pv),
3789 "1" (cb >> 2),
3790 "a" (0)
3791 : "memory");
3792# else
3793 __asm
3794 {
3795 xor eax, eax
3796# ifdef RT_ARCH_AMD64
3797 mov rcx, [cb]
3798 shr rcx, 2
3799 mov rdi, [pv]
3800# else
3801 mov ecx, [cb]
3802 shr ecx, 2
3803 mov edi, [pv]
3804# endif
3805 rep stosd
3806 }
3807# endif
3808}
3809#endif
3810
3811
3812/**
3813 * Fills a memory block with a 32-bit aligned size.
3814 *
3815 * @param pv Pointer to the memory block.
3816 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3817 * @param u32 The value to fill with.
3818 */
3819#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3820DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3821#else
3822DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3823{
3824# if RT_INLINE_ASM_USES_INTRIN
3825 __stosd((unsigned long *)pv, u32, cb >> 2);
3826
3827# elif RT_INLINE_ASM_GNU_STYLE
3828 __asm__ __volatile__ ("rep stosl"
3829 : "=D" (pv),
3830 "=c" (cb)
3831 : "0" (pv),
3832 "1" (cb >> 2),
3833 "a" (u32)
3834 : "memory");
3835# else
3836 __asm
3837 {
3838# ifdef RT_ARCH_AMD64
3839 mov rcx, [cb]
3840 shr rcx, 2
3841 mov rdi, [pv]
3842# else
3843 mov ecx, [cb]
3844 shr ecx, 2
3845 mov edi, [pv]
3846# endif
3847 mov eax, [u32]
3848 rep stosd
3849 }
3850# endif
3851}
3852#endif
3853
3854
3855/**
3856 * Checks if a memory block is filled with the specified byte.
3857 *
3858 * This is a sort of inverted memchr.
3859 *
3860 * @returns Pointer to the byte which doesn't equal u8.
3861 * @returns NULL if all equal to u8.
3862 *
3863 * @param pv Pointer to the memory block.
3864 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3865 * @param u8 The value it's supposed to be filled with.
3866 */
3867#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3868DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3869#else
3870DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3871{
3872/** @todo rewrite this in inline assembly? */
3873 uint8_t const *pb = (uint8_t const *)pv;
3874 for (; cb; cb--, pb++)
3875 if (RT_UNLIKELY(*pb != u8))
3876 return (void *)pb;
3877 return NULL;
3878}
3879#endif
3880
3881
3882/**
3883 * Checks if a memory block is filled with the specified 32-bit value.
3884 *
3885 * This is a sort of inverted memchr.
3886 *
3887 * @returns Pointer to the first value which doesn't equal u32.
3888 * @returns NULL if all equal to u32.
3889 *
3890 * @param pv Pointer to the memory block.
3891 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3892 * @param u32 The value it's supposed to be filled with.
3893 */
3894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3895DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
3896#else
3897DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3898{
3899/** @todo rewrite this in inline assembly? */
3900 uint32_t const *pu32 = (uint32_t const *)pv;
3901 for (; cb; cb -= 4, pu32++)
3902 if (RT_UNLIKELY(*pu32 != u32))
3903 return (uint32_t *)pu32;
3904 return NULL;
3905}
3906#endif
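
/* Usage sketch: verifying that a page handed out as zeroed really is zeroed
 * (illustrative only; pvPage is made up for this example). The return value
 * points at the first offending dword, which is convenient for assertions:
 *
 *      uint32_t *pu32Bad = ASMMemIsAllU32(pvPage, 0x1000, 0);
 *      AssertMsg(!pu32Bad, ("dirty page: offset %#x holds %#x\n",
 *                           (uintptr_t)pu32Bad - (uintptr_t)pvPage, *pu32Bad));
 */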
3907
3908
3909/**
3910 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3911 *
3912 * @returns u32F1 * u32F2.
3913 */
3914#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3915DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3916#else
3917DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3918{
3919# ifdef RT_ARCH_AMD64
3920 return (uint64_t)u32F1 * u32F2;
3921# else /* !RT_ARCH_AMD64 */
3922 uint64_t u64;
3923# if RT_INLINE_ASM_GNU_STYLE
3924 __asm__ __volatile__("mull %%edx"
3925 : "=A" (u64)
3926 : "a" (u32F2), "d" (u32F1));
3927# else
3928 __asm
3929 {
3930 mov edx, [u32F1]
3931 mov eax, [u32F2]
3932 mul edx
3933 mov dword ptr [u64], eax
3934 mov dword ptr [u64 + 4], edx
3935 }
3936# endif
3937 return u64;
3938# endif /* !RT_ARCH_AMD64 */
3939}
3940#endif
3941
3942
3943/**
3944 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3945 *
3946 * @returns i32F1 * i32F2.
3947 */
3948#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3949DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3950#else
3951DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3952{
3953# ifdef RT_ARCH_AMD64
3954 return (int64_t)i32F1 * i32F2;
3955# else /* !RT_ARCH_AMD64 */
3956 int64_t i64;
3957# if RT_INLINE_ASM_GNU_STYLE
3958 __asm__ __volatile__("imull %%edx"
3959 : "=A" (i64)
3960 : "a" (i32F2), "d" (i32F1));
3961# else
3962 __asm
3963 {
3964 mov edx, [i32F1]
3965 mov eax, [i32F2]
3966 imul edx
3967 mov dword ptr [i64], eax
3968 mov dword ptr [i64 + 4], edx
3969 }
3970# endif
3971 return i64;
3972# endif /* !RT_ARCH_AMD64 */
3973}
3974#endif
3975
3976
3977/**
3978 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3979 *
3980 * @returns u64 / u32.
3981 */
3982#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3983DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3984#else
3985DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3986{
3987# ifdef RT_ARCH_AMD64
3988 return (uint32_t)(u64 / u32);
3989# else /* !RT_ARCH_AMD64 */
3990# if RT_INLINE_ASM_GNU_STYLE
3991 RTUINTREG uDummy;
3992 __asm__ __volatile__("divl %3"
3993 : "=a" (u32), "=d"(uDummy)
3994 : "A" (u64), "r" (u32));
3995# else
3996 __asm
3997 {
3998 mov eax, dword ptr [u64]
3999 mov edx, dword ptr [u64 + 4]
4000 mov ecx, [u32]
4001 div ecx
4002 mov [u32], eax
4003 }
4004# endif
4005 return u32;
4006# endif /* !RT_ARCH_AMD64 */
4007}
4008#endif
4009
4010
4011/**
4012 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4013 *
4014 * @returns i64 / i32.
4015 */
4016#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4017DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4018#else
4019DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4020{
4021# ifdef RT_ARCH_AMD64
4022 return (int32_t)(i64 / i32);
4023# else /* !RT_ARCH_AMD64 */
4024# if RT_INLINE_ASM_GNU_STYLE
4025 RTUINTREG iDummy;
4026 __asm__ __volatile__("idivl %3"
4027 : "=a" (i32), "=d"(iDummy)
4028 : "A" (i64), "r" (i32));
4029# else
4030 __asm
4031 {
4032 mov eax, dword ptr [i64]
4033 mov edx, dword ptr [i64 + 4]
4034 mov ecx, [i32]
4035 idiv ecx
4036 mov [i32], eax
4037 }
4038# endif
4039 return i32;
4040# endif /* !RT_ARCH_AMD64 */
4041}
4042#endif
4043
4044
4045/**
4046 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
4047 * using a 96-bit intermediate result.
4048 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4049 * __udivdi3 and __umoddi3 even if this inline function is not used.
4050 *
4051 * @returns (u64A * u32B) / u32C.
4052 * @param u64A The 64-bit value.
4053 * @param u32B The 32-bit value to multiply A by.
4054 * @param u32C The 32-bit value to divide A*B by.
4055 */
4056#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4057DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4058#else
4059DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4060{
4061# if RT_INLINE_ASM_GNU_STYLE
4062# ifdef RT_ARCH_AMD64
4063 uint64_t u64Result, u64Spill;
4064 __asm__ __volatile__("mulq %2\n\t"
4065 "divq %3\n\t"
4066 : "=a" (u64Result),
4067 "=d" (u64Spill)
4068 : "r" ((uint64_t)u32B),
4069 "r" ((uint64_t)u32C),
4070 "0" (u64A),
4071 "1" (0));
4072 return u64Result;
4073# else
4074 uint32_t u32Dummy;
4075 uint64_t u64Result;
4076 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4077 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4078 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4079 eax = u64A.hi */
4080 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4081 edx = u32C */
4082 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4083 edx = u32B */
4084 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4085 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4086 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4087 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4088 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4089 edx = u64Hi % u32C */
4090 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4091 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4092 "divl %%ecx \n\t" /* u64Result.lo */
4093 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4094 : "=A"(u64Result), "=c"(u32Dummy),
4095 "=S"(u32Dummy), "=D"(u32Dummy)
4096 : "a"((uint32_t)u64A),
4097 "S"((uint32_t)(u64A >> 32)),
4098 "c"(u32B),
4099 "D"(u32C));
4100 return u64Result;
4101# endif
4102# else
4103 RTUINT64U u;
4104 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4105 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4106 u64Hi += (u64Lo >> 32);
4107 u.s.Hi = (uint32_t)(u64Hi / u32C);
4108 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4109 return u.u;
4110# endif
4111}
4112#endif
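
/* Usage sketch: the classic use of ASMMultU64ByU32DivByU32 is rescaling a
 * 64-bit tick count between two frequencies without overflowing the 64-bit
 * intermediate product (illustrative only; cTicks and uHz are made up and the
 * frequency is assumed to fit in 32 bits):
 *
 *      uint64_t cNanoSecs = ASMMultU64ByU32DivByU32(cTicks, 1000000000, uHz);
 */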
4113
4114
4115/**
4116 * Probes a byte pointer for read access.
4117 *
4118 * While the function will not fault if the byte is not read accessible,
4119 * the idea is to do this in a safe place like before acquiring locks
4120 * and such like.
4121 *
4122 * Also, this function guarantees that an eager compiler is not going
4123 * to optimize the probing away.
4124 *
4125 * @param pvByte Pointer to the byte.
4126 */
4127#if RT_INLINE_ASM_EXTERNAL
4128DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4129#else
4130DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4131{
4132 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4133 uint8_t u8;
4134# if RT_INLINE_ASM_GNU_STYLE
4135 __asm__ __volatile__("movb (%1), %0\n\t"
4136 : "=r" (u8)
4137 : "r" (pvByte));
4138# else
4139 __asm
4140 {
4141# ifdef RT_ARCH_AMD64
4142 mov rax, [pvByte]
4143 mov al, [rax]
4144# else
4145 mov eax, [pvByte]
4146 mov al, [eax]
4147# endif
4148 mov [u8], al
4149 }
4150# endif
4151 return u8;
4152}
4153#endif
4154
4155/**
4156 * Probes a buffer for read access page by page.
4157 *
4158 * While the function will fault if the buffer is not fully read
4159 * accessible, the idea is to do this in a safe place like before
4160 * acquiring locks and such like.
4161 *
4162 * Also, this function guarantees that an eager compiler is not going
4163 * to optimize the probing away.
4164 *
4165 * @param pvBuf Pointer to the buffer.
4166 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4167 */
4168DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4169{
4170 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4171 /* the first byte */
4172 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4173 ASMProbeReadByte(pu8);
4174
4175 /* the pages in between. */
4176 while (cbBuf > /*PAGE_SIZE*/0x1000)
4177 {
4178 ASMProbeReadByte(pu8);
4179 cbBuf -= /*PAGE_SIZE*/0x1000;
4180 pu8 += /*PAGE_SIZE*/0x1000;
4181 }
4182
4183 /* the last byte */
4184 ASMProbeReadByte(pu8 + cbBuf - 1);
4185}
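
/* Usage sketch: probing a caller supplied buffer up front so that any page
 * fault happens here, and not later while a spinlock is held (illustrative
 * only; the request structure and exampleAcquireLock are made up):
 *
 *      ASMProbeReadBuffer(pReq->pvData, pReq->cbData);   -- may fault, but safely
 *      exampleAcquireLock(pThis);
 *      ...
 */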
4186
4187
4188/** @def ASMBreakpoint
4189 * Debugger Breakpoint.
4190 * @remark In the gnu world we add a nop instruction after the int3 to
4191 * force gdb to remain at the int3 source line.
4192 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4193 * @internal
4194 */
4195#if RT_INLINE_ASM_GNU_STYLE
4196# ifndef __L4ENV__
4197# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4198# else
4199# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4200# endif
4201#else
4202# define ASMBreakpoint() __debugbreak()
4203#endif
4204
4205
4206
4207/** @defgroup grp_inline_bits Bit Operations
4208 * @{
4209 */
4210
4211
4212/**
4213 * Sets a bit in a bitmap.
4214 *
4215 * @param pvBitmap Pointer to the bitmap.
4216 * @param iBit The bit to set.
4217 */
4218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4219DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4220#else
4221DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4222{
4223# if RT_INLINE_ASM_USES_INTRIN
4224 _bittestandset((long *)pvBitmap, iBit);
4225
4226# elif RT_INLINE_ASM_GNU_STYLE
4227 __asm__ __volatile__ ("btsl %1, %0"
4228 : "=m" (*(volatile long *)pvBitmap)
4229 : "Ir" (iBit)
4230 : "memory");
4231# else
4232 __asm
4233 {
4234# ifdef RT_ARCH_AMD64
4235 mov rax, [pvBitmap]
4236 mov edx, [iBit]
4237 bts [rax], edx
4238# else
4239 mov eax, [pvBitmap]
4240 mov edx, [iBit]
4241 bts [eax], edx
4242# endif
4243 }
4244# endif
4245}
4246#endif
4247
4248
4249/**
4250 * Atomically sets a bit in a bitmap, ordered.
4251 *
4252 * @param pvBitmap Pointer to the bitmap.
4253 * @param iBit The bit to set.
4254 */
4255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4256DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4257#else
4258DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4259{
4260# if RT_INLINE_ASM_USES_INTRIN
4261 _interlockedbittestandset((long *)pvBitmap, iBit);
4262# elif RT_INLINE_ASM_GNU_STYLE
4263 __asm__ __volatile__ ("lock; btsl %1, %0"
4264 : "=m" (*(volatile long *)pvBitmap)
4265 : "Ir" (iBit)
4266 : "memory");
4267# else
4268 __asm
4269 {
4270# ifdef RT_ARCH_AMD64
4271 mov rax, [pvBitmap]
4272 mov edx, [iBit]
4273 lock bts [rax], edx
4274# else
4275 mov eax, [pvBitmap]
4276 mov edx, [iBit]
4277 lock bts [eax], edx
4278# endif
4279 }
4280# endif
4281}
4282#endif
4283
4284
4285/**
4286 * Clears a bit in a bitmap.
4287 *
4288 * @param pvBitmap Pointer to the bitmap.
4289 * @param iBit The bit to clear.
4290 */
4291#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4292DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4293#else
4294DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4295{
4296# if RT_INLINE_ASM_USES_INTRIN
4297 _bittestandreset((long *)pvBitmap, iBit);
4298
4299# elif RT_INLINE_ASM_GNU_STYLE
4300 __asm__ __volatile__ ("btrl %1, %0"
4301 : "=m" (*(volatile long *)pvBitmap)
4302 : "Ir" (iBit)
4303 : "memory");
4304# else
4305 __asm
4306 {
4307# ifdef RT_ARCH_AMD64
4308 mov rax, [pvBitmap]
4309 mov edx, [iBit]
4310 btr [rax], edx
4311# else
4312 mov eax, [pvBitmap]
4313 mov edx, [iBit]
4314 btr [eax], edx
4315# endif
4316 }
4317# endif
4318}
4319#endif
4320
4321
4322/**
4323 * Atomically clears a bit in a bitmap, ordered.
4324 *
4325 * @param pvBitmap Pointer to the bitmap.
4326 * @param iBit The bit to clear.
4327 * @remark No memory barrier, take care on smp.
4328 */
4329#if RT_INLINE_ASM_EXTERNAL
4330DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4331#else
4332DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4333{
4334# if RT_INLINE_ASM_GNU_STYLE
4335 __asm__ __volatile__ ("lock; btrl %1, %0"
4336 : "=m" (*(volatile long *)pvBitmap)
4337 : "Ir" (iBit)
4338 : "memory");
4339# else
4340 __asm
4341 {
4342# ifdef RT_ARCH_AMD64
4343 mov rax, [pvBitmap]
4344 mov edx, [iBit]
4345 lock btr [rax], edx
4346# else
4347 mov eax, [pvBitmap]
4348 mov edx, [iBit]
4349 lock btr [eax], edx
4350# endif
4351 }
4352# endif
4353}
4354#endif
4355
4356
4357/**
4358 * Toggles a bit in a bitmap.
4359 *
4360 * @param pvBitmap Pointer to the bitmap.
4361 * @param iBit The bit to toggle.
4362 */
4363#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4364DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4365#else
4366DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4367{
4368# if RT_INLINE_ASM_USES_INTRIN
4369 _bittestandcomplement((long *)pvBitmap, iBit);
4370# elif RT_INLINE_ASM_GNU_STYLE
4371 __asm__ __volatile__ ("btcl %1, %0"
4372 : "=m" (*(volatile long *)pvBitmap)
4373 : "Ir" (iBit)
4374 : "memory");
4375# else
4376 __asm
4377 {
4378# ifdef RT_ARCH_AMD64
4379 mov rax, [pvBitmap]
4380 mov edx, [iBit]
4381 btc [rax], edx
4382# else
4383 mov eax, [pvBitmap]
4384 mov edx, [iBit]
4385 btc [eax], edx
4386# endif
4387 }
4388# endif
4389}
4390#endif
4391
4392
4393/**
4394 * Atomically toggles a bit in a bitmap, ordered.
4395 *
4396 * @param pvBitmap Pointer to the bitmap.
4397 * @param iBit The bit to toggle.
4398 */
4399#if RT_INLINE_ASM_EXTERNAL
4400DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4401#else
4402DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4403{
4404# if RT_INLINE_ASM_GNU_STYLE
4405 __asm__ __volatile__ ("lock; btcl %1, %0"
4406 : "=m" (*(volatile long *)pvBitmap)
4407 : "Ir" (iBit)
4408 : "memory");
4409# else
4410 __asm
4411 {
4412# ifdef RT_ARCH_AMD64
4413 mov rax, [pvBitmap]
4414 mov edx, [iBit]
4415 lock btc [rax], edx
4416# else
4417 mov eax, [pvBitmap]
4418 mov edx, [iBit]
4419 lock btc [eax], edx
4420# endif
4421 }
4422# endif
4423}
4424#endif
4425
4426
4427/**
4428 * Tests and sets a bit in a bitmap.
4429 *
4430 * @returns true if the bit was set.
4431 * @returns false if the bit was clear.
4432 * @param pvBitmap Pointer to the bitmap.
4433 * @param iBit The bit to test and set.
4434 */
4435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4436DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4437#else
4438DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4439{
4440 union { bool f; uint32_t u32; uint8_t u8; } rc;
4441# if RT_INLINE_ASM_USES_INTRIN
4442 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4443
4444# elif RT_INLINE_ASM_GNU_STYLE
4445 __asm__ __volatile__ ("btsl %2, %1\n\t"
4446 "setc %b0\n\t"
4447 "andl $1, %0\n\t"
4448 : "=q" (rc.u32),
4449 "=m" (*(volatile long *)pvBitmap)
4450 : "Ir" (iBit)
4451 : "memory");
4452# else
4453 __asm
4454 {
4455 mov edx, [iBit]
4456# ifdef RT_ARCH_AMD64
4457 mov rax, [pvBitmap]
4458 bts [rax], edx
4459# else
4460 mov eax, [pvBitmap]
4461 bts [eax], edx
4462# endif
4463 setc al
4464 and eax, 1
4465 mov [rc.u32], eax
4466 }
4467# endif
4468 return rc.f;
4469}
4470#endif
4471
4472
4473/**
4474 * Atomically tests and sets a bit in a bitmap, ordered.
4475 *
4476 * @returns true if the bit was set.
4477 * @returns false if the bit was clear.
4478 * @param pvBitmap Pointer to the bitmap.
4479 * @param iBit The bit to test and set.
4480 */
4481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4482DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4483#else
4484DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4485{
4486 union { bool f; uint32_t u32; uint8_t u8; } rc;
4487# if RT_INLINE_ASM_USES_INTRIN
4488 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4489# elif RT_INLINE_ASM_GNU_STYLE
4490 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4491 "setc %b0\n\t"
4492 "andl $1, %0\n\t"
4493 : "=q" (rc.u32),
4494 "=m" (*(volatile long *)pvBitmap)
4495 : "Ir" (iBit)
4496 : "memory");
4497# else
4498 __asm
4499 {
4500 mov edx, [iBit]
4501# ifdef RT_ARCH_AMD64
4502 mov rax, [pvBitmap]
4503 lock bts [rax], edx
4504# else
4505 mov eax, [pvBitmap]
4506 lock bts [eax], edx
4507# endif
4508 setc al
4509 and eax, 1
4510 mov [rc.u32], eax
4511 }
4512# endif
4513 return rc.f;
4514}
4515#endif
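
/* Usage sketch (illustrative only, not part of the original header): a lock-free
 * claim/release scheme; only the caller that atomically flips a bit from clear to
 * set owns the slot. The Example* names and the bitmap are hypothetical. */
#if 0 /* not compiled; example code only */
static volatile uint32_t s_au32SlotsInUse[64 / 32];    /* one bit per slot */

/* Returns true if this caller claimed the slot, false if it was already taken. */
static bool ExampleTryClaimSlot(int32_t iSlot)
{
    return !ASMAtomicBitTestAndSet(&s_au32SlotsInUse[0], iSlot);
}

/* Releases a slot previously claimed by ExampleTryClaimSlot. */
static void ExampleReleaseSlot(int32_t iSlot)
{
    ASMAtomicBitClear(&s_au32SlotsInUse[0], iSlot);
}
#endif /* example */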
4516
4517
4518/**
4519 * Tests and clears a bit in a bitmap.
4520 *
4521 * @returns true if the bit was set.
4522 * @returns false if the bit was clear.
4523 * @param pvBitmap Pointer to the bitmap.
4524 * @param iBit The bit to test and clear.
4525 */
4526#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4527DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4528#else
4529DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4530{
4531 union { bool f; uint32_t u32; uint8_t u8; } rc;
4532# if RT_INLINE_ASM_USES_INTRIN
4533 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4534
4535# elif RT_INLINE_ASM_GNU_STYLE
4536 __asm__ __volatile__ ("btrl %2, %1\n\t"
4537 "setc %b0\n\t"
4538 "andl $1, %0\n\t"
4539 : "=q" (rc.u32),
4540 "=m" (*(volatile long *)pvBitmap)
4541 : "Ir" (iBit)
4542 : "memory");
4543# else
4544 __asm
4545 {
4546 mov edx, [iBit]
4547# ifdef RT_ARCH_AMD64
4548 mov rax, [pvBitmap]
4549 btr [rax], edx
4550# else
4551 mov eax, [pvBitmap]
4552 btr [eax], edx
4553# endif
4554 setc al
4555 and eax, 1
4556 mov [rc.u32], eax
4557 }
4558# endif
4559 return rc.f;
4560}
4561#endif
4562
4563
4564/**
4565 * Atomically tests and clears a bit in a bitmap, ordered.
4566 *
4567 * @returns true if the bit was set.
4568 * @returns false if the bit was clear.
4569 * @param pvBitmap Pointer to the bitmap.
4570 * @param iBit The bit to test and clear.
4571 * @remark No memory barrier, take care on SMP.
4572 */
4573#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4574DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4575#else
4576DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4577{
4578 union { bool f; uint32_t u32; uint8_t u8; } rc;
4579# if RT_INLINE_ASM_USES_INTRIN
4580 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4581
4582# elif RT_INLINE_ASM_GNU_STYLE
4583 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4584 "setc %b0\n\t"
4585 "andl $1, %0\n\t"
4586 : "=q" (rc.u32),
4587 "=m" (*(volatile long *)pvBitmap)
4588 : "Ir" (iBit)
4589 : "memory");
4590# else
4591 __asm
4592 {
4593 mov edx, [iBit]
4594# ifdef RT_ARCH_AMD64
4595 mov rax, [pvBitmap]
4596 lock btr [rax], edx
4597# else
4598 mov eax, [pvBitmap]
4599 lock btr [eax], edx
4600# endif
4601 setc al
4602 and eax, 1
4603 mov [rc.u32], eax
4604 }
4605# endif
4606 return rc.f;
4607}
4608#endif
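
/* Usage sketch (illustrative only, not part of the original header): consuming a
 * pending-work flag exactly once. A producer is assumed to set the bit with the
 * ASMAtomicBitSet counterpart defined earlier in this file; the consumer below only
 * acts when it was the one to clear the bit. Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
static volatile uint32_t s_au32Pending[32 / 32];

static void ExampleConsumePending(int32_t iWorkItem)
{
    if (ASMAtomicBitTestAndClear(&s_au32Pending[0], iWorkItem))
    {
        /* The bit was set and this caller cleared it: handle the work item here. */
    }
}
#endif /* example */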
4609
4610
4611/**
4612 * Tests and toggles a bit in a bitmap.
4613 *
4614 * @returns true if the bit was set.
4615 * @returns false if the bit was clear.
4616 * @param pvBitmap Pointer to the bitmap.
4617 * @param iBit The bit to test and toggle.
4618 */
4619#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4620DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4621#else
4622DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4623{
4624 union { bool f; uint32_t u32; uint8_t u8; } rc;
4625# if RT_INLINE_ASM_USES_INTRIN
4626 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4627
4628# elif RT_INLINE_ASM_GNU_STYLE
4629 __asm__ __volatile__ ("btcl %2, %1\n\t"
4630 "setc %b0\n\t"
4631 "andl $1, %0\n\t"
4632 : "=q" (rc.u32),
4633 "=m" (*(volatile long *)pvBitmap)
4634 : "Ir" (iBit)
4635 : "memory");
4636# else
4637 __asm
4638 {
4639 mov edx, [iBit]
4640# ifdef RT_ARCH_AMD64
4641 mov rax, [pvBitmap]
4642 btc [rax], edx
4643# else
4644 mov eax, [pvBitmap]
4645 btc [eax], edx
4646# endif
4647 setc al
4648 and eax, 1
4649 mov [rc.u32], eax
4650 }
4651# endif
4652 return rc.f;
4653}
4654#endif
4655
4656
4657/**
4658 * Atomically tests and toggles a bit in a bitmap, ordered.
4659 *
4660 * @returns true if the bit was set.
4661 * @returns false if the bit was clear.
4662 * @param pvBitmap Pointer to the bitmap.
4663 * @param iBit The bit to test and toggle.
4664 */
4665#if RT_INLINE_ASM_EXTERNAL
4666DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4667#else
4668DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4669{
4670 union { bool f; uint32_t u32; uint8_t u8; } rc;
4671# if RT_INLINE_ASM_GNU_STYLE
4672 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4673 "setc %b0\n\t"
4674 "andl $1, %0\n\t"
4675 : "=q" (rc.u32),
4676 "=m" (*(volatile long *)pvBitmap)
4677 : "Ir" (iBit)
4678 : "memory");
4679# else
4680 __asm
4681 {
4682 mov edx, [iBit]
4683# ifdef RT_ARCH_AMD64
4684 mov rax, [pvBitmap]
4685 lock btc [rax], edx
4686# else
4687 mov eax, [pvBitmap]
4688 lock btc [eax], edx
4689# endif
4690 setc al
4691 and eax, 1
4692 mov [rc.u32], eax
4693 }
4694# endif
4695 return rc.f;
4696}
4697#endif
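
/* Usage sketch (illustrative only, not part of the original header): flipping a shared
 * phase bit while learning which phase was left behind. Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
static volatile uint32_t s_au32Phases[32 / 32];

/* Returns true if the bit was set (and is now clear), false if it was clear. */
static bool ExampleFlipPhase(int32_t iPhase)
{
    return ASMAtomicBitTestAndToggle(&s_au32Phases[0], iPhase);
}
#endif /* example */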
4698
4699
4700/**
4701 * Tests if a bit in a bitmap is set.
4702 *
4703 * @returns true if the bit is set.
4704 * @returns false if the bit is clear.
4705 * @param pvBitmap Pointer to the bitmap.
4706 * @param iBit The bit to test.
4707 */
4708#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4709DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4710#else
4711DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4712{
4713 union { bool f; uint32_t u32; uint8_t u8; } rc;
4714# if RT_INLINE_ASM_USES_INTRIN
4715 rc.u32 = _bittest((long *)pvBitmap, iBit);
4716# elif RT_INLINE_ASM_GNU_STYLE
4717
4718 __asm__ __volatile__ ("btl %2, %1\n\t"
4719 "setc %b0\n\t"
4720 "andl $1, %0\n\t"
4721 : "=q" (rc.u32),
4722 "=m" (*(volatile long *)pvBitmap)
4723 : "Ir" (iBit)
4724 : "memory");
4725# else
4726 __asm
4727 {
4728 mov edx, [iBit]
4729# ifdef RT_ARCH_AMD64
4730 mov rax, [pvBitmap]
4731 bt [rax], edx
4732# else
4733 mov eax, [pvBitmap]
4734 bt [eax], edx
4735# endif
4736 setc al
4737 and eax, 1
4738 mov [rc.u32], eax
4739 }
4740# endif
4741 return rc.f;
4742}
4743#endif
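
/* Usage sketch (illustrative only, not part of the original header): the bit index
 * runs across the whole 32-bit aligned array, so bit 40 lives in dword 1, bit 8.
 * Example* names and the bitmap are hypothetical. */
#if 0 /* not compiled; example code only */
static uint32_t s_au32BlockMap[128 / 32];    /* 128 allocation blocks */

static bool ExampleIsBlockUsed(int32_t iBlock)
{
    return ASMBitTest(&s_au32BlockMap[0], iBlock);
}
#endif /* example */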
4744
4745
4746/**
4747 * Clears a bit range within a bitmap.
4748 *
4749 * @param pvBitmap Pointer to the bitmap.
4750 * @param iBitStart The first bit to clear.
4751 * @param iBitEnd The first bit not to clear.
4752 */
4753DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4754{
4755 if (iBitStart < iBitEnd)
4756 {
4757 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4758 int iStart = iBitStart & ~31;
4759 int iEnd = iBitEnd & ~31;
4760 if (iStart == iEnd)
4761 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4762 else
4763 {
4764 /* bits in first dword. */
4765 if (iBitStart & 31)
4766 {
4767 *pu32 &= (1 << (iBitStart & 31)) - 1;
4768 pu32++;
4769 iBitStart = iStart + 32;
4770 }
4771
4772 /* whole dword. */
4773 if (iBitStart != iEnd)
4774 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4775
4776 /* bits in last dword. */
4777 if (iBitEnd & 31)
4778 {
4779 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4780 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4781 }
4782 }
4783 }
4784}
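
/* Usage sketch (illustrative only, not part of the original header): iBitEnd is
 * exclusive, so the call below clears exactly cBlocks bits starting at iFirst.
 * Example* names and the bitmap are hypothetical. */
#if 0 /* not compiled; example code only */
static uint32_t s_au32FreeMap[128 / 32];

static void ExampleFreeBlockRange(int32_t iFirst, int32_t cBlocks)
{
    ASMBitClearRange(&s_au32FreeMap[0], iFirst, iFirst + cBlocks);
}
#endif /* example */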
4785
4786
4787/**
4788 * Finds the first clear bit in a bitmap.
4789 *
4790 * @returns Index of the first zero bit.
4791 * @returns -1 if no clear bit was found.
4792 * @param pvBitmap Pointer to the bitmap.
4793 * @param cBits The number of bits in the bitmap. Multiple of 32.
4794 */
4795#if RT_INLINE_ASM_EXTERNAL
4796DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4797#else
4798DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4799{
4800 if (cBits)
4801 {
4802 int32_t iBit;
4803# if RT_INLINE_ASM_GNU_STYLE
4804 RTCCUINTREG uEAX, uECX, uEDI;
4805 cBits = RT_ALIGN_32(cBits, 32);
4806 __asm__ __volatile__("repe; scasl\n\t"
4807 "je 1f\n\t"
4808# ifdef RT_ARCH_AMD64
4809 "lea -4(%%rdi), %%rdi\n\t"
4810 "xorl (%%rdi), %%eax\n\t"
4811 "subq %5, %%rdi\n\t"
4812# else
4813 "lea -4(%%edi), %%edi\n\t"
4814 "xorl (%%edi), %%eax\n\t"
4815 "subl %5, %%edi\n\t"
4816# endif
4817 "shll $3, %%edi\n\t"
4818 "bsfl %%eax, %%edx\n\t"
4819 "addl %%edi, %%edx\n\t"
4820 "1:\t\n"
4821 : "=d" (iBit),
4822 "=&c" (uECX),
4823 "=&D" (uEDI),
4824 "=&a" (uEAX)
4825 : "0" (0xffffffff),
4826 "mr" (pvBitmap),
4827 "1" (cBits >> 5),
4828 "2" (pvBitmap),
4829 "3" (0xffffffff));
4830# else
4831 cBits = RT_ALIGN_32(cBits, 32);
4832 __asm
4833 {
4834# ifdef RT_ARCH_AMD64
4835 mov rdi, [pvBitmap]
4836 mov rbx, rdi
4837# else
4838 mov edi, [pvBitmap]
4839 mov ebx, edi
4840# endif
4841 mov edx, 0ffffffffh
4842 mov eax, edx
4843 mov ecx, [cBits]
4844 shr ecx, 5
4845 repe scasd
4846 je done
4847
4848# ifdef RT_ARCH_AMD64
4849 lea rdi, [rdi - 4]
4850 xor eax, [rdi]
4851 sub rdi, rbx
4852# else
4853 lea edi, [edi - 4]
4854 xor eax, [edi]
4855 sub edi, ebx
4856# endif
4857 shl edi, 3
4858 bsf edx, eax
4859 add edx, edi
4860 done:
4861 mov [iBit], edx
4862 }
4863# endif
4864 return iBit;
4865 }
4866 return -1;
4867}
4868#endif
4869
4870
4871/**
4872 * Finds the next clear bit in a bitmap.
4873 *
4874 * @returns Index of the next clear bit.
4875 * @returns -1 if no clear bit was found.
4876 * @param pvBitmap Pointer to the bitmap.
4877 * @param cBits The number of bits in the bitmap. Multiple of 32.
4878 * @param iBitPrev The bit returned from the last search.
4879 * The search will start at iBitPrev + 1.
4880 */
4881#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4882DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4883#else
4884DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4885{
4886 int iBit = ++iBitPrev & 31;
4887 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4888 cBits -= iBitPrev & ~31;
4889 if (iBit)
4890 {
4891 /* inspect the first dword. */
4892 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4893# if RT_INLINE_ASM_USES_INTRIN
4894 unsigned long ulBit = 0;
4895 if (_BitScanForward(&ulBit, u32))
4896 return ulBit + iBitPrev;
4897 iBit = -1;
4898# else
4899# if RT_INLINE_ASM_GNU_STYLE
4900 __asm__ __volatile__("bsf %1, %0\n\t"
4901 "jnz 1f\n\t"
4902 "movl $-1, %0\n\t"
4903 "1:\n\t"
4904 : "=r" (iBit)
4905 : "r" (u32));
4906# else
4907 __asm
4908 {
4909 mov edx, [u32]
4910 bsf eax, edx
4911 jnz done
4912 mov eax, 0ffffffffh
4913 done:
4914 mov [iBit], eax
4915 }
4916# endif
4917 if (iBit >= 0)
4918 return iBit + iBitPrev;
4919# endif
4920 /* Search the rest of the bitmap, if there is anything. */
4921 if (cBits > 32)
4922 {
4923 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4924 if (iBit >= 0)
4925 return iBit + (iBitPrev & ~31) + 32;
4926 }
4927 }
4928 else
4929 {
4930 /* Search the rest of the bitmap. */
4931 iBit = ASMBitFirstClear(pvBitmap, cBits);
4932 if (iBit >= 0)
4933 return iBit + (iBitPrev & ~31);
4934 }
4935 return iBit;
4936}
4937#endif
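
/* Usage sketch (illustrative only, not part of the original header): a simple bit
 * allocator that scans for a clear bit and claims it atomically, resuming the scan
 * when another thread wins the race. Example* names and sizes are hypothetical. */
#if 0 /* not compiled; example code only */
static volatile uint32_t s_au32AllocMap[256 / 32];   /* 256 bits, a multiple of 32 */

/* Returns the claimed bit index, or -1 if the bitmap is full. */
static int ExampleAllocBit(void)
{
    int iBit = ASMBitFirstClear(&s_au32AllocMap[0], 256);
    while (iBit >= 0)
    {
        if (!ASMAtomicBitTestAndSet(&s_au32AllocMap[0], iBit))
            return iBit;                                        /* claimed it */
        iBit = ASMBitNextClear(&s_au32AllocMap[0], 256, iBit);  /* lost the race, keep looking */
    }
    return -1;
}
#endif /* example */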
4938
4939
4940/**
4941 * Finds the first set bit in a bitmap.
4942 *
4943 * @returns Index of the first set bit.
4944 * @returns -1 if no set bit was found.
4945 * @param pvBitmap Pointer to the bitmap.
4946 * @param cBits The number of bits in the bitmap. Multiple of 32.
4947 */
4948#if RT_INLINE_ASM_EXTERNAL
4949DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4950#else
4951DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4952{
4953 if (cBits)
4954 {
4955 int32_t iBit;
4956# if RT_INLINE_ASM_GNU_STYLE
4957 RTCCUINTREG uEAX, uECX, uEDI;
4958 cBits = RT_ALIGN_32(cBits, 32);
4959 __asm__ __volatile__("repe; scasl\n\t"
4960 "je 1f\n\t"
4961# ifdef RT_ARCH_AMD64
4962 "lea -4(%%rdi), %%rdi\n\t"
4963 "movl (%%rdi), %%eax\n\t"
4964 "subq %5, %%rdi\n\t"
4965# else
4966 "lea -4(%%edi), %%edi\n\t"
4967 "movl (%%edi), %%eax\n\t"
4968 "subl %5, %%edi\n\t"
4969# endif
4970 "shll $3, %%edi\n\t"
4971 "bsfl %%eax, %%edx\n\t"
4972 "addl %%edi, %%edx\n\t"
4973 "1:\t\n"
4974 : "=d" (iBit),
4975 "=&c" (uECX),
4976 "=&D" (uEDI),
4977 "=&a" (uEAX)
4978 : "0" (0xffffffff),
4979 "mr" (pvBitmap),
4980 "1" (cBits >> 5),
4981 "2" (pvBitmap),
4982 "3" (0));
4983# else
4984 cBits = RT_ALIGN_32(cBits, 32);
4985 __asm
4986 {
4987# ifdef RT_ARCH_AMD64
4988 mov rdi, [pvBitmap]
4989 mov rbx, rdi
4990# else
4991 mov edi, [pvBitmap]
4992 mov ebx, edi
4993# endif
4994 mov edx, 0ffffffffh
4995 xor eax, eax
4996 mov ecx, [cBits]
4997 shr ecx, 5
4998 repe scasd
4999 je done
5000# ifdef RT_ARCH_AMD64
5001 lea rdi, [rdi - 4]
5002 mov eax, [rdi]
5003 sub rdi, rbx
5004# else
5005 lea edi, [edi - 4]
5006 mov eax, [edi]
5007 sub edi, ebx
5008# endif
5009 shl edi, 3
5010 bsf edx, eax
5011 add edx, edi
5012 done:
5013 mov [iBit], edx
5014 }
5015# endif
5016 return iBit;
5017 }
5018 return -1;
5019}
5020#endif
5021
5022
5023/**
5024 * Finds the next set bit in a bitmap.
5025 *
5026 * @returns Index of the next set bit.
5027 * @returns -1 if no set bit was found.
5028 * @param pvBitmap Pointer to the bitmap.
5029 * @param cBits The number of bits in the bitmap. Multiple of 32.
5030 * @param iBitPrev The bit returned from the last search.
5031 * The search will start at iBitPrev + 1.
5032 */
5033#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5034DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5035#else
5036DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5037{
5038 int iBit = ++iBitPrev & 31;
5039 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5040 cBits -= iBitPrev & ~31;
5041 if (iBit)
5042 {
5043 /* inspect the first dword. */
5044 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5045# if RT_INLINE_ASM_USES_INTRIN
5046 unsigned long ulBit = 0;
5047 if (_BitScanForward(&ulBit, u32))
5048 return ulBit + iBitPrev;
5049 iBit = -1;
5050# else
5051# if RT_INLINE_ASM_GNU_STYLE
5052 __asm__ __volatile__("bsf %1, %0\n\t"
5053 "jnz 1f\n\t"
5054 "movl $-1, %0\n\t"
5055 "1:\n\t"
5056 : "=r" (iBit)
5057 : "r" (u32));
5058# else
5059 __asm
5060 {
5061 mov edx, u32
5062 bsf eax, edx
5063 jnz done
5064 mov eax, 0ffffffffh
5065 done:
5066 mov [iBit], eax
5067 }
5068# endif
5069 if (iBit >= 0)
5070 return iBit + iBitPrev;
5071# endif
5072 /* Search the rest of the bitmap, if there is anything. */
5073 if (cBits > 32)
5074 {
5075 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5076 if (iBit >= 0)
5077 return iBit + (iBitPrev & ~31) + 32;
5078 }
5079
5080 }
5081 else
5082 {
5083 /* Search the rest of the bitmap. */
5084 iBit = ASMBitFirstSet(pvBitmap, cBits);
5085 if (iBit >= 0)
5086 return iBit + (iBitPrev & ~31);
5087 }
5088 return iBit;
5089}
5090#endif
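
/* Usage sketch (illustrative only, not part of the original header): walking every
 * set bit in a bitmap with the first/next pair. Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
static void ExampleVisitSetBits(volatile void *pvBitmap, uint32_t cBits)
{
    /* cBits must be a multiple of 32, as documented above. */
    int iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        /* ... process bit number iBit ... */
        iBit = ASMBitNextSet(pvBitmap, cBits, iBit);
    }
}
#endif /* example */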
5091
5092
5093/**
5094 * Finds the first bit which is set in the given 32-bit integer.
5095 * Bits are numbered from 1 (least significant) to 32.
5096 *
5097 * @returns index [1..32] of the first set bit.
5098 * @returns 0 if all bits are cleared.
5099 * @param u32 Integer to search for set bits.
5100 * @remark Similar to ffs() in BSD.
5101 */
5102DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5103{
5104# if RT_INLINE_ASM_USES_INTRIN
5105 unsigned long iBit;
5106 if (_BitScanForward(&iBit, u32))
5107 iBit++;
5108 else
5109 iBit = 0;
5110# elif RT_INLINE_ASM_GNU_STYLE
5111 uint32_t iBit;
5112 __asm__ __volatile__("bsf %1, %0\n\t"
5113 "jnz 1f\n\t"
5114 "xorl %0, %0\n\t"
5115 "jmp 2f\n"
5116 "1:\n\t"
5117 "incl %0\n"
5118 "2:\n\t"
5119 : "=r" (iBit)
5120 : "rm" (u32));
5121# else
5122 uint32_t iBit;
5123 _asm
5124 {
5125 bsf eax, [u32]
5126 jnz found
5127 xor eax, eax
5128 jmp done
5129 found:
5130 inc eax
5131 done:
5132 mov [iBit], eax
5133 }
5134# endif
5135 return iBit;
5136}
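
/* Usage sketch (illustrative only, not part of the original header): the returned
 * index is 1-based so that 0 can mean "no bit set", e.g.
 *     ASMBitFirstSetU32(0x00000001) == 1
 *     ASMBitFirstSetU32(0x00008000) == 16
 *     ASMBitFirstSetU32(0)          == 0
 * Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
/* Returns the 0-based index of the lowest set flag, or -1 if none are set. */
static int ExampleLowestFlagIndex(uint32_t fFlags)
{
    unsigned iBit = ASMBitFirstSetU32(fFlags);
    return iBit ? (int)iBit - 1 : -1;
}
#endif /* example */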
5137
5138
5139/**
5140 * Finds the first bit which is set in the given 32-bit integer.
5141 * Bits are numbered from 1 (least significant) to 32.
5142 *
5143 * @returns index [1..32] of the first set bit.
5144 * @returns 0 if all bits are cleared.
5145 * @param i32 Integer to search for set bits.
5146 * @remark Similar to ffs() in BSD.
5147 */
5148DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5149{
5150 return ASMBitFirstSetU32((uint32_t)i32);
5151}
5152
5153
5154/**
5155 * Finds the last bit which is set in the given 32-bit integer.
5156 * Bits are numbered from 1 (least significant) to 32.
5157 *
5158 * @returns index [1..32] of the last set bit.
5159 * @returns 0 if all bits are cleared.
5160 * @param u32 Integer to search for set bits.
5161 * @remark Similar to fls() in BSD.
5162 */
5163DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5164{
5165# if RT_INLINE_ASM_USES_INTRIN
5166 unsigned long iBit;
5167 if (_BitScanReverse(&iBit, u32))
5168 iBit++;
5169 else
5170 iBit = 0;
5171# elif RT_INLINE_ASM_GNU_STYLE
5172 uint32_t iBit;
5173 __asm__ __volatile__("bsrl %1, %0\n\t"
5174 "jnz 1f\n\t"
5175 "xorl %0, %0\n\t"
5176 "jmp 2f\n"
5177 "1:\n\t"
5178 "incl %0\n"
5179 "2:\n\t"
5180 : "=r" (iBit)
5181 : "rm" (u32));
5182# else
5183 uint32_t iBit;
5184 _asm
5185 {
5186 bsr eax, [u32]
5187 jnz found
5188 xor eax, eax
5189 jmp done
5190 found:
5191 inc eax
5192 done:
5193 mov [iBit], eax
5194 }
5195# endif
5196 return iBit;
5197}
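
/* Usage sketch (illustrative only, not part of the original header): the 1-based index
 * of the most significant set bit equals the number of bits needed to represent the
 * value, e.g. ASMBitLastSetU32(1) == 1, ASMBitLastSetU32(0x80) == 8 and
 * ASMBitLastSetU32(0) == 0. Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
/* Returns floor(log2(u32)), or -1 for 0. */
static int ExampleLog2(uint32_t u32)
{
    unsigned iBit = ASMBitLastSetU32(u32);
    return iBit ? (int)iBit - 1 : -1;
}
#endif /* example */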
5198
5199
5200/**
5201 * Finds the last bit which is set in the given 32-bit integer.
5202 * Bits are numbered from 1 (least significant) to 32.
5203 *
5204 * @returns index [1..32] of the last set bit.
5205 * @returns 0 if all bits are cleared.
5206 * @param i32 Integer to search for set bits.
5207 * @remark Similar to fls() in BSD.
5208 */
5209DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5210{
5211 return ASMBitLastSetU32((uint32_t)i32);
5212}
5213
5214
5215/**
5216 * Reverses the byte order of the given 32-bit integer.
5217 * @param u32 The integer to byte swap.
5218 */
5219DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5220{
5221#if RT_INLINE_ASM_USES_INTRIN
5222 u32 = _byteswap_ulong(u32);
5223#elif RT_INLINE_ASM_GNU_STYLE
5224 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5225#else
5226 _asm
5227 {
5228 mov eax, [u32]
5229 bswap eax
5230 mov [u32], eax
5231 }
5232#endif
5233 return u32;
5234}
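
/* Usage sketch (illustrative only, not part of the original header): byte swapping is
 * its own inverse, so the same call converts both to and from a foreign byte order,
 * e.g. 0x12345678 <-> 0x78563412. Example* names are hypothetical. */
#if 0 /* not compiled; example code only */
/* Converts a big endian wire value to host order, assuming a little endian host. */
static uint32_t ExampleBigEndianToHostU32(uint32_t u32Wire)
{
    return ASMByteSwapU32(u32Wire);
}
#endif /* example */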
5235
5236/** @} */
5237
5238
5239/** @} */
5240#endif
5241