/* $Id: tstInlineAsm.cpp 31412 2010-08-05 15:19:09Z vboxsync $ */ /** @file * IPRT Testcase - inline assembly. */ /* * Copyright (C) 2006-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License (GPL) as published by the Free Software * Foundation, in version 2 as it comes in the "COPYING" file of the * VirtualBox OSE distribution. VirtualBox OSE is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. * * The contents of this file may alternatively be used under the terms * of the Common Development and Distribution License Version 1.0 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the * VirtualBox OSE distribution, in which case the provisions of the * CDDL are applicable instead of those of the GPL. * * You may elect to license modified versions of this file under the * terms and conditions of either the GPL or the CDDL or both. */ /******************************************************************************* * Header Files * *******************************************************************************/ #include #include /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44018. Only gcc version 4.4 * is affected. No harm for the VBox code: If the cpuid code compiles, it works * fine. */ #if defined(__GNUC__) && defined(RT_ARCH_X86) && defined(__PIC__) # if __GNUC__ == 4 && __GNUC_MINOR__ == 4 # define GCC44_32BIT_PIC # endif #endif #if !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) # include #else # include #endif #include #include #include #include #include #include /******************************************************************************* * Defined Constants And Macros * *******************************************************************************/ #define CHECKVAL(val, expect, fmt) \ do \ { \ if ((val) != (expect)) \ { \ RTTestIErrorInc(); \ RTPrintf("%s, %d: " #val ": expected " fmt " got " fmt "\n", __FUNCTION__, __LINE__, (expect), (val)); \ } \ } while (0) #define CHECKOP(op, expect, fmt, type) \ do \ { \ type val = op; \ if (val != (type)(expect)) \ { \ RTTestIErrorInc(); \ RTPrintf("%s, %d: " #op ": expected " fmt " got " fmt "\n", __FUNCTION__, __LINE__, (type)(expect), val); \ } \ } while (0) #if !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) const char *getCacheAss(unsigned u) { if (u == 0) return "res0 "; if (u == 1) return "direct"; if (u >= 256) return "???"; char *pszRet; RTStrAPrintf(&pszRet, "%d way", u); /* intentional leak! */ return pszRet; } const char *getL2CacheAss(unsigned u) { switch (u) { case 0: return "off "; case 1: return "direct"; case 2: return "2 way "; case 3: return "res3 "; case 4: return "4 way "; case 5: return "res5 "; case 6: return "8 way "; case 7: return "res7 "; case 8: return "16 way"; case 9: return "res9 "; case 10: return "res10 "; case 11: return "res11 "; case 12: return "res12 "; case 13: return "res13 "; case 14: return "res14 "; case 15: return "fully "; default: return "????"; } } /** * Test and dump all possible info from the CPUID instruction. * * @remark Bits shared with the libc cpuid.c program. This all written by me, so no worries. * @todo transform the dumping into a generic runtime function. We'll need it for logging! */ void tstASMCpuId(void) { unsigned iBit; struct { uint32_t uEBX, uEAX, uEDX, uECX; } s; if (!ASMHasCpuId()) { RTPrintf("tstInlineAsm: warning! CPU doesn't support CPUID\n"); return; } /* * Try the 0 function and use that for checking the ASMCpuId_* variants. */ ASMCpuId(0, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); uint32_t u32; u32 = ASMCpuId_EAX(0); CHECKVAL(u32, s.uEAX, "%x"); u32 = ASMCpuId_EBX(0); CHECKVAL(u32, s.uEBX, "%x"); u32 = ASMCpuId_ECX(0); CHECKVAL(u32, s.uECX, "%x"); u32 = ASMCpuId_EDX(0); CHECKVAL(u32, s.uEDX, "%x"); uint32_t uECX2 = s.uECX - 1; uint32_t uEDX2 = s.uEDX - 1; ASMCpuId_ECX_EDX(0, &uECX2, &uEDX2); CHECKVAL(uECX2, s.uECX, "%x"); CHECKVAL(uEDX2, s.uEDX, "%x"); /* * Done testing, dump the information. */ RTPrintf("tstInlineAsm: CPUID Dump\n"); ASMCpuId(0, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); const uint32_t cFunctions = s.uEAX; /* raw dump */ RTPrintf("\n" " RAW Standard CPUIDs\n" "Function eax ebx ecx edx\n"); for (unsigned iStd = 0; iStd <= cFunctions + 3; iStd++) { if (iStd == 4) continue; /* Leaf 04 output depends on the initial value of ECX */ ASMCpuId(iStd, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("%08x %08x %08x %08x %08x%s\n", iStd, s.uEAX, s.uEBX, s.uECX, s.uEDX, iStd <= cFunctions ? "" : "*"); u32 = ASMCpuId_EAX(iStd); CHECKVAL(u32, s.uEAX, "%x"); u32 = ASMCpuId_EBX(iStd); CHECKVAL(u32, s.uEBX, "%x"); u32 = ASMCpuId_ECX(iStd); CHECKVAL(u32, s.uECX, "%x"); u32 = ASMCpuId_EDX(iStd); CHECKVAL(u32, s.uEDX, "%x"); uECX2 = s.uECX - 1; uEDX2 = s.uEDX - 1; ASMCpuId_ECX_EDX(iStd, &uECX2, &uEDX2); CHECKVAL(uECX2, s.uECX, "%x"); CHECKVAL(uEDX2, s.uEDX, "%x"); } /* * Understandable output */ ASMCpuId(0, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("Name: %.04s%.04s%.04s\n" "Support: 0-%u\n", &s.uEBX, &s.uEDX, &s.uECX, s.uEAX); bool const fIntel = ASMIsIntelCpuEx(s.uEBX, s.uECX, s.uEDX); /* * Get Features. */ if (cFunctions >= 1) { static const char * const s_apszTypes[4] = { "primary", "overdrive", "MP", "reserved" }; ASMCpuId(1, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("Family: %#x \tExtended: %#x \tEffective: %#x\n" "Model: %#x \tExtended: %#x \tEffective: %#x\n" "Stepping: %d\n" "Type: %d (%s)\n" "APIC ID: %#04x\n" "Logical CPUs: %d\n" "CLFLUSH Size: %d\n" "Brand ID: %#04x\n", (s.uEAX >> 8) & 0xf, (s.uEAX >> 20) & 0x7f, ASMGetCpuFamily(s.uEAX), (s.uEAX >> 4) & 0xf, (s.uEAX >> 16) & 0x0f, ASMGetCpuModel(s.uEAX, fIntel), ASMGetCpuStepping(s.uEAX), (s.uEAX >> 12) & 0x3, s_apszTypes[(s.uEAX >> 12) & 0x3], (s.uEBX >> 24) & 0xff, (s.uEBX >> 16) & 0xff, (s.uEBX >> 8) & 0xff, (s.uEBX >> 0) & 0xff); RTPrintf("Features EDX: "); if (s.uEDX & RT_BIT(0)) RTPrintf(" FPU"); if (s.uEDX & RT_BIT(1)) RTPrintf(" VME"); if (s.uEDX & RT_BIT(2)) RTPrintf(" DE"); if (s.uEDX & RT_BIT(3)) RTPrintf(" PSE"); if (s.uEDX & RT_BIT(4)) RTPrintf(" TSC"); if (s.uEDX & RT_BIT(5)) RTPrintf(" MSR"); if (s.uEDX & RT_BIT(6)) RTPrintf(" PAE"); if (s.uEDX & RT_BIT(7)) RTPrintf(" MCE"); if (s.uEDX & RT_BIT(8)) RTPrintf(" CX8"); if (s.uEDX & RT_BIT(9)) RTPrintf(" APIC"); if (s.uEDX & RT_BIT(10)) RTPrintf(" 10"); if (s.uEDX & RT_BIT(11)) RTPrintf(" SEP"); if (s.uEDX & RT_BIT(12)) RTPrintf(" MTRR"); if (s.uEDX & RT_BIT(13)) RTPrintf(" PGE"); if (s.uEDX & RT_BIT(14)) RTPrintf(" MCA"); if (s.uEDX & RT_BIT(15)) RTPrintf(" CMOV"); if (s.uEDX & RT_BIT(16)) RTPrintf(" PAT"); if (s.uEDX & RT_BIT(17)) RTPrintf(" PSE36"); if (s.uEDX & RT_BIT(18)) RTPrintf(" PSN"); if (s.uEDX & RT_BIT(19)) RTPrintf(" CLFSH"); if (s.uEDX & RT_BIT(20)) RTPrintf(" 20"); if (s.uEDX & RT_BIT(21)) RTPrintf(" DS"); if (s.uEDX & RT_BIT(22)) RTPrintf(" ACPI"); if (s.uEDX & RT_BIT(23)) RTPrintf(" MMX"); if (s.uEDX & RT_BIT(24)) RTPrintf(" FXSR"); if (s.uEDX & RT_BIT(25)) RTPrintf(" SSE"); if (s.uEDX & RT_BIT(26)) RTPrintf(" SSE2"); if (s.uEDX & RT_BIT(27)) RTPrintf(" SS"); if (s.uEDX & RT_BIT(28)) RTPrintf(" HTT"); if (s.uEDX & RT_BIT(29)) RTPrintf(" 29"); if (s.uEDX & RT_BIT(30)) RTPrintf(" 30"); if (s.uEDX & RT_BIT(31)) RTPrintf(" 31"); RTPrintf("\n"); /** @todo check intel docs. */ RTPrintf("Features ECX: "); if (s.uECX & RT_BIT(0)) RTPrintf(" SSE3"); for (iBit = 1; iBit < 13; iBit++) if (s.uECX & RT_BIT(iBit)) RTPrintf(" %d", iBit); if (s.uECX & RT_BIT(13)) RTPrintf(" CX16"); for (iBit = 14; iBit < 32; iBit++) if (s.uECX & RT_BIT(iBit)) RTPrintf(" %d", iBit); RTPrintf("\n"); } /* * Extended. * Implemented after AMD specs. */ /** @todo check out the intel specs. */ ASMCpuId(0x80000000, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); if (!s.uEAX && !s.uEBX && !s.uECX && !s.uEDX) { RTPrintf("No extended CPUID info? Check the manual on how to detect this...\n"); return; } const uint32_t cExtFunctions = s.uEAX | 0x80000000; /* raw dump */ RTPrintf("\n" " RAW Extended CPUIDs\n" "Function eax ebx ecx edx\n"); for (unsigned iExt = 0x80000000; iExt <= cExtFunctions + 3; iExt++) { ASMCpuId(iExt, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("%08x %08x %08x %08x %08x%s\n", iExt, s.uEAX, s.uEBX, s.uECX, s.uEDX, iExt <= cExtFunctions ? "" : "*"); u32 = ASMCpuId_EAX(iExt); CHECKVAL(u32, s.uEAX, "%x"); u32 = ASMCpuId_EBX(iExt); CHECKVAL(u32, s.uEBX, "%x"); u32 = ASMCpuId_ECX(iExt); CHECKVAL(u32, s.uECX, "%x"); u32 = ASMCpuId_EDX(iExt); CHECKVAL(u32, s.uEDX, "%x"); uECX2 = s.uECX - 1; uEDX2 = s.uEDX - 1; ASMCpuId_ECX_EDX(iExt, &uECX2, &uEDX2); CHECKVAL(uECX2, s.uECX, "%x"); CHECKVAL(uEDX2, s.uEDX, "%x"); } /* * Understandable output */ ASMCpuId(0x80000000, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("Ext Name: %.4s%.4s%.4s\n" "Ext Supports: 0x80000000-%#010x\n", &s.uEBX, &s.uEDX, &s.uECX, s.uEAX); if (cExtFunctions >= 0x80000001) { ASMCpuId(0x80000001, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("Family: %#x \tExtended: %#x \tEffective: %#x\n" "Model: %#x \tExtended: %#x \tEffective: %#x\n" "Stepping: %d\n" "Brand ID: %#05x\n", (s.uEAX >> 8) & 0xf, (s.uEAX >> 20) & 0x7f, ASMGetCpuFamily(s.uEAX), (s.uEAX >> 4) & 0xf, (s.uEAX >> 16) & 0x0f, ASMGetCpuModel(s.uEAX, fIntel), ASMGetCpuStepping(s.uEAX), s.uEBX & 0xfff); RTPrintf("Features EDX: "); if (s.uEDX & RT_BIT(0)) RTPrintf(" FPU"); if (s.uEDX & RT_BIT(1)) RTPrintf(" VME"); if (s.uEDX & RT_BIT(2)) RTPrintf(" DE"); if (s.uEDX & RT_BIT(3)) RTPrintf(" PSE"); if (s.uEDX & RT_BIT(4)) RTPrintf(" TSC"); if (s.uEDX & RT_BIT(5)) RTPrintf(" MSR"); if (s.uEDX & RT_BIT(6)) RTPrintf(" PAE"); if (s.uEDX & RT_BIT(7)) RTPrintf(" MCE"); if (s.uEDX & RT_BIT(8)) RTPrintf(" CMPXCHG8B"); if (s.uEDX & RT_BIT(9)) RTPrintf(" APIC"); if (s.uEDX & RT_BIT(10)) RTPrintf(" 10"); if (s.uEDX & RT_BIT(11)) RTPrintf(" SysCallSysRet"); if (s.uEDX & RT_BIT(12)) RTPrintf(" MTRR"); if (s.uEDX & RT_BIT(13)) RTPrintf(" PGE"); if (s.uEDX & RT_BIT(14)) RTPrintf(" MCA"); if (s.uEDX & RT_BIT(15)) RTPrintf(" CMOV"); if (s.uEDX & RT_BIT(16)) RTPrintf(" PAT"); if (s.uEDX & RT_BIT(17)) RTPrintf(" PSE36"); if (s.uEDX & RT_BIT(18)) RTPrintf(" 18"); if (s.uEDX & RT_BIT(19)) RTPrintf(" 19"); if (s.uEDX & RT_BIT(20)) RTPrintf(" NX"); if (s.uEDX & RT_BIT(21)) RTPrintf(" 21"); if (s.uEDX & RT_BIT(22)) RTPrintf(" MmxExt"); if (s.uEDX & RT_BIT(23)) RTPrintf(" MMX"); if (s.uEDX & RT_BIT(24)) RTPrintf(" FXSR"); if (s.uEDX & RT_BIT(25)) RTPrintf(" FastFXSR"); if (s.uEDX & RT_BIT(26)) RTPrintf(" 26"); if (s.uEDX & RT_BIT(27)) RTPrintf(" RDTSCP"); if (s.uEDX & RT_BIT(28)) RTPrintf(" 28"); if (s.uEDX & RT_BIT(29)) RTPrintf(" LongMode"); if (s.uEDX & RT_BIT(30)) RTPrintf(" 3DNowExt"); if (s.uEDX & RT_BIT(31)) RTPrintf(" 3DNow"); RTPrintf("\n"); RTPrintf("Features ECX: "); if (s.uECX & RT_BIT(0)) RTPrintf(" LahfSahf"); if (s.uECX & RT_BIT(1)) RTPrintf(" CmpLegacy"); if (s.uECX & RT_BIT(2)) RTPrintf(" SVM"); if (s.uECX & RT_BIT(3)) RTPrintf(" 3"); if (s.uECX & RT_BIT(4)) RTPrintf(" AltMovCr8"); for (iBit = 5; iBit < 32; iBit++) if (s.uECX & RT_BIT(iBit)) RTPrintf(" %d", iBit); RTPrintf("\n"); } char szString[4*4*3+1] = {0}; if (cExtFunctions >= 0x80000002) ASMCpuId(0x80000002, &szString[0 + 0], &szString[0 + 4], &szString[0 + 8], &szString[0 + 12]); if (cExtFunctions >= 0x80000003) ASMCpuId(0x80000003, &szString[16 + 0], &szString[16 + 4], &szString[16 + 8], &szString[16 + 12]); if (cExtFunctions >= 0x80000004) ASMCpuId(0x80000004, &szString[32 + 0], &szString[32 + 4], &szString[32 + 8], &szString[32 + 12]); if (cExtFunctions >= 0x80000002) RTPrintf("Full Name: %s\n", szString); if (cExtFunctions >= 0x80000005) { ASMCpuId(0x80000005, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("TLB 2/4M Instr/Uni: %s %3d entries\n" "TLB 2/4M Data: %s %3d entries\n", getCacheAss((s.uEAX >> 8) & 0xff), (s.uEAX >> 0) & 0xff, getCacheAss((s.uEAX >> 24) & 0xff), (s.uEAX >> 16) & 0xff); RTPrintf("TLB 4K Instr/Uni: %s %3d entries\n" "TLB 4K Data: %s %3d entries\n", getCacheAss((s.uEBX >> 8) & 0xff), (s.uEBX >> 0) & 0xff, getCacheAss((s.uEBX >> 24) & 0xff), (s.uEBX >> 16) & 0xff); RTPrintf("L1 Instr Cache Line Size: %d bytes\n" "L1 Instr Cache Lines Per Tag: %d\n" "L1 Instr Cache Associativity: %s\n" "L1 Instr Cache Size: %d KB\n", (s.uEDX >> 0) & 0xff, (s.uEDX >> 8) & 0xff, getCacheAss((s.uEDX >> 16) & 0xff), (s.uEDX >> 24) & 0xff); RTPrintf("L1 Data Cache Line Size: %d bytes\n" "L1 Data Cache Lines Per Tag: %d\n" "L1 Data Cache Associativity: %s\n" "L1 Data Cache Size: %d KB\n", (s.uECX >> 0) & 0xff, (s.uECX >> 8) & 0xff, getCacheAss((s.uECX >> 16) & 0xff), (s.uECX >> 24) & 0xff); } if (cExtFunctions >= 0x80000006) { ASMCpuId(0x80000006, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("L2 TLB 2/4M Instr/Uni: %s %4d entries\n" "L2 TLB 2/4M Data: %s %4d entries\n", getL2CacheAss((s.uEAX >> 12) & 0xf), (s.uEAX >> 0) & 0xfff, getL2CacheAss((s.uEAX >> 28) & 0xf), (s.uEAX >> 16) & 0xfff); RTPrintf("L2 TLB 4K Instr/Uni: %s %4d entries\n" "L2 TLB 4K Data: %s %4d entries\n", getL2CacheAss((s.uEBX >> 12) & 0xf), (s.uEBX >> 0) & 0xfff, getL2CacheAss((s.uEBX >> 28) & 0xf), (s.uEBX >> 16) & 0xfff); RTPrintf("L2 Cache Line Size: %d bytes\n" "L2 Cache Lines Per Tag: %d\n" "L2 Cache Associativity: %s\n" "L2 Cache Size: %d KB\n", (s.uEDX >> 0) & 0xff, (s.uEDX >> 8) & 0xf, getL2CacheAss((s.uEDX >> 12) & 0xf), (s.uEDX >> 16) & 0xffff); } if (cExtFunctions >= 0x80000007) { ASMCpuId(0x80000007, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("APM Features: "); if (s.uEDX & RT_BIT(0)) RTPrintf(" TS"); if (s.uEDX & RT_BIT(1)) RTPrintf(" FID"); if (s.uEDX & RT_BIT(2)) RTPrintf(" VID"); if (s.uEDX & RT_BIT(3)) RTPrintf(" TTP"); if (s.uEDX & RT_BIT(4)) RTPrintf(" TM"); if (s.uEDX & RT_BIT(5)) RTPrintf(" STC"); if (s.uEDX & RT_BIT(6)) RTPrintf(" 6"); if (s.uEDX & RT_BIT(7)) RTPrintf(" 7"); if (s.uEDX & RT_BIT(8)) RTPrintf(" TscInvariant"); for (iBit = 9; iBit < 32; iBit++) if (s.uEDX & RT_BIT(iBit)) RTPrintf(" %d", iBit); RTPrintf("\n"); } if (cExtFunctions >= 0x80000008) { ASMCpuId(0x80000008, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("Physical Address Width: %d bits\n" "Virtual Address Width: %d bits\n" "Guest Physical Address Width: %d bits\n", (s.uEAX >> 0) & 0xff, (s.uEAX >> 8) & 0xff, (s.uEAX >> 16) & 0xff); RTPrintf("Physical Core Count: %d\n", ((s.uECX >> 0) & 0xff) + 1); if ((s.uECX >> 12) & 0xf) RTPrintf("ApicIdCoreIdSize: %d bits\n", (s.uECX >> 12) & 0xf); } if (cExtFunctions >= 0x8000000a) { ASMCpuId(0x8000000a, &s.uEAX, &s.uEBX, &s.uECX, &s.uEDX); RTPrintf("SVM Revision: %d (%#x)\n" "Number of Address Space IDs: %d (%#x)\n", s.uEAX & 0xff, s.uEAX & 0xff, s.uEBX, s.uEBX); } } #endif /* AMD64 || X86 */ static void tstASMAtomicXchgU8(void) { struct { uint8_t u8Dummy0; uint8_t u8; uint8_t u8Dummy1; } s; s.u8 = 0; s.u8Dummy0 = s.u8Dummy1 = 0x42; CHECKOP(ASMAtomicXchgU8(&s.u8, 1), 0, "%#x", uint8_t); CHECKVAL(s.u8, 1, "%#x"); CHECKOP(ASMAtomicXchgU8(&s.u8, 0), 1, "%#x", uint8_t); CHECKVAL(s.u8, 0, "%#x"); CHECKOP(ASMAtomicXchgU8(&s.u8, 0xff), 0, "%#x", uint8_t); CHECKVAL(s.u8, 0xff, "%#x"); CHECKOP(ASMAtomicXchgU8(&s.u8, 0x87), 0xffff, "%#x", uint8_t); CHECKVAL(s.u8, 0x87, "%#x"); CHECKVAL(s.u8Dummy0, 0x42, "%#x"); CHECKVAL(s.u8Dummy1, 0x42, "%#x"); } static void tstASMAtomicXchgU16(void) { struct { uint16_t u16Dummy0; uint16_t u16; uint16_t u16Dummy1; } s; s.u16 = 0; s.u16Dummy0 = s.u16Dummy1 = 0x1234; CHECKOP(ASMAtomicXchgU16(&s.u16, 1), 0, "%#x", uint16_t); CHECKVAL(s.u16, 1, "%#x"); CHECKOP(ASMAtomicXchgU16(&s.u16, 0), 1, "%#x", uint16_t); CHECKVAL(s.u16, 0, "%#x"); CHECKOP(ASMAtomicXchgU16(&s.u16, 0xffff), 0, "%#x", uint16_t); CHECKVAL(s.u16, 0xffff, "%#x"); CHECKOP(ASMAtomicXchgU16(&s.u16, 0x8765), 0xffff, "%#x", uint16_t); CHECKVAL(s.u16, 0x8765, "%#x"); CHECKVAL(s.u16Dummy0, 0x1234, "%#x"); CHECKVAL(s.u16Dummy1, 0x1234, "%#x"); } static void tstASMAtomicXchgU32(void) { struct { uint32_t u32Dummy0; uint32_t u32; uint32_t u32Dummy1; } s; s.u32 = 0; s.u32Dummy0 = s.u32Dummy1 = 0x11223344; CHECKOP(ASMAtomicXchgU32(&s.u32, 1), 0, "%#x", uint32_t); CHECKVAL(s.u32, 1, "%#x"); CHECKOP(ASMAtomicXchgU32(&s.u32, 0), 1, "%#x", uint32_t); CHECKVAL(s.u32, 0, "%#x"); CHECKOP(ASMAtomicXchgU32(&s.u32, ~0U), 0, "%#x", uint32_t); CHECKVAL(s.u32, ~0U, "%#x"); CHECKOP(ASMAtomicXchgU32(&s.u32, 0x87654321), ~0U, "%#x", uint32_t); CHECKVAL(s.u32, 0x87654321, "%#x"); CHECKVAL(s.u32Dummy0, 0x11223344, "%#x"); CHECKVAL(s.u32Dummy1, 0x11223344, "%#x"); } static void tstASMAtomicXchgU64(void) { struct { uint64_t u64Dummy0; uint64_t u64; uint64_t u64Dummy1; } s; s.u64 = 0; s.u64Dummy0 = s.u64Dummy1 = 0x1122334455667788ULL; CHECKOP(ASMAtomicXchgU64(&s.u64, 1), 0ULL, "%#llx", uint64_t); CHECKVAL(s.u64, 1ULL, "%#llx"); CHECKOP(ASMAtomicXchgU64(&s.u64, 0), 1ULL, "%#llx", uint64_t); CHECKVAL(s.u64, 0ULL, "%#llx"); CHECKOP(ASMAtomicXchgU64(&s.u64, ~0ULL), 0ULL, "%#llx", uint64_t); CHECKVAL(s.u64, ~0ULL, "%#llx"); CHECKOP(ASMAtomicXchgU64(&s.u64, 0xfedcba0987654321ULL), ~0ULL, "%#llx", uint64_t); CHECKVAL(s.u64, 0xfedcba0987654321ULL, "%#llx"); CHECKVAL(s.u64Dummy0, 0x1122334455667788ULL, "%#llx"); CHECKVAL(s.u64Dummy1, 0x1122334455667788ULL, "%#llx"); } static void tstASMAtomicXchgPtr(void) { void *pv = NULL; CHECKOP(ASMAtomicXchgPtr(&pv, (void *)(~(uintptr_t)0)), NULL, "%p", void *); CHECKVAL(pv, (void *)(~(uintptr_t)0), "%p"); CHECKOP(ASMAtomicXchgPtr(&pv, (void *)0x87654321), (void *)(~(uintptr_t)0), "%p", void *); CHECKVAL(pv, (void *)0x87654321, "%p"); CHECKOP(ASMAtomicXchgPtr(&pv, NULL), (void *)0x87654321, "%p", void *); CHECKVAL(pv, NULL, "%p"); } static void tstASMAtomicCmpXchgU8(void) { struct { uint8_t u8Before; uint8_t u8; uint8_t u8After; } u = { 0xcc, 0xff, 0xaa }; CHECKOP(ASMAtomicCmpXchgU8(&u.u8, 0, 0), false, "%d", bool); CHECKVAL(u.u8, 0xff, "%x"); CHECKVAL(u.u8Before, 0xcc, "%x"); CHECKVAL(u.u8After, 0xaa, "%x"); CHECKOP(ASMAtomicCmpXchgU8(&u.u8, 0, 0xff), true, "%d", bool); CHECKVAL(u.u8, 0, "%x"); CHECKVAL(u.u8Before, 0xcc, "%x"); CHECKVAL(u.u8After, 0xaa, "%x"); CHECKOP(ASMAtomicCmpXchgU8(&u.u8, 0x79, 0xff), false, "%d", bool); CHECKVAL(u.u8, 0, "%x"); CHECKVAL(u.u8Before, 0xcc, "%x"); CHECKVAL(u.u8After, 0xaa, "%x"); CHECKOP(ASMAtomicCmpXchgU8(&u.u8, 0x97, 0), true, "%d", bool); CHECKVAL(u.u8, 0x97, "%x"); CHECKVAL(u.u8Before, 0xcc, "%x"); CHECKVAL(u.u8After, 0xaa, "%x"); } static void tstASMAtomicCmpXchgU32(void) { uint32_t u32 = 0xffffffff; CHECKOP(ASMAtomicCmpXchgU32(&u32, 0, 0), false, "%d", bool); CHECKVAL(u32, 0xffffffff, "%x"); CHECKOP(ASMAtomicCmpXchgU32(&u32, 0, 0xffffffff), true, "%d", bool); CHECKVAL(u32, 0, "%x"); CHECKOP(ASMAtomicCmpXchgU32(&u32, 0x8008efd, 0xffffffff), false, "%d", bool); CHECKVAL(u32, 0, "%x"); CHECKOP(ASMAtomicCmpXchgU32(&u32, 0x8008efd, 0), true, "%d", bool); CHECKVAL(u32, 0x8008efd, "%x"); } static void tstASMAtomicCmpXchgU64(void) { uint64_t u64 = 0xffffffffffffffULL; CHECKOP(ASMAtomicCmpXchgU64(&u64, 0, 0), false, "%d", bool); CHECKVAL(u64, 0xffffffffffffffULL, "%#llx"); CHECKOP(ASMAtomicCmpXchgU64(&u64, 0, 0xffffffffffffffULL), true, "%d", bool); CHECKVAL(u64, 0, "%x"); CHECKOP(ASMAtomicCmpXchgU64(&u64, 0x80040008008efdULL, 0xffffffff), false, "%d", bool); CHECKVAL(u64, 0, "%x"); CHECKOP(ASMAtomicCmpXchgU64(&u64, 0x80040008008efdULL, 0xffffffff00000000ULL), false, "%d", bool); CHECKVAL(u64, 0, "%x"); CHECKOP(ASMAtomicCmpXchgU64(&u64, 0x80040008008efdULL, 0), true, "%d", bool); CHECKVAL(u64, 0x80040008008efdULL, "%#llx"); } static void tstASMAtomicCmpXchgExU32(void) { uint32_t u32 = 0xffffffff; uint32_t u32Old = 0x80005111; CHECKOP(ASMAtomicCmpXchgExU32(&u32, 0, 0, &u32Old), false, "%d", bool); CHECKVAL(u32, 0xffffffff, "%x"); CHECKVAL(u32Old, 0xffffffff, "%x"); CHECKOP(ASMAtomicCmpXchgExU32(&u32, 0, 0xffffffff, &u32Old), true, "%d", bool); CHECKVAL(u32, 0, "%x"); CHECKVAL(u32Old, 0xffffffff, "%x"); CHECKOP(ASMAtomicCmpXchgExU32(&u32, 0x8008efd, 0xffffffff, &u32Old), false, "%d", bool); CHECKVAL(u32, 0, "%x"); CHECKVAL(u32Old, 0, "%x"); CHECKOP(ASMAtomicCmpXchgExU32(&u32, 0x8008efd, 0, &u32Old), true, "%d", bool); CHECKVAL(u32, 0x8008efd, "%x"); CHECKVAL(u32Old, 0, "%x"); CHECKOP(ASMAtomicCmpXchgExU32(&u32, 0, 0x8008efd, &u32Old), true, "%d", bool); CHECKVAL(u32, 0, "%x"); CHECKVAL(u32Old, 0x8008efd, "%x"); } static void tstASMAtomicCmpXchgExU64(void) { uint64_t u64 = 0xffffffffffffffffULL; uint64_t u64Old = 0x8000000051111111ULL; CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0, 0, &u64Old), false, "%d", bool); CHECKVAL(u64, 0xffffffffffffffffULL, "%llx"); CHECKVAL(u64Old, 0xffffffffffffffffULL, "%llx"); CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0, 0xffffffffffffffffULL, &u64Old), true, "%d", bool); CHECKVAL(u64, 0ULL, "%llx"); CHECKVAL(u64Old, 0xffffffffffffffffULL, "%llx"); CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0x80040008008efdULL, 0xffffffff, &u64Old), false, "%d", bool); CHECKVAL(u64, 0ULL, "%llx"); CHECKVAL(u64Old, 0ULL, "%llx"); CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0x80040008008efdULL, 0xffffffff00000000ULL, &u64Old), false, "%d", bool); CHECKVAL(u64, 0ULL, "%llx"); CHECKVAL(u64Old, 0ULL, "%llx"); CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0x80040008008efdULL, 0, &u64Old), true, "%d", bool); CHECKVAL(u64, 0x80040008008efdULL, "%llx"); CHECKVAL(u64Old, 0ULL, "%llx"); CHECKOP(ASMAtomicCmpXchgExU64(&u64, 0, 0x80040008008efdULL, &u64Old), true, "%d", bool); CHECKVAL(u64, 0ULL, "%llx"); CHECKVAL(u64Old, 0x80040008008efdULL, "%llx"); } static void tstASMAtomicReadU64(void) { uint64_t u64 = 0; CHECKOP(ASMAtomicReadU64(&u64), 0ULL, "%#llx", uint64_t); CHECKVAL(u64, 0ULL, "%#llx"); u64 = ~0ULL; CHECKOP(ASMAtomicReadU64(&u64), ~0ULL, "%#llx", uint64_t); CHECKVAL(u64, ~0ULL, "%#llx"); u64 = 0xfedcba0987654321ULL; CHECKOP(ASMAtomicReadU64(&u64), 0xfedcba0987654321ULL, "%#llx", uint64_t); CHECKVAL(u64, 0xfedcba0987654321ULL, "%#llx"); } static void tstASMAtomicUoReadU64(void) { uint64_t u64 = 0; CHECKOP(ASMAtomicUoReadU64(&u64), 0ULL, "%#llx", uint64_t); CHECKVAL(u64, 0ULL, "%#llx"); u64 = ~0ULL; CHECKOP(ASMAtomicUoReadU64(&u64), ~0ULL, "%#llx", uint64_t); CHECKVAL(u64, ~0ULL, "%#llx"); u64 = 0xfedcba0987654321ULL; CHECKOP(ASMAtomicUoReadU64(&u64), 0xfedcba0987654321ULL, "%#llx", uint64_t); CHECKVAL(u64, 0xfedcba0987654321ULL, "%#llx"); } static void tstASMAtomicAddS32(void) { int32_t i32Rc; int32_t i32 = 10; #define MYCHECK(op, rc, val) \ do { \ i32Rc = op; \ if (i32Rc != (rc)) \ { \ RTPrintf("%s, %d: FAILURE: %s -> %d expected %d\n", __FUNCTION__, __LINE__, #op, i32Rc, rc); \ RTTestIErrorInc(); \ } \ if (i32 != (val)) \ { \ RTPrintf("%s, %d: FAILURE: %s => i32=%d expected %d\n", __FUNCTION__, __LINE__, #op, i32, val); \ RTTestIErrorInc(); \ } \ } while (0) MYCHECK(ASMAtomicAddS32(&i32, 1), 10, 11); MYCHECK(ASMAtomicAddS32(&i32, -2), 11, 9); MYCHECK(ASMAtomicAddS32(&i32, -9), 9, 0); MYCHECK(ASMAtomicAddS32(&i32, -0x7fffffff), 0, -0x7fffffff); MYCHECK(ASMAtomicAddS32(&i32, 0), -0x7fffffff, -0x7fffffff); MYCHECK(ASMAtomicAddS32(&i32, 0x7fffffff), -0x7fffffff, 0); MYCHECK(ASMAtomicAddS32(&i32, 0), 0, 0); #undef MYCHECK } static void tstASMAtomicDecIncS32(void) { int32_t i32Rc; int32_t i32 = 10; #define MYCHECK(op, rc) \ do { \ i32Rc = op; \ if (i32Rc != (rc)) \ { \ RTPrintf("%s, %d: FAILURE: %s -> %d expected %d\n", __FUNCTION__, __LINE__, #op, i32Rc, rc); \ RTTestIErrorInc(); \ } \ if (i32 != (rc)) \ { \ RTPrintf("%s, %d: FAILURE: %s => i32=%d expected %d\n", __FUNCTION__, __LINE__, #op, i32, rc); \ RTTestIErrorInc(); \ } \ } while (0) MYCHECK(ASMAtomicDecS32(&i32), 9); MYCHECK(ASMAtomicDecS32(&i32), 8); MYCHECK(ASMAtomicDecS32(&i32), 7); MYCHECK(ASMAtomicDecS32(&i32), 6); MYCHECK(ASMAtomicDecS32(&i32), 5); MYCHECK(ASMAtomicDecS32(&i32), 4); MYCHECK(ASMAtomicDecS32(&i32), 3); MYCHECK(ASMAtomicDecS32(&i32), 2); MYCHECK(ASMAtomicDecS32(&i32), 1); MYCHECK(ASMAtomicDecS32(&i32), 0); MYCHECK(ASMAtomicDecS32(&i32), -1); MYCHECK(ASMAtomicDecS32(&i32), -2); MYCHECK(ASMAtomicIncS32(&i32), -1); MYCHECK(ASMAtomicIncS32(&i32), 0); MYCHECK(ASMAtomicIncS32(&i32), 1); MYCHECK(ASMAtomicIncS32(&i32), 2); MYCHECK(ASMAtomicIncS32(&i32), 3); MYCHECK(ASMAtomicDecS32(&i32), 2); MYCHECK(ASMAtomicIncS32(&i32), 3); MYCHECK(ASMAtomicDecS32(&i32), 2); MYCHECK(ASMAtomicIncS32(&i32), 3); #undef MYCHECK } static void tstASMAtomicAndOrU32(void) { uint32_t u32 = 0xffffffff; ASMAtomicOrU32(&u32, 0xffffffff); CHECKVAL(u32, 0xffffffff, "%x"); ASMAtomicAndU32(&u32, 0xffffffff); CHECKVAL(u32, 0xffffffff, "%x"); ASMAtomicAndU32(&u32, 0x8f8f8f8f); CHECKVAL(u32, 0x8f8f8f8f, "%x"); ASMAtomicOrU32(&u32, 0x70707070); CHECKVAL(u32, 0xffffffff, "%x"); ASMAtomicAndU32(&u32, 1); CHECKVAL(u32, 1, "%x"); ASMAtomicOrU32(&u32, 0x80000000); CHECKVAL(u32, 0x80000001, "%x"); ASMAtomicAndU32(&u32, 0x80000000); CHECKVAL(u32, 0x80000000, "%x"); ASMAtomicAndU32(&u32, 0); CHECKVAL(u32, 0, "%x"); ASMAtomicOrU32(&u32, 0x42424242); CHECKVAL(u32, 0x42424242, "%x"); } void tstASMMemZeroPage(void) { struct { uint64_t u64Magic1; uint8_t abPage[PAGE_SIZE]; uint64_t u64Magic2; } Buf1, Buf2, Buf3; Buf1.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf1.abPage, 0x55, sizeof(Buf1.abPage)); Buf1.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf2.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf2.abPage, 0x77, sizeof(Buf2.abPage)); Buf2.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf3.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf3.abPage, 0x99, sizeof(Buf3.abPage)); Buf3.u64Magic2 = UINT64_C(0xffffffffffffffff); ASMMemZeroPage(Buf1.abPage); ASMMemZeroPage(Buf2.abPage); ASMMemZeroPage(Buf3.abPage); if ( Buf1.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf1.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic2 != UINT64_C(0xffffffffffffffff)) { RTPrintf("tstInlineAsm: ASMMemZeroPage violated one/both magic(s)!\n"); RTTestIErrorInc(); } for (unsigned i = 0; i < sizeof(Buf1.abPage); i++) if (Buf1.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZeroPage didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } for (unsigned i = 0; i < sizeof(Buf2.abPage); i++) if (Buf2.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZeroPage didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } for (unsigned i = 0; i < sizeof(Buf3.abPage); i++) if (Buf3.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZeroPage didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } } void tstASMMemIsZeroPage(RTTEST hTest) { RTTestSub(hTest, "ASMMemIsZeroPage"); void *pvPage1 = RTTestGuardedAllocHead(hTest, PAGE_SIZE); void *pvPage2 = RTTestGuardedAllocTail(hTest, PAGE_SIZE); RTTESTI_CHECK_RETV(pvPage1 && pvPage2); memset(pvPage1, 0, PAGE_SIZE); memset(pvPage2, 0, PAGE_SIZE); RTTESTI_CHECK(ASMMemIsZeroPage(pvPage1)); RTTESTI_CHECK(ASMMemIsZeroPage(pvPage2)); memset(pvPage1, 0xff, PAGE_SIZE); memset(pvPage2, 0xff, PAGE_SIZE); RTTESTI_CHECK(!ASMMemIsZeroPage(pvPage1)); RTTESTI_CHECK(!ASMMemIsZeroPage(pvPage2)); memset(pvPage1, 0, PAGE_SIZE); memset(pvPage2, 0, PAGE_SIZE); for (unsigned off = 0; off < PAGE_SIZE; off++) { ((uint8_t *)pvPage1)[off] = 1; RTTESTI_CHECK(!ASMMemIsZeroPage(pvPage1)); ((uint8_t *)pvPage1)[off] = 0; ((uint8_t *)pvPage2)[off] = 0x80; RTTESTI_CHECK(!ASMMemIsZeroPage(pvPage2)); ((uint8_t *)pvPage2)[off] = 0; } RTTestSubDone(hTest); } void tstASMMemZero32(void) { struct { uint64_t u64Magic1; uint8_t abPage[PAGE_SIZE - 32]; uint64_t u64Magic2; } Buf1, Buf2, Buf3; Buf1.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf1.abPage, 0x55, sizeof(Buf1.abPage)); Buf1.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf2.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf2.abPage, 0x77, sizeof(Buf2.abPage)); Buf2.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf3.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf3.abPage, 0x99, sizeof(Buf3.abPage)); Buf3.u64Magic2 = UINT64_C(0xffffffffffffffff); ASMMemZero32(Buf1.abPage, sizeof(Buf1.abPage)); ASMMemZero32(Buf2.abPage, sizeof(Buf2.abPage)); ASMMemZero32(Buf3.abPage, sizeof(Buf3.abPage)); if ( Buf1.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf1.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic2 != UINT64_C(0xffffffffffffffff)) { RTPrintf("tstInlineAsm: ASMMemZero32 violated one/both magic(s)!\n"); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf1.abPage); i++) if (Buf1.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZero32 didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf2.abPage); i++) if (Buf2.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZero32 didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf3.abPage); i++) if (Buf3.abPage[i]) { RTPrintf("tstInlineAsm: ASMMemZero32 didn't clear byte at offset %#x!\n", i); RTTestIErrorInc(); } } void tstASMMemFill32(void) { struct { uint64_t u64Magic1; uint32_t au32Page[PAGE_SIZE / 4]; uint64_t u64Magic2; } Buf1; struct { uint64_t u64Magic1; uint32_t au32Page[(PAGE_SIZE / 4) - 3]; uint64_t u64Magic2; } Buf2; struct { uint64_t u64Magic1; uint32_t au32Page[(PAGE_SIZE / 4) - 1]; uint64_t u64Magic2; } Buf3; Buf1.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf1.au32Page, 0x55, sizeof(Buf1.au32Page)); Buf1.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf2.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf2.au32Page, 0x77, sizeof(Buf2.au32Page)); Buf2.u64Magic2 = UINT64_C(0xffffffffffffffff); Buf3.u64Magic1 = UINT64_C(0xffffffffffffffff); memset(Buf3.au32Page, 0x99, sizeof(Buf3.au32Page)); Buf3.u64Magic2 = UINT64_C(0xffffffffffffffff); ASMMemFill32(Buf1.au32Page, sizeof(Buf1.au32Page), 0xdeadbeef); ASMMemFill32(Buf2.au32Page, sizeof(Buf2.au32Page), 0xcafeff01); ASMMemFill32(Buf3.au32Page, sizeof(Buf3.au32Page), 0xf00dd00f); if ( Buf1.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf1.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf2.u64Magic2 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic1 != UINT64_C(0xffffffffffffffff) || Buf3.u64Magic2 != UINT64_C(0xffffffffffffffff)) { RTPrintf("tstInlineAsm: ASMMemFill32 violated one/both magic(s)!\n"); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf1.au32Page); i++) if (Buf1.au32Page[i] != 0xdeadbeef) { RTPrintf("tstInlineAsm: ASMMemFill32 %#x: %#x exepcted %#x\n", i, Buf1.au32Page[i], 0xdeadbeef); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf2.au32Page); i++) if (Buf2.au32Page[i] != 0xcafeff01) { RTPrintf("tstInlineAsm: ASMMemFill32 %#x: %#x exepcted %#x\n", i, Buf2.au32Page[i], 0xcafeff01); RTTestIErrorInc(); } for (unsigned i = 0; i < RT_ELEMENTS(Buf3.au32Page); i++) if (Buf3.au32Page[i] != 0xf00dd00f) { RTPrintf("tstInlineAsm: ASMMemFill32 %#x: %#x exepcted %#x\n", i, Buf3.au32Page[i], 0xf00dd00f); RTTestIErrorInc(); } } void tstASMMath(void) { uint64_t u64 = ASMMult2xU32RetU64(UINT32_C(0x80000000), UINT32_C(0x10000000)); CHECKVAL(u64, UINT64_C(0x0800000000000000), "%#018RX64"); uint32_t u32 = ASMDivU64ByU32RetU32(UINT64_C(0x0800000000000000), UINT32_C(0x10000000)); CHECKVAL(u32, UINT32_C(0x80000000), "%#010RX32"); #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x0000000000000001), UINT32_C(0x00000001), UINT32_C(0x00000001)); CHECKVAL(u64, UINT64_C(0x0000000000000001), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x0000000100000000), UINT32_C(0x80000000), UINT32_C(0x00000002)); CHECKVAL(u64, UINT64_C(0x4000000000000000), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xfedcba9876543210), UINT32_C(0xffffffff), UINT32_C(0xffffffff)); CHECKVAL(u64, UINT64_C(0xfedcba9876543210), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xffffffffffffffff), UINT32_C(0xffffffff), UINT32_C(0xffffffff)); CHECKVAL(u64, UINT64_C(0xffffffffffffffff), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xffffffffffffffff), UINT32_C(0xfffffff0), UINT32_C(0xffffffff)); CHECKVAL(u64, UINT64_C(0xfffffff0fffffff0), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x3415934810359583), UINT32_C(0x58734981), UINT32_C(0xf8694045)); CHECKVAL(u64, UINT64_C(0x128b9c3d43184763), "%#018RX64"); u64 = ASMMultU64ByU32DivByU32(UINT64_C(0x3415934810359583), UINT32_C(0xf8694045), UINT32_C(0x58734981)); CHECKVAL(u64, UINT64_C(0x924719355cd35a27), "%#018RX64"); # if 0 /* bird: question is whether this should trap or not: * * frank: Of course it must trap: * * 0xfffffff8 * 0x77d7daf8 = 0x77d7daf441412840 * * During the following division, the quotient must fit into a 32-bit register. * Therefore the smallest valid divisor is * * (0x77d7daf441412840 >> 32) + 1 = 0x77d7daf5 * * which is definitely greater than 0x3b9aca00. * * bird: No, the C version does *not* crash. So, the question is whether there's any * code depending on it not crashing. * * Of course the assembly versions of the code crash right now for the reasons you've * given, but the 32-bit MSC version does not crash. * * frank: The C version does not crash but delivers incorrect results for this case. * The reason is * * u.s.Hi = (unsigned long)(u64Hi / u32C); * * Here the division is actually 64-bit by 64-bit but the 64-bit result is truncated * to 32 bit. If using this (optimized and fast) function we should just be sure that * the operands are in a valid range. */ u64 = ASMMultU64ByU32DivByU32(UINT64_C(0xfffffff8c65d6731), UINT32_C(0x77d7daf8), UINT32_C(0x3b9aca00)); CHECKVAL(u64, UINT64_C(0x02b8f9a2aa74e3dc), "%#018RX64"); # endif #endif /* AMD64 || X86 */ u32 = ASMModU64ByU32RetU32(UINT64_C(0x0ffffff8c65d6731), UINT32_C(0x77d7daf8)); CHECKVAL(u32, UINT32_C(0x3B642451), "%#010RX32"); int32_t i32; i32 = ASMModS64ByS32RetS32(INT64_C(-11), INT32_C(-2)); CHECKVAL(i32, INT32_C(-1), "%010RI32"); i32 = ASMModS64ByS32RetS32(INT64_C(-11), INT32_C(2)); CHECKVAL(i32, INT32_C(-1), "%010RI32"); i32 = ASMModS64ByS32RetS32(INT64_C(11), INT32_C(-2)); CHECKVAL(i32, INT32_C(1), "%010RI32"); i32 = ASMModS64ByS32RetS32(INT64_C(92233720368547758), INT32_C(2147483647)); CHECKVAL(i32, INT32_C(2104533974), "%010RI32"); i32 = ASMModS64ByS32RetS32(INT64_C(-92233720368547758), INT32_C(2147483647)); CHECKVAL(i32, INT32_C(-2104533974), "%010RI32"); } void tstASMByteSwap(void) { RTPrintf("tstInlineASM: TESTING - ASMByteSwap*\n"); uint64_t u64In = UINT64_C(0x0011223344556677); uint64_t u64Out = ASMByteSwapU64(u64In); CHECKVAL(u64In, UINT64_C(0x0011223344556677), "%#018RX64"); CHECKVAL(u64Out, UINT64_C(0x7766554433221100), "%#018RX64"); u64Out = ASMByteSwapU64(u64Out); CHECKVAL(u64Out, u64In, "%#018RX64"); u64In = UINT64_C(0x0123456789abcdef); u64Out = ASMByteSwapU64(u64In); CHECKVAL(u64In, UINT64_C(0x0123456789abcdef), "%#018RX64"); CHECKVAL(u64Out, UINT64_C(0xefcdab8967452301), "%#018RX64"); u64Out = ASMByteSwapU64(u64Out); CHECKVAL(u64Out, u64In, "%#018RX64"); u64In = 0; u64Out = ASMByteSwapU64(u64In); CHECKVAL(u64Out, u64In, "%#018RX64"); u64In = ~(uint64_t)0; u64Out = ASMByteSwapU64(u64In); CHECKVAL(u64Out, u64In, "%#018RX64"); uint32_t u32In = UINT32_C(0x00112233); uint32_t u32Out = ASMByteSwapU32(u32In); CHECKVAL(u32In, UINT32_C(0x00112233), "%#010RX32"); CHECKVAL(u32Out, UINT32_C(0x33221100), "%#010RX32"); u32Out = ASMByteSwapU32(u32Out); CHECKVAL(u32Out, u32In, "%#010RX32"); u32In = UINT32_C(0x12345678); u32Out = ASMByteSwapU32(u32In); CHECKVAL(u32In, UINT32_C(0x12345678), "%#010RX32"); CHECKVAL(u32Out, UINT32_C(0x78563412), "%#010RX32"); u32Out = ASMByteSwapU32(u32Out); CHECKVAL(u32Out, u32In, "%#010RX32"); u32In = 0; u32Out = ASMByteSwapU32(u32In); CHECKVAL(u32Out, u32In, "%#010RX32"); u32In = ~(uint32_t)0; u32Out = ASMByteSwapU32(u32In); CHECKVAL(u32Out, u32In, "%#010RX32"); uint16_t u16In = UINT16_C(0x0011); uint16_t u16Out = ASMByteSwapU16(u16In); CHECKVAL(u16In, UINT16_C(0x0011), "%#06RX16"); CHECKVAL(u16Out, UINT16_C(0x1100), "%#06RX16"); u16Out = ASMByteSwapU16(u16Out); CHECKVAL(u16Out, u16In, "%#06RX16"); u16In = UINT16_C(0x1234); u16Out = ASMByteSwapU16(u16In); CHECKVAL(u16In, UINT16_C(0x1234), "%#06RX16"); CHECKVAL(u16Out, UINT16_C(0x3412), "%#06RX16"); u16Out = ASMByteSwapU16(u16Out); CHECKVAL(u16Out, u16In, "%#06RX16"); u16In = 0; u16Out = ASMByteSwapU16(u16In); CHECKVAL(u16Out, u16In, "%#06RX16"); u16In = ~(uint16_t)0; u16Out = ASMByteSwapU16(u16In); CHECKVAL(u16Out, u16In, "%#06RX16"); } void tstASMBench(void) { /* * Make this static. We don't want to have this located on the stack. */ static uint8_t volatile s_u8; static int8_t volatile s_i8; static uint16_t volatile s_u16; static int16_t volatile s_i16; static uint32_t volatile s_u32; static int32_t volatile s_i32; static uint64_t volatile s_u64; static int64_t volatile s_i64; register unsigned i; const unsigned cRounds = 2000000; register uint64_t u64Elapsed; RTPrintf("tstInlineASM: Benchmarking:\n"); #if !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) # define BENCH(op, str) \ do { \ RTThreadYield(); \ u64Elapsed = ASMReadTSC(); \ for (i = cRounds; i > 0; i--) \ op; \ u64Elapsed = ASMReadTSC() - u64Elapsed; \ RTPrintf(" %-30s %3llu cycles\n", str, u64Elapsed / cRounds); \ } while (0) #else # define BENCH(op, str) \ do { \ RTThreadYield(); \ u64Elapsed = RTTimeNanoTS(); \ for (i = cRounds; i > 0; i--) \ op; \ u64Elapsed = RTTimeNanoTS() - u64Elapsed; \ RTPrintf(" %-30s %3llu ns\n", str, u64Elapsed / cRounds); \ } while (0) #endif BENCH(s_u32 = 0, "s_u32 = 0:"); BENCH(ASMAtomicUoReadU8(&s_u8), "ASMAtomicUoReadU8:"); BENCH(ASMAtomicUoReadS8(&s_i8), "ASMAtomicUoReadS8:"); BENCH(ASMAtomicUoReadU16(&s_u16), "ASMAtomicUoReadU16:"); BENCH(ASMAtomicUoReadS16(&s_i16), "ASMAtomicUoReadS16:"); BENCH(ASMAtomicUoReadU32(&s_u32), "ASMAtomicUoReadU32:"); BENCH(ASMAtomicUoReadS32(&s_i32), "ASMAtomicUoReadS32:"); BENCH(ASMAtomicUoReadU64(&s_u64), "ASMAtomicUoReadU64:"); BENCH(ASMAtomicUoReadS64(&s_i64), "ASMAtomicUoReadS64:"); BENCH(ASMAtomicReadU8(&s_u8), "ASMAtomicReadU8:"); BENCH(ASMAtomicReadS8(&s_i8), "ASMAtomicReadS8:"); BENCH(ASMAtomicReadU16(&s_u16), "ASMAtomicReadU16:"); BENCH(ASMAtomicReadS16(&s_i16), "ASMAtomicReadS16:"); BENCH(ASMAtomicReadU32(&s_u32), "ASMAtomicReadU32:"); BENCH(ASMAtomicReadS32(&s_i32), "ASMAtomicReadS32:"); BENCH(ASMAtomicReadU64(&s_u64), "ASMAtomicReadU64:"); BENCH(ASMAtomicReadS64(&s_i64), "ASMAtomicReadS64:"); BENCH(ASMAtomicUoWriteU8(&s_u8, 0), "ASMAtomicUoWriteU8:"); BENCH(ASMAtomicUoWriteS8(&s_i8, 0), "ASMAtomicUoWriteS8:"); BENCH(ASMAtomicUoWriteU16(&s_u16, 0), "ASMAtomicUoWriteU16:"); BENCH(ASMAtomicUoWriteS16(&s_i16, 0), "ASMAtomicUoWriteS16:"); BENCH(ASMAtomicUoWriteU32(&s_u32, 0), "ASMAtomicUoWriteU32:"); BENCH(ASMAtomicUoWriteS32(&s_i32, 0), "ASMAtomicUoWriteS32:"); BENCH(ASMAtomicUoWriteU64(&s_u64, 0), "ASMAtomicUoWriteU64:"); BENCH(ASMAtomicUoWriteS64(&s_i64, 0), "ASMAtomicUoWriteS64:"); BENCH(ASMAtomicWriteU8(&s_u8, 0), "ASMAtomicWriteU8:"); BENCH(ASMAtomicWriteS8(&s_i8, 0), "ASMAtomicWriteS8:"); BENCH(ASMAtomicWriteU16(&s_u16, 0), "ASMAtomicWriteU16:"); BENCH(ASMAtomicWriteS16(&s_i16, 0), "ASMAtomicWriteS16:"); BENCH(ASMAtomicWriteU32(&s_u32, 0), "ASMAtomicWriteU32:"); BENCH(ASMAtomicWriteS32(&s_i32, 0), "ASMAtomicWriteS32:"); BENCH(ASMAtomicWriteU64(&s_u64, 0), "ASMAtomicWriteU64:"); BENCH(ASMAtomicWriteS64(&s_i64, 0), "ASMAtomicWriteS64:"); BENCH(ASMAtomicXchgU8(&s_u8, 0), "ASMAtomicXchgU8:"); BENCH(ASMAtomicXchgS8(&s_i8, 0), "ASMAtomicXchgS8:"); BENCH(ASMAtomicXchgU16(&s_u16, 0), "ASMAtomicXchgU16:"); BENCH(ASMAtomicXchgS16(&s_i16, 0), "ASMAtomicXchgS16:"); BENCH(ASMAtomicXchgU32(&s_u32, 0), "ASMAtomicXchgU32:"); BENCH(ASMAtomicXchgS32(&s_i32, 0), "ASMAtomicXchgS32:"); BENCH(ASMAtomicXchgU64(&s_u64, 0), "ASMAtomicXchgU64:"); BENCH(ASMAtomicXchgS64(&s_i64, 0), "ASMAtomicXchgS64:"); BENCH(ASMAtomicCmpXchgU32(&s_u32, 0, 0), "ASMAtomicCmpXchgU32:"); BENCH(ASMAtomicCmpXchgS32(&s_i32, 0, 0), "ASMAtomicCmpXchgS32:"); BENCH(ASMAtomicCmpXchgU64(&s_u64, 0, 0), "ASMAtomicCmpXchgU64:"); BENCH(ASMAtomicCmpXchgS64(&s_i64, 0, 0), "ASMAtomicCmpXchgS64:"); BENCH(ASMAtomicCmpXchgU32(&s_u32, 0, 1), "ASMAtomicCmpXchgU32/neg:"); BENCH(ASMAtomicCmpXchgS32(&s_i32, 0, 1), "ASMAtomicCmpXchgS32/neg:"); BENCH(ASMAtomicCmpXchgU64(&s_u64, 0, 1), "ASMAtomicCmpXchgU64/neg:"); BENCH(ASMAtomicCmpXchgS64(&s_i64, 0, 1), "ASMAtomicCmpXchgS64/neg:"); BENCH(ASMAtomicIncU32(&s_u32), "ASMAtomicIncU32:"); BENCH(ASMAtomicIncS32(&s_i32), "ASMAtomicIncS32:"); BENCH(ASMAtomicDecU32(&s_u32), "ASMAtomicDecU32:"); BENCH(ASMAtomicDecS32(&s_i32), "ASMAtomicDecS32:"); BENCH(ASMAtomicAddU32(&s_u32, 5), "ASMAtomicAddU32:"); BENCH(ASMAtomicAddS32(&s_i32, 5), "ASMAtomicAddS32:"); /* The Darwin gcc does not like this ... */ #if !defined(RT_OS_DARWIN) && !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) BENCH(s_u8 = ASMGetApicId(), "ASMGetApicId:"); #endif RTPrintf("Done.\n"); #undef BENCH } int main(int argc, char *argv[]) { RTTEST hTest; int rc = RTTestInitAndCreate("tstInlineAsm", &hTest); if (rc) return rc; RTTestBanner(hTest); /* * Execute the tests. */ #if !defined(GCC44_32BIT_PIC) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) tstASMCpuId(); #endif tstASMAtomicXchgU8(); tstASMAtomicXchgU16(); tstASMAtomicXchgU32(); tstASMAtomicXchgU64(); tstASMAtomicXchgPtr(); tstASMAtomicCmpXchgU8(); tstASMAtomicCmpXchgU32(); tstASMAtomicCmpXchgU64(); tstASMAtomicCmpXchgExU32(); tstASMAtomicCmpXchgExU64(); tstASMAtomicReadU64(); tstASMAtomicUoReadU64(); tstASMAtomicAddS32(); tstASMAtomicDecIncS32(); tstASMAtomicAndOrU32(); tstASMMemZeroPage(); tstASMMemIsZeroPage(hTest); tstASMMemZero32(); tstASMMemFill32(); tstASMMath(); tstASMByteSwap(); tstASMBench(); /* * Show the result. */ return RTTestSummaryAndDestroy(hTest); }