/* $Id: cpu-numa.cpp 69111 2017-10-17 14:26:02Z vboxsync $ */ /** @file * numa - NUMA / memory benchmark. */ /* * Copyright (C) 2011-2017 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License (GPL) as published by the Free Software * Foundation, in version 2 as it comes in the "COPYING" file of the * VirtualBox OSE distribution. VirtualBox OSE is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. * * The contents of this file may alternatively be used under the terms * of the Common Development and Distribution License Version 1.0 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the * VirtualBox OSE distribution, in which case the provisions of the * CDDL are applicable instead of those of the GPL. * * You may elect to license modified versions of this file under the * terms and conditions of either the GPL or the CDDL or both. */ /********************************************************************************************************************************* * Header Files * *********************************************************************************************************************************/ #include #include //#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64) //# include //#endif #include #include #include #include #include /********************************************************************************************************************************* * Global Variables * *********************************************************************************************************************************/ /** The number of threads to skip when testing. */ static uint32_t g_cThreadsToSkip = 1; /** * Gets the next online CPU. * * @returns Next CPU index or RTCPUSET_MAX_CPUS. * @param iCurCpu The current CPU (index). */ static int getNextCpu(unsigned iCurCpu) { /* Skip to the next chip. */ iCurCpu = (iCurCpu / g_cThreadsToSkip) * g_cThreadsToSkip; iCurCpu += g_cThreadsToSkip; /* Skip offline cpus. */ while ( iCurCpu < RTCPUSET_MAX_CPUS && !RTMpIsCpuOnline(iCurCpu) ) iCurCpu++; /* Make sure we're within bounds (in case of bad input). */ if (iCurCpu > RTCPUSET_MAX_CPUS) iCurCpu = RTCPUSET_MAX_CPUS; return iCurCpu; } static void doTest(RTTEST hTest) { NOREF(hTest); uint32_t iAllocCpu = 0; while (iAllocCpu < RTCPUSET_MAX_CPUS) { const uint32_t cbTestSet = _1M * 32; const uint32_t cIterations = 384; /* * Change CPU and allocate a chunk of memory. */ RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAllocCpu))); void *pvTest = RTMemPageAlloc(cbTestSet); /* may be leaked, who cares */ RTTESTI_CHECK_RETV(pvTest != NULL); memset(pvTest, 0xef, cbTestSet); /* * Do the tests. */ uint32_t iAccessCpu = 0; while (iAccessCpu < RTCPUSET_MAX_CPUS) { RTTESTI_CHECK_RC_OK_RETV(RTThreadSetAffinityToCpu(RTMpCpuIdFromSetIndex(iAccessCpu))); /* * The write test. */ RTTimeNanoTS(); RTThreadYield(); uint64_t u64StartTS = RTTimeNanoTS(); for (uint32_t i = 0; i < cIterations; i++) { ASMCompilerBarrier(); /* paranoia */ memset(pvTest, i, cbTestSet); } uint64_t const cNsElapsedWrite = RTTimeNanoTS() - u64StartTS; uint64_t cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */ / ((long double)cNsElapsedWrite / RT_NS_1SEC_64) /* seconds */ / _1M /* MB */ ); RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-write", iAllocCpu, iAccessCpu); /* * The read test. */ memset(pvTest, 0, cbTestSet); RTTimeNanoTS(); RTThreadYield(); u64StartTS = RTTimeNanoTS(); for (uint32_t i = 0; i < cIterations; i++) { #if 1 size_t register u = 0; size_t volatile *puCur = (size_t volatile *)pvTest; size_t volatile *puEnd = puCur + cbTestSet / sizeof(size_t); while (puCur != puEnd) u += *puCur++; #else ASMCompilerBarrier(); /* paranoia */ void *pvFound = memchr(pvTest, (i & 127) + 1, cbTestSet); RTTESTI_CHECK(pvFound == NULL); #endif } uint64_t const cNsElapsedRead = RTTimeNanoTS() - u64StartTS; cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */ / ((long double)cNsElapsedRead / RT_NS_1SEC_64) /* seconds */ / _1M /* MB */ ); RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read", iAllocCpu, iAccessCpu); /* * The read/write test. */ RTTimeNanoTS(); RTThreadYield(); u64StartTS = RTTimeNanoTS(); for (uint32_t i = 0; i < cIterations; i++) { ASMCompilerBarrier(); /* paranoia */ memcpy(pvTest, (uint8_t *)pvTest + cbTestSet / 2, cbTestSet / 2); } uint64_t const cNsElapsedRW = RTTimeNanoTS() - u64StartTS; cMBPerSec = (uint64_t)( ((uint64_t)cIterations * cbTestSet) /* bytes */ / ((long double)cNsElapsedRW / RT_NS_1SEC_64) /* seconds */ / _1M /* MB */ ); RTTestIValueF(cMBPerSec, RTTESTUNIT_MEGABYTES_PER_SEC, "cpu%02u-mem%02u-read-write", iAllocCpu, iAccessCpu); /* * Total time. */ RTTestIValueF(cNsElapsedRead + cNsElapsedWrite + cNsElapsedRW, RTTESTUNIT_NS, "cpu%02u-mem%02u-time", iAllocCpu, iAccessCpu); /* advance */ iAccessCpu = getNextCpu(iAccessCpu); } /* * Clean up and advance to the next CPU. */ RTMemPageFree(pvTest, cbTestSet); iAllocCpu = getNextCpu(iAllocCpu); } } int main(int argc, char **argv) { RTTEST hTest; RTEXITCODE rcExit = RTTestInitAndCreate("numa-1", &hTest); if (rcExit != RTEXITCODE_SUCCESS) return rcExit; RTTestBanner(hTest); #if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64) /** @todo figure basic topology. */ #endif if (argc == 2) g_cThreadsToSkip = RTStrToUInt8(argv[1]); doTest(hTest); return RTTestSummaryAndDestroy(hTest); }