From ef7b8c68d52fcb0d8efa378fa00b9ebb11c474d5 Mon Sep 17 00:00:00 2001 From: Lukas W Date: Wed, 18 Apr 2018 08:18:23 +0200 Subject: [PATCH] Add naive benchmarks --- CMakeLists.txt | 2 + benchmarks/CMakeLists.txt | 15 +++++ benchmarks/benchmark.cpp | 130 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 benchmarks/CMakeLists.txt create mode 100644 benchmarks/benchmark.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bd9d376e2..387b926d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,7 @@ CHECK_INCLUDE_FILES(locale.h LMMS_HAVE_LOCALE_H) LIST(APPEND CMAKE_PREFIX_PATH "${CMAKE_INSTALL_PREFIX}") + FIND_PACKAGE(Qt5 COMPONENTS Core Gui Widgets Xml REQUIRED) FIND_PACKAGE(Qt5 COMPONENTS LinguistTools QUIET) @@ -561,6 +562,7 @@ include(CompileCache) ADD_SUBDIRECTORY(cmake) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(plugins) +ADD_SUBDIRECTORY(benchmarks) ADD_SUBDIRECTORY(tests) ADD_SUBDIRECTORY(data) ADD_SUBDIRECTORY(doc) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 000000000..49cc4ee2f --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,15 @@ +INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}") +INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") +INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/include") +INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}") + +SET(CMAKE_CXX_STANDARD 11) + +SET(CMAKE_AUTOMOC ON) + +ADD_EXECUTABLE(benchmarks + EXCLUDE_FROM_ALL + benchmark.cpp + $ +) +TARGET_LINK_LIBRARIES(benchmarks ${QT_LIBRARIES} ${LMMS_REQUIRED_LIBS}) diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp new file mode 100644 index 000000000..da4af8f88 --- /dev/null +++ b/benchmarks/benchmark.cpp @@ -0,0 +1,130 @@ +#include + +#include +#include + +#include "libcds.h" +#include + +#include "Engine.h" +#include "PerfLog.h" + +#include "LocklessList.h" +#include "MemoryPool.h" + +#include "NotePlayHandle.h" + /* LocklessQueue: alias template for cds::container::VyukovMPMCCycleQueue (its template arguments are not visible in this text). It is used by benchmark_allocator_threaded as the hand-off queue between the allocating and the deallocating threads. */ +template +using LocklessQueue = 
cds::container::VyukovMPMCCycleQueue; + /* benchmark_allocator: single-threaded allocator benchmark. Parameters: name - label inserted into the timer message; alloc - allocator object, must expose value_type, allocate(count) and deallocate(ptr, count); n - number of objects allocated per round; I - number of rounds. Returns nothing. A PerfLogTimer whose message contains I, n, sizeof(value_type) and name is alive while the rounds run (presumably it reports the elapsed time when it goes out of scope - confirm in PerfLog.h). */ +template +void benchmark_allocator(QString name, Alloc&& alloc, size_t n, size_t I) +{ + using T = typename Alloc::value_type; + constexpr size_t S = sizeof(T); + + std::vector ptrs{n}; + PerfLogTimer timer(QString("Allocate: %1 x %2 x %3 bytes, %4") + .arg(I).arg(n).arg(S).arg(name)); + /* Each round allocates n single objects into ptrs[0..n-1], then releases them again in the same index order. */ + for (size_t i=0; i < I; i++) + { + for (size_t j=0; j < n; j++) { + ptrs[j] = alloc.allocate(1); + } + for (size_t j=0; j < n; j++) { + alloc.deallocate(ptrs[j], 1); + } + } +} + + /* benchmark_allocator_threaded: multi-threaded allocator benchmark. Parameters: name - label inserted into the timer message; alloc - allocator object shared by reference between all threads (the allocator is therefore called concurrently); n - total number of objects to allocate and to free; t - number of allocating threads and also the number of deallocating threads. Returns after all 2*t threads have been joined. A PerfLogTimer whose message contains n, sizeof(value_type), t and name is alive from before the threads are started until the function returns. */ +template +void benchmark_allocator_threaded(QString name, Alloc&& alloc, size_t n, size_t t) +{ + using T = typename Alloc::value_type; + constexpr size_t S = sizeof(T); + + LocklessQueue ptrs{n}; + + PerfLogTimer timer(QString("Allocate multi-threaded: %1 x %2 bytes using %3 threads, %4") + .arg(n).arg(S).arg(t).arg(name)); + + std::vector threads; threads.reserve(t*2); + /* The reserve above makes room for t allocating plus t deallocating threads. The two atomic counters below hand out work: a thread proceeds only while the value it fetched (before the increment) is still below n, so exactly n allocations and n deallocations happen in total even though the counters end up larger than n. */ + std::atomic_uint_fast64_t allocated{0}; + std::atomic_uint_fast64_t deallocated{0}; + /* Allocating threads: claim a slot, allocate one object, push its pointer into the queue. NOTE(review): the result of ptrs.push is not checked - presumably the queue constructed with n can always hold every outstanding pointer; confirm against the libcds documentation. */ + for (size_t i=0; i < t; i++) { + threads.emplace_back([&]() { + while(allocated++ < n) { + auto ptr = alloc.allocate(1); + ptrs.push(ptr); + } + }); + } /* Deallocating threads: claim a slot, then take one pointer from the queue and free it. */ + for (size_t i=0; i < t; i++) { + threads.emplace_back([&]() { + while(deallocated++ < n) { + T* ptr; + while (! 
ptrs.pop(ptr)); /* busy-waits (empty loop body) until pop succeeds */ + alloc.deallocate(ptr, 1); + } + }); + } + /* Join every thread before returning: the lambdas capture the local queue, counters and allocator by reference. */ + for (std::thread& thread : threads) { + thread.join(); + } +} + /* main: runs all benchmarks in sequence. Order: (1) LocklessList, 100m pushes from one thread; (2) LocklessList, 50m pushes spread over 5 threads; (3) benchmark_allocator with one round of 10m objects for std::allocator, MmAllocator and MemoryPool; (4) benchmark_allocator_threaded with 10m objects and t = 4 for the same three allocators. The allocators are instantiated for T, a std::array whose template arguments are not visible in this text (S = 256 is declared right above it). No return statement is present, so main returns 0 implicitly. */ +int main(int argc, char* argv[]) +{ + new QCoreApplication(argc, argv); /* NOTE(review): created with new and never deleted in this function - presumably intentional so that the application object lives for the whole run; confirm. */ + + using Stack = LocklessList; + { + size_t n = 100 * 1000 * 1000; + Stack stack{n}; + PerfLogTimer timer("LocklessList: Insert 100m entries, single-threaded, pre-allocated"); /* the timer is declared after the stack, so the stack is constructed before the timer exists and destroyed after the timer is gone */ + for (size_t i=0; i < n; i++) { + stack.push(i); + } + } + { + size_t n = 50 * 1000 * 1000; + size_t t = 5; + Stack stack{n}; + std::vector threads; threads.reserve(t); + PerfLogTimer timer("LocklessList: Push 50m entries, multi-threaded, pre-allocated"); + /* Each thread pushes n / t values. NOTE(review): both loops below use the literal 5 rather than t; the two agree only because t is 5 - keep them in sync. */ + for (int i=0; i < 5; i++) { + threads.emplace_back([&]() { + for (size_t j=0; j < n / t; j++) { + stack.push(j); + } + }); + } + + for (int i=0; i < 5; i++) { + threads.at(i).join(); + } + } + /* Single-threaded allocator comparison; MemoryPool is the only allocator constructed with an argument (n). */ + { + size_t n = 10 * 1000 * 1000; + constexpr size_t S = 256; + using T = std::array; + benchmark_allocator("std::allocator", std::allocator{}, n, 1); + benchmark_allocator("MmAllocator", MmAllocator{}, n, 1); + benchmark_allocator("MemoryPool", MemoryPool{n}, n, 1); + } /* Same three allocators again, this time through the threaded benchmark with t = 4. */ + { + size_t n = 10 * 1000 * 1000; + constexpr size_t S = 256; + using T = std::array; + benchmark_allocator_threaded("std::allocator", std::allocator{}, n, 4); + benchmark_allocator_threaded("MmAllocator", MmAllocator{}, n, 4); + benchmark_allocator_threaded("MemoryPool", MemoryPool{n}, n, 4); + } +}