Add naive benchmarks

2018-04-18 08:18:23 +02:00
parent 2cb4455a5f
commit ef7b8c68d5
3 changed files with 147 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -140,6 +140,7 @@ CHECK_INCLUDE_FILES(locale.h LMMS_HAVE_LOCALE_H)

 LIST(APPEND CMAKE_PREFIX_PATH "${CMAKE_INSTALL_PREFIX}")

+
 FIND_PACKAGE(Qt5 COMPONENTS Core Gui Widgets Xml REQUIRED)
 FIND_PACKAGE(Qt5 COMPONENTS LinguistTools QUIET)

@@ -561,6 +562,7 @@ include(CompileCache)
 ADD_SUBDIRECTORY(cmake)
 ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(plugins)
+ADD_SUBDIRECTORY(benchmarks)
 ADD_SUBDIRECTORY(tests)
 ADD_SUBDIRECTORY(data)
 ADD_SUBDIRECTORY(doc)
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Header search paths: this directory (source and build tree), the project include/ dir, the top-level build dir.
+INCLUDE_DIRECTORIES(
+	"${CMAKE_CURRENT_SOURCE_DIR}"
+	"${CMAKE_CURRENT_BINARY_DIR}"
+	"${CMAKE_SOURCE_DIR}/include"
+	"${CMAKE_BINARY_DIR}"
+)
+SET(CMAKE_CXX_STANDARD 11)
+SET(CMAKE_AUTOMOC ON)
+# Left out of the default "all" build (EXCLUDE_FROM_ALL); pulls in the object files of the lmmsobjs target.
+ADD_EXECUTABLE(benchmarks EXCLUDE_FROM_ALL
+	benchmark.cpp
+	$<TARGET_OBJECTS:lmmsobjs>
+)
+TARGET_LINK_LIBRARIES(benchmarks ${QT_LIBRARIES} ${LMMS_REQUIRED_LIBS})
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@@ -0,0 +1,130 @@
+#include <thread>
+
+#include <QtCore/QCoreApplication>
+#include <QtCore/QDebug>
+
+#include "libcds.h"
+#include <cds/container/vyukov_mpmc_cycle_queue.h>
+
+#include "Engine.h"
+#include "PerfLog.h"
+
+#include "LocklessList.h"
+#include "MemoryPool.h"
+
+#include "NotePlayHandle.h"
+
+template<typename T>
+using LocklessQueue = cds::container::VyukovMPMCCycleQueue<T>;
+
+// Single-threaded allocator benchmark, run under a PerfLogTimer: I rounds, each making n
+// single-object allocations on `alloc` and then releasing them in the same order.
+// `name` only labels the measurement.
+template<typename Alloc>
+void benchmark_allocator(QString name, Alloc&& alloc, size_t n, size_t I)
+{
+	using T = typename Alloc::value_type;
+	constexpr size_t S = sizeof(T);
+
+	// The pointer table is sized before the timer is created.
+	std::vector<T*> slots(n);
+	PerfLogTimer timer(QString("Allocate: %1 x %2 x %3 bytes, %4")
+					   .arg(I).arg(n).arg(S).arg(name));
+
+	for (size_t round = 0; round < I; ++round) {
+		// Fill every slot first, then free them all.
+		for (T*& slot : slots) { slot = alloc.allocate(1); }
+		for (T* slot : slots) { alloc.deallocate(slot, 1); }
+	}
+}
+
+
+// Multi-threaded allocator benchmark, run under a PerfLogTimer: t threads make n
+// single-object allocations in total and hand the pointers through a LocklessQueue to t
+// other threads, which make the n matching deallocations. `name` only labels the measurement.
+template<typename Alloc>
+void benchmark_allocator_threaded(QString name, Alloc&& alloc, size_t n, size_t t)
+{
+	using T = typename Alloc::value_type;
+	constexpr size_t S = sizeof(T);
+
+	LocklessQueue<T*> ptrs{n};
+	PerfLogTimer timer(QString("Allocate multi-threaded: %1 x %2 bytes using %3 threads, %4")
+					   .arg(n).arg(S).arg(t).arg(name));
+
+	std::vector<std::thread> threads;
+	threads.reserve(t*2);
+	std::atomic_uint_fast64_t allocated{0};
+	std::atomic_uint_fast64_t deallocated{0};
+
+	// Each bump of the shared counter claims one allocation; stop once n are claimed.
+	const auto producer = [&]() {
+		while (allocated.fetch_add(1) < n) {
+			auto ptr = alloc.allocate(1);
+			ptrs.push(ptr);
+		}
+	};
+	// Same claiming scheme for deallocations; spin until the queue yields a pointer.
+	const auto consumer = [&]() {
+		while (deallocated.fetch_add(1) < n) {
+			T* ptr;
+			while (! ptrs.pop(ptr)) {}
+			alloc.deallocate(ptr, 1);
+		}
+	};
+	for (size_t i=0; i < t; i++) { threads.emplace_back(producer); }
+	for (size_t i=0; i < t; i++) { threads.emplace_back(consumer); }
+
+	for (std::thread& worker : threads) { worker.join(); }
+}
+
+// Benchmark driver: LocklessList pushes, then allocator benchmarks (single- and multi-threaded).
+int main(int argc, char* argv[])
+{
+	// NOTE(review): never deleted — presumably meant to live until process exit; confirm.
+	new QCoreApplication(argc, argv);
+
+	using Stack = LocklessList<size_t>;
+	{
+		size_t n = 100 * 1000 * 1000;
+		Stack stack{n};
+		PerfLogTimer timer("LocklessList: Insert 100m entries, single-threaded, pre-allocated");
+		for (size_t i=0; i < n; i++) {
+			stack.push(i);
+		}
+	}
+	{
+		size_t n = 50 * 1000 * 1000;
+		size_t t = 5;
+		Stack stack{n};
+		std::vector<std::thread> threads; threads.reserve(t);
+		PerfLogTimer timer("LocklessList: Push 50m entries, multi-threaded, pre-allocated");
+		// Spawn exactly t threads so the count always matches the n / t share each one pushes.
+		for (size_t i=0; i < t; i++) {
+			threads.emplace_back([&]() {
+				for (size_t j=0; j < n / t; j++) {
+					stack.push(j);
+				}
+			});
+		}
+		for (std::thread& thread : threads) {
+			thread.join();
+		}
+	}
+	{
+		size_t n = 10 * 1000 * 1000;
+		constexpr size_t S = 256;
+		using T = std::array<char, S>;
+		benchmark_allocator("std::allocator", std::allocator<T>{}, n, 1);
+		benchmark_allocator("MmAllocator", MmAllocator<T>{}, n, 1);
+		benchmark_allocator("MemoryPool", MemoryPool<T>{n}, n, 1);
+	}
+	{
+		size_t n = 10 * 1000 * 1000;
+		constexpr size_t S = 256;
+		using T = std::array<char, S>;
+		benchmark_allocator_threaded("std::allocator", std::allocator<T>{}, n, 4);
+		benchmark_allocator_threaded("MmAllocator", MmAllocator<T>{}, n, 4);
+		benchmark_allocator_threaded("MemoryPool", MemoryPool<T>{n}, n, 4);
+	}
+}