From b250ef67bea084c214f5c61f06eec9b9422e3d44 Mon Sep 17 00:00:00 2001
From: Bill
Date: Wed, 2 Nov 2022 05:00:09 +0800
Subject: [PATCH] initial commit

---
 README.md                |  26 +++++
 bench.py                 |  31 ++++++
 gcc12.2.0deadlockbug.cpp |  29 +++++
 test.cpp                 | 226 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 README.md
 create mode 100644 bench.py
 create mode 100644 gcc12.2.0deadlockbug.cpp
 create mode 100644 test.cpp

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9b4f929
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# Inter-thread Communication Benchmark
+
+This code benchmarks several ways for threads to collaborate, to find the fastest one.
+
+Platforms tested:
+- Windows (MSVC, gcc-12.2, WSL-gcc11.2, WSL-clang-14)
+- macOS (clang-13, gcc-12.2)
+- Linux (gcc-12.1, gcc-11.3, clang-12)
+- arm64 Linux (gcc-12.1, clang-12)
+
+Methods tested:
+- C++20 `std::binary_semaphore` (`smph`)
+- Native semaphores (`nsmph`)
+- Condition variables (`cv`)
+- Busy waiting (spinlock) with or without delay (`busy`)
+
+Results (`dl` = deadlock):
+
+|       | macOS | linux   | win, MSVC | win, gcc |
+|-------|-------|---------|-----------|----------|
+| cv    | 6000  | 10000   | 700       | 26000    |
+| busy  | 360   | 300     | 550       | 300      |
+| nsmph | 600   | 4418    | 6000      | 1200     |
+| smph  | 240   | 1000/dl | 660       | dl       |
+
+A bug in g++ 12.2.0 might cause a deadlock, see: gcc12.2.0deadlockbug.cpp
diff --git a/bench.py b/bench.py
new file mode 100644
index 0000000..02caffc
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,31 @@
+import ctypes
+import threading
+import os
+# os.add_dll_directory('c:/mingw64/bin')
+
+test = ctypes.CDLL('./a.so')
+N = 100000
+
+
+def run(case, loop_name, step_name, *extra):
+    """Run one benchmark case.
+
+    A worker thread runs the C function ``loop_name(N, *extra)`` while this
+    thread calls the C function ``step_name()`` N times, then joins the worker.
+    """
+    print(case)
+    worker = threading.Thread(target=test[loop_name], args=(N, *extra), daemon=True)
+    step = test[step_name]
+    worker.start()
+    for _ in range(N):
+        step()
+    worker.join()
+
+
+run(0, 'loop_acquire', 'acquire')       # C++20 std::binary_semaphore
+run(1, 'loop_aacquire', 'aacquire')     # native semaphore
+run(2, 'loop_lock', 'lock')             # condition variable
+run(3, 'loop_flag', 'set')              # busy wait on a volatile flag
+run(4, 'loop_slp', 'set', 0)            # busy wait with sleep_for(0 ns)
+# run(5, 'loop_slp', 'set', 1)          # busy wait with sleep_for(1 ns)
+run(6, 'loop_atomic', 'atomic_set')     # busy wait on std::atomic
diff --git a/gcc12.2.0deadlockbug.cpp b/gcc12.2.0deadlockbug.cpp
new file mode 100644
index 0000000..0b62157
--- /dev/null
+++ b/gcc12.2.0deadlockbug.cpp
@@ -0,0 +1,29 @@
+// Minimal reproduction: two threads ping-pong between two binary semaphores.
+// Expected to print "done"; README.md reports a possible deadlock with g++ 12.2.0.
+#include <cstdio>
+#include <semaphore>
+#include <thread>
+constexpr int loop = 100000;
+std::binary_semaphore a{0}, b{1};
+
+void producer() {
+    for(int i = 0; i < loop; ++i) {
+        a.acquire();
+        b.release();
+    }
+}
+
+void consumer() {
+    for(int i = 0; i < loop; ++i) {
+        b.acquire();
+        a.release();
+    }
+}
+
+int main() {
+    std::thread t1(producer);
+    std::thread t2(consumer);
+    t1.join();
+    t2.join();
+    std::puts("done");
+}
diff --git a/test.cpp b/test.cpp
new file mode 100644
index 0000000..82970f4
--- /dev/null
+++ b/test.cpp
@@ -0,0 +1,226 @@
+// Thread hand-off primitives exported with C linkage so bench.py can call them via ctypes.
+// Each benchmark pairs a step function (called N times by the caller) with a loop_*
+// function (run on a second thread) that prints the elapsed time in nanoseconds.
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <cstdio>
+#include <mutex>
+#include <semaphore>
+#include <thread>
+
+#ifdef _MSC_VER
+#define EXPORT __declspec(dllexport)
+#include <windows.h>
+// Binary semaphore backed by a Win32 semaphore object (maximum count 1).
+class A_Semaphore {
+private:
+    HANDLE native_handle;
+public:
+    A_Semaphore(bool v = false) {
+        native_handle = CreateSemaphore(NULL, v, 1, NULL);
+    }
+    A_Semaphore(const A_Semaphore&) = delete;
+    A_Semaphore& operator=(const A_Semaphore&) = delete;
+    void acquire() {
+        WaitForSingleObject(native_handle, INFINITE);
+    }
+    void release() {
+        ReleaseSemaphore(native_handle, 1, NULL);
+    }
+    ~A_Semaphore() {
+        CloseHandle(native_handle);
+    }
+};
+#else
+#define EXPORT
+#ifdef __APPLE__
+#include <dispatch/dispatch.h>
+// Binary semaphore backed by a libdispatch semaphore.
+class A_Semaphore {
+private:
+    dispatch_semaphore_t native_handle;
+public:
+    A_Semaphore(bool v = false) {
+        native_handle = dispatch_semaphore_create(v);
+    }
+    A_Semaphore(const A_Semaphore&) = delete;
+    A_Semaphore& operator=(const A_Semaphore&) = delete;
+    void acquire() {
+        // dispatch_semaphore_t is itself the handle: pass it by value, not by address.
+        dispatch_semaphore_wait(native_handle, DISPATCH_TIME_FOREVER);
+    }
+    void release() {
+        dispatch_semaphore_signal(native_handle);
+    }
+    ~A_Semaphore() {
+        // Deliberately not released: instances are process-lifetime globals, and
+        // libdispatch traps when a semaphore is disposed while its value is below
+        // the value it was created with.
+    }
+};
+#else
+#include <semaphore.h>
+// Binary semaphore backed by an unnamed POSIX semaphore.
+class A_Semaphore {
+private:
+    sem_t native_handle;
+public:
+    A_Semaphore(bool v = false) {
+        // sem_init(sem, pshared, value): not shared between processes, initial count v.
+        sem_init(&native_handle, 0, v);
+    }
+    A_Semaphore(const A_Semaphore&) = delete;
+    A_Semaphore& operator=(const A_Semaphore&) = delete;
+    void acquire() {
+        sem_wait(&native_handle);
+    }
+    void release() {
+        sem_post(&native_handle);
+    }
+    ~A_Semaphore() {
+        sem_destroy(&native_handle);
+    }
+};
+#endif
+#endif
+
+namespace {
+using bench_clock = std::chrono::steady_clock;
+
+// Nanoseconds elapsed since `start`, as long long so it matches printf's %lld.
+long long elapsed_ns(bench_clock::time_point start) {
+    const auto elapsed = bench_clock::now() - start;
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
+}
+}  // namespace
+
+A_Semaphore pp{ false }, cc{ true };
+std::binary_semaphore producer{ 0 }, consumer{ 1 };
+std::mutex m;
+std::condition_variable cv;
+bool ready = false;  // guarded by m
+
+// One std::binary_semaphore hand-off: wait for our turn, then wake loop_acquire.
+extern "C" EXPORT void acquire() {
+    consumer.acquire();
+    // work
+    producer.release();
+}
+
+// Counterpart of acquire(): performs n hand-offs and prints the elapsed nanoseconds.
+extern "C" EXPORT void loop_acquire(int n) {
+    unsigned long long k = 0;  // never updated; printed to keep the output format
+    const auto start = bench_clock::now();
+    for (int i = n; i > 0; --i) {
+        producer.acquire();
+        // work
+        consumer.release();
+    }
+    std::printf("std::semaphore: %lld sum: %llu\n", elapsed_ns(start), k);
+}
+
+// One native-semaphore hand-off: wait for our turn, then wake loop_aacquire.
+extern "C" EXPORT void aacquire() {
+    cc.acquire();
+    // work
+    pp.release();
+}
+
+// Counterpart of aacquire(): performs n hand-offs and prints the elapsed nanoseconds.
+extern "C" EXPORT void loop_aacquire(int n) {
+    unsigned long long k = 0;  // never updated; printed to keep the output format
+    const auto start = bench_clock::now();
+    for (int i = n; i > 0; --i) {
+        pp.acquire();
+        // work
+        cc.release();
+    }
+    std::printf("native semaphore: %lld sum: %llu\n", elapsed_ns(start), k);
+}
+
+// One condition-variable hand-off: wait until loop_lock sets `ready`, clear it, notify back.
+extern "C" EXPORT void lock() {
+    std::unique_lock<std::mutex> lk(m);
+    cv.wait(lk, [] { return ready; });
+    ready = false;
+    lk.unlock();
+    cv.notify_one();
+}
+
+// Counterpart of lock(): n times, set `ready`, notify, and wait until lock() clears it.
+extern "C" EXPORT void loop_lock(int n) {
+    unsigned long long k = 0;  // never updated; printed to keep the output format
+    const auto start = bench_clock::now();
+    for (int i = n; i > 0; --i) {
+        std::unique_lock<std::mutex> lk(m);
+        ready = true;
+        lk.unlock();
+        cv.notify_one();
+        lk.lock();
+        cv.wait(lk, [] { return !ready; });
+    }
+    std::printf("lock: %lld sum: %llu\n", elapsed_ns(start), k);
+}
+
+// NOTE(review): volatile gives no inter-thread ordering or atomicity guarantees, so
+// the accesses below are formally a data race. It is kept as the plain busy-wait
+// baseline; `af` further down is the std::atomic version of the same test.
+volatile bool flag = false;
+
+// Spin until loop_flag/loop_slp raises `flag`, then lower it again.
+extern "C" EXPORT void set() {
+    while (!flag) {
+    }
+    flag = false;
+}
+
+// Counterpart of set(): raises `flag` n times, spinning while it is still raised.
+extern "C" EXPORT void loop_flag(int n) {
+    unsigned s = 0;  // never updated; printed to keep the output format
+    const auto start = bench_clock::now();
+    int i = n;
+    while (i > 0) {
+        if (!flag) {
+            flag = true;
+            --i;
+        }
+    }
+    std::printf("flag: %lld s: %u\n", elapsed_ns(start), s);
+}
+
+std::atomic<bool> af{ false };
+
+// Spin until loop_atomic raises `af`, then lower it again.
+extern "C" EXPORT void atomic_set() {
+    while (!af) {
+    }
+    af = false;
+}
+
+// Counterpart of atomic_set(): raises `af` n times, spinning while it is still raised.
+extern "C" EXPORT void loop_atomic(int n) {
+    const auto start = bench_clock::now();
+    int i = n;
+    while (i > 0) {
+        if (!af) {
+            af = true;
+            --i;
+        }
+    }
+    std::printf("atomic: %lld\n", elapsed_ns(start));
+}
+
+// Like loop_flag, but sleeps for `slp` nanoseconds after every poll of `flag`.
+extern "C" EXPORT void loop_slp(int n, int slp) {
+    const auto start = bench_clock::now();
+    int i = n;
+    while (i > 0) {
+        if (!flag) {
+            flag = true;
+            --i;
+        }
+        std::this_thread::sleep_for(std::chrono::nanoseconds(slp));
+    }
+    std::printf("flag: %lld slp: %d\n", elapsed_ns(start), slp);
+}