commit
b250ef67be
@ -0,0 +1,24 @@
|
||||
# Inter-thread Communication Benchmark
|
||||
|
||||
This project benchmarks several ways for two threads to hand work back and forth, to find out which synchronization primitive is fastest.
|
||||
|
||||
Platforms tested:
|
||||
- Windows (MSVC, gcc-12.2, WSL-gcc11.2, WSL-clang-14)
|
||||
- macOS (clang-13, gcc-12.2)
|
||||
- Linux (gcc-12.1, gcc-11.3, clang-12)
|
||||
- arm64 Linux (gcc-12.1, clang-12)
|
||||
|
||||
Methods tested:
|
||||
- C++20 `std::binary_semaphore`
|
||||
- Native semaphores
|
||||
- Condition Variables
|
||||
- Busy waiting (spinlock) with or without delay
|
||||
|
||||
Results:
|
||||
|       | macOS | linux   | win, msvc | win, gcc |
|-------|-------|---------|-----------|----------|
| cv    | 6000  | 10000   | 700       | 26000    |
| busy  | 360   | 300     | 550       | 300      |
| nsmph | 600   | 4418    | 6000      | 1200     |
| smph  | 240   | 1000/dl | 660       | dl       |
|
||||
|
||||
A bug in g++ 12.2.0 may cause a deadlock; see gcc12.2.0deadlockbug.cpp.
|
@ -0,0 +1,63 @@
|
||||
import ctypes
|
||||
import threading
|
||||
import os
|
||||
# os.add_dll_directory('c:/mingw64/bin')
|
||||
|
||||
# Shared library holding the benchmark entry points; every function used below
# is looked up in it by name.
test = ctypes.CDLL('./a.so')

# Number of hand-offs performed by each benchmark.
N = 100000


def run_benchmark(label, worker_name, partner_name, *extra_args):
    """Run one two-thread hand-off benchmark from the shared library.

    A background thread calls ``worker_name(N, *extra_args)`` while this
    thread calls ``partner_name()`` exactly ``N`` times; the function returns
    once the background thread has finished.

    Args:
        label: value printed before the benchmark starts, to identify the
            timing line that the library prints afterwards.
        worker_name: exported name of the looping function run on the
            background thread.
        partner_name: exported name of the per-iteration function called
            from this thread.
        *extra_args: additional integer arguments passed to the worker
            after ``N``.
    """
    print(label)
    worker = test[worker_name]
    partner = test[partner_name]
    # Both entry points are declared `void` on the C++ side; without this
    # ctypes would try to read an int return value that does not exist.
    worker.restype = None
    partner.restype = None

    h = threading.Thread(target=worker, args=(N, *extra_args), daemon=True)
    h.start()
    for _ in range(N):
        partner()
    h.join()


run_benchmark(0, 'loop_acquire', 'acquire')
run_benchmark(1, 'loop_aacquire', 'aacquire')
run_benchmark(2, 'loop_lock', 'lock')
run_benchmark(3, 'loop_flag', 'set')
run_benchmark(4, 'loop_slp', 'set', 0)
# Disabled in the original script: the same loop with a 1 ns sleep per pass.
# run_benchmark(5, 'loop_slp', 'set', 1)
run_benchmark(6, 'loop_atomic', 'atomic_set')
|
||||
|
@ -0,0 +1,26 @@
|
||||
#include <thread>
|
||||
#include <semaphore>
|
||||
constexpr int loop = 100000;
|
||||
std::binary_semaphore a{0}, b{1};
|
||||
|
||||
void producer() {
|
||||
for(int i = 0; i < loop; ++i) {
|
||||
a.acquire();
|
||||
b.release();
|
||||
}
|
||||
}
|
||||
|
||||
void consumer() {
|
||||
for(int i = 0; i < loop; ++i) {
|
||||
b.acquire();
|
||||
a.release();
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
std::thread t1(producer);
|
||||
std::thread t2(consumer);
|
||||
t1.join();
|
||||
t2.join();
|
||||
puts("done");
|
||||
}
|
@ -0,0 +1,204 @@
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define EXPORT _declspec(dllexport)
|
||||
#include <Windows.h>
|
||||
class A_Semaphore {
|
||||
private:
|
||||
HANDLE native_handle;
|
||||
public:
|
||||
A_Semaphore(bool v = false) {
|
||||
native_handle = CreateSemaphore(NULL, v, 1, NULL);
|
||||
}
|
||||
void acquire() {
|
||||
WaitForSingleObject(native_handle, INFINITE);
|
||||
}
|
||||
void release() {
|
||||
ReleaseSemaphore(native_handle, 1, NULL);
|
||||
}
|
||||
~A_Semaphore() {
|
||||
CloseHandle(native_handle);
|
||||
}
|
||||
};
|
||||
#else
|
||||
#define EXPORT
|
||||
#ifdef __APPLE__
|
||||
#include <dispatch/dispatch.h>
|
||||
class A_Semaphore {
|
||||
private:
|
||||
dispatch_semaphore_t native_handle;
|
||||
public:
|
||||
A_Semaphore(bool v = false) {
|
||||
native_handle = dispatch_semaphore_create(v);
|
||||
}
|
||||
void acquire() {
|
||||
dispatch_semaphore_wait(&native_handle, DISPATCH_TIME_FOREVER);
|
||||
}
|
||||
void release() {
|
||||
dispatch_semaphore_signal(&native_handle);
|
||||
}
|
||||
~A_Semaphore() {
|
||||
}
|
||||
};
|
||||
#else
|
||||
#include <semaphore.h>
|
||||
/// Semaphore backed by an unnamed POSIX semaphore (`sem_t`).
///
/// Non-copyable: a `sem_t` must be used at the address it was initialised at,
/// and a copy would also be destroyed twice.
class A_Semaphore {
private:
    sem_t native_handle;

public:
    /// @param v initial count: `true` starts with one permit, `false` with none.
    A_Semaphore(bool v = false) {
        // sem_init(sem, pshared, value): pshared = 0 keeps the semaphore
        // private to the threads of this process; `v` selects the initial count.
        sem_init(&native_handle, 0, v ? 1u : 0u);
    }

    A_Semaphore(const A_Semaphore&) = delete;
    A_Semaphore& operator=(const A_Semaphore&) = delete;

    /// Blocks until a permit is available, then takes it.
    /// NOTE: sem_wait can return early with EINTR when a signal handler runs;
    /// that case is not retried here.
    void acquire() {
        sem_wait(&native_handle);
    }

    /// Takes a permit if one is available right now.
    /// @return true if a permit was taken, false otherwise.
    bool try_acquire() {
        return sem_trywait(&native_handle) == 0;
    }

    /// Returns one permit to the semaphore.
    void release() {
        sem_post(&native_handle);
    }

    ~A_Semaphore() {
        sem_destroy(&native_handle);
    }
};
|
||||
#endif
|
||||
#endif
|
||||
// Native-semaphore pair used by aacquire() / loop_aacquire():
// `pp` is constructed with false and `cc` with true.
A_Semaphore pp{false};
A_Semaphore cc{true};
|
||||
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <semaphore>
#include <thread>
|
||||
|
||||
using namespace std;
|
||||
using namespace std::chrono_literals;
|
||||
mutex m;
|
||||
condition_variable cv;
|
||||
binary_semaphore producer{ 0 }, consumer{ 1 };
|
||||
chrono::high_resolution_clock::time_point now;
|
||||
int idx;
|
||||
bool ready = false;
|
||||
|
||||
/// One hand-off of the std::binary_semaphore benchmark, seen from the caller's
/// side: waits on `consumer`, then signals `producer`.
extern "C" EXPORT void acquire()
{
    consumer.acquire();
    // (benchmark payload would go here)
    producer.release();
}
|
||||
|
||||
/// Counterpart of acquire(): performs `n` rounds of "wait on `producer`,
/// signal `consumer`" and prints the elapsed time in nanoseconds.
///
/// @param n number of hand-offs to perform; nothing is exchanged when n <= 0.
extern "C" EXPORT void loop_acquire(int n) {
    // Placeholder kept only so the output format stays the same: no payload
    // is accumulated, so this always prints as 0.
    constexpr unsigned long long k = 0;

    now = std::chrono::high_resolution_clock::now();
    for (int remaining = n; remaining > 0; --remaining) {
        producer.acquire();
        // (benchmark payload would go here)
        consumer.release();
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("std::semaphore: %lld sum: %llu\n",
                static_cast<long long>(elapsed.count()), k);
}
|
||||
|
||||
|
||||
/// One hand-off of the native-semaphore benchmark, seen from the caller's
/// side: waits on `cc`, then signals `pp`.
extern "C" EXPORT void aacquire()
{
    cc.acquire();
    // (benchmark payload would go here)
    pp.release();
}
|
||||
|
||||
/// Counterpart of aacquire(): performs `n` rounds of "wait on `pp`,
/// signal `cc`" and prints the elapsed time in nanoseconds.
///
/// @param n number of hand-offs to perform; nothing is exchanged when n <= 0.
extern "C" EXPORT void loop_aacquire(int n) {
    // Placeholder kept only so the output format stays the same: no payload
    // is accumulated, so this always prints as 0.
    constexpr unsigned long long k = 0;

    now = std::chrono::high_resolution_clock::now();
    for (int remaining = n; remaining > 0; --remaining) {
        pp.acquire();
        // (benchmark payload would go here)
        cc.release();
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("native semaphore: %lld sum: %llu\n",
                static_cast<long long>(elapsed.count()), k);
}
|
||||
|
||||
/// One hand-off of the condition-variable benchmark, seen from the caller's
/// side: waits until `ready` is true, clears it, and notifies the other thread.
extern "C" EXPORT void lock() {
    {
        std::unique_lock<std::mutex> guard(m);
        cv.wait(guard, [] { return ready; });
        ready = false;
    }  // the mutex is released here, before notifying, as in the original
    cv.notify_one();
}
|
||||
|
||||
/// Counterpart of lock(): `n` times sets `ready`, notifies the other thread
/// and waits until `ready` has been cleared again, then prints the elapsed
/// time in nanoseconds.
///
/// @param n number of hand-offs to perform; nothing is exchanged when n <= 0.
extern "C" EXPORT void loop_lock(int n) {
    // Placeholder kept only so the output format stays the same: no payload
    // is accumulated, so this always prints as 0.
    constexpr unsigned long long k = 0;

    now = std::chrono::high_resolution_clock::now();
    for (int remaining = n; remaining > 0; --remaining) {
        std::unique_lock<std::mutex> guard(m);
        ready = true;
        // Notify without holding the mutex, then re-take it for the wait.
        guard.unlock();
        cv.notify_one();
        guard.lock();
        cv.wait(guard, [] { return !ready; });
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("lock: %lld sum: %llu\n",
                static_cast<long long>(elapsed.count()), k);
}
|
||||
|
||||
// Hand-off flag for the busy-wait benchmarks (set / loop_flag / loop_slp).
// NOTE(review): a plain `volatile bool`, not an atomic; presumably kept on
// purpose as the variant measured against `af` below - confirm before changing.
volatile bool flag = false;
|
||||
/// One hand-off of the busy-wait benchmark, seen from the caller's side:
/// spins until `flag` becomes true, then resets it to false.
extern "C" EXPORT void set() {
    while (!flag) {
        // spin
    }
    flag = false;
}
|
||||
|
||||
|
||||
/// Counterpart of set(): spins until it has raised `flag` `n` times (it can
/// only raise the flag after the other thread has cleared it), then prints
/// the elapsed time in nanoseconds.
///
/// @param n number of hand-offs to perform; returns immediately when n <= 0.
extern "C" EXPORT void loop_flag(int n) {
    // Placeholder kept only so the output format stays the same: nothing is
    // accumulated, so this always prints as 0.
    constexpr unsigned s = 0;

    now = std::chrono::high_resolution_clock::now();
    int remaining = n;
    while (remaining > 0) {
        if (!flag) {
            flag = true;
            --remaining;
        }
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("flag: %lld s: %u\n",
                static_cast<long long>(elapsed.count()), s);
}
|
||||
|
||||
#include <atomic>
|
||||
// Hand-off flag for the atomic busy-wait benchmark (atomic_set / loop_atomic).
std::atomic<bool> af{false};
|
||||
/// One hand-off of the atomic busy-wait benchmark, seen from the caller's
/// side: spins until `af` becomes true, then resets it to false.
extern "C" EXPORT void atomic_set() {
    while (!af.load()) {
        // spin
    }
    af.store(false);
}
|
||||
|
||||
|
||||
/// Counterpart of atomic_set(): spins until it has raised `af` `n` times (it
/// can only raise the flag after the other thread has cleared it), then
/// prints the elapsed time in nanoseconds.
///
/// NOTE(review): the output line is labelled "flag:", the same label that
/// loop_flag() uses; kept unchanged so existing output stays the same.
///
/// @param n number of hand-offs to perform; returns immediately when n <= 0.
extern "C" EXPORT void loop_atomic(int n) {
    now = std::chrono::high_resolution_clock::now();
    int remaining = n;
    while (remaining > 0) {
        if (!af) {
            af = true;
            --remaining;
        }
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("flag: %lld\n", static_cast<long long>(elapsed.count()));
}
|
||||
|
||||
|
||||
/// Variant of loop_flag() that calls sleep_for(`slp` nanoseconds) after every
/// pass of the spin loop. Raises `flag` `n` times, then prints the elapsed
/// time in nanoseconds together with `slp`.
///
/// @param n   number of hand-offs to perform; returns immediately when n <= 0.
/// @param slp sleep duration per loop pass, in nanoseconds.
extern "C" EXPORT void loop_slp(int n, int slp) {
    const std::chrono::nanoseconds pause(slp);

    now = std::chrono::high_resolution_clock::now();
    int remaining = n;
    while (remaining > 0) {
        if (!flag) {
            flag = true;
            --remaining;
        }
        // Sleeps on every pass, whether or not the flag was raised.
        std::this_thread::sleep_for(pause);
    }
    const std::chrono::nanoseconds elapsed =
        std::chrono::high_resolution_clock::now() - now;

    // duration::rep is not necessarily `long long` (it is `long` on LP64
    // Linux), so convert explicitly to match the %lld conversion.
    std::printf("flag: %lld slp: %d\n",
                static_cast<long long>(elapsed.count()), slp);
}
|
Loading…
Reference in new issue