diff --git a/Makefile b/Makefile index 7eea0e9..74ca2a1 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,7 @@ ifeq ($(AQ_DEBUG), 1) OPTFLAGS = -g3 #-static-libsan -fsanitize=address # LINKFLAGS = else - OPTFLAGS += -Ofast -DNDEBUG -fno-stack-protector + OPTFLAGS += -Ofast -DNDEBUG -fno-stack-protector -fopenmp LINKFLAGS += -flto -s endif diff --git a/copyright_stamper.py b/copyright_stamper.py new file mode 100644 index 0000000..4263dbe --- /dev/null +++ b/copyright_stamper.py @@ -0,0 +1,53 @@ +import os + +payload = ('''\ +(C) Bill Sun 2022 - 2023 +All rights reserved. (or some other license stuff) +''' ).strip().split('\n') + +comment_factory = lambda mark, enclosure = '': (f'''\ +{enclosure}{mark} +{mark} {f'{chr(10)}{mark} '.join(payload)} +{mark}{enclosure}\n +''' ).encode() + +py_payload = comment_factory('#') +c_payload = comment_factory('*', '/') + +curr = ['.'] +while curr: + next = [] + for dir in curr: + items = os.listdir(dir) + for file in items: + fullpath = f'{dir}{os.sep}{file}' + if os.path.isdir(fullpath): + next.append(fullpath) + else: + def write_to_file(payload: bytes): + with open(fullpath, 'rb+') as f: + content = f.read() + if not content.startswith((payload, b'#!')): # already stamped, or a shebang that must stay on line 1 + f.seek(0) + f.write(payload + content) + print('processed', fullpath) + else: + print('not processed', fullpath) + + if ( + file.lower() == 'makefile' or + file.lower() == 'dockerfile' or + '.' in file and + file[file.rfind('.') + 1:].lower() + in + ['py', 'sh'] + ): + write_to_file(py_payload) + elif ( + '.' in file and + file[file.rfind('.') + 1:].lower() + in + ['cc', 'c', 'cpp', 'cxx', 'hpp', 'h'] + ): + write_to_file(c_payload) + curr = next diff --git a/engine/TODO.md b/engine/TODO.md index f0c16b4..45aa440 100644 --- a/engine/TODO.md +++ b/engine/TODO.md @@ -8,4 +8,8 @@ ## 2. 
ColRef supports multiple objects - A.a = B.b then in projection A.a B.b will refer to same projection - - Colref::ProjEq(ColRef v) => this == v or v in this.proj_eqs \ No newline at end of file + - Colref::ProjEq(ColRef v) => this == v or v in this.proj_eqs + +## 3. External Optimizing Tools + - mold/sold instead of builtin linker will boost linker speed + - bolt that have binary optimizer \ No newline at end of file diff --git a/engine/ast.py b/engine/ast.py index b666048..8ef2b17 100644 --- a/engine/ast.py +++ b/engine/ast.py @@ -566,6 +566,7 @@ class scan(ast_node): self.parent.context.scans.append(self) def produce(self, node): + self.start += '#pragma omp simd\n' # NOTE(review): asserts scan bodies carry no cross-iteration dependency - confirm if self.loop_style == scan.LoopStyle.foreach: self.colref = node self.start += f'for ({self.const}auto& {self.it_var} : {node}) {{\n' @@ -595,7 +596,7 @@ class scan(ast_node): self.start + self.front + b + - '}' + '\n}' ) for b in self.body]) + self.end @@ -606,7 +607,7 @@ class scan(ast_node): self.start + self.front + '\n'.join(self.body) + - '}' + + '\n}' + self.end ) self.context.remove_scan(self, scan_assembly) @@ -657,11 +658,13 @@ class groupby_c(ast_node): self.context.emitc(f'AQHashTable<{self.group_type}, ' f'transTypes<{self.group_type}, hasher>> {self.group} {{{self.total_sz}}};') self.n_grps = len(self.glist) - self.scanner = scan(self, self.total_sz, it_name=scanner_itname) - self.scanner.add(f'{self.group}.hashtable_push(forward_as_tuple({g_contents}), {self.scanner.it_var});') - + + # self.scanner = scan(self, self.total_sz, it_name=scanner_itname) + # self.scanner.add(f'{self.group}.hashtable_push(forward_as_tuple({g_contents}), {self.scanner.it_var});') + self.context.emitc(f'{self.group}.hashtable_push_all({g_contents}, {self.total_sz});') + def consume(self, _): - self.scanner.finalize() + # self.scanner.finalize() self.context.emitc('printf("ht_construct: %lld\\n", (chrono::high_resolution_clock::now() - timer).count()); timer = chrono::high_resolution_clock::now();') 
self.context.emitc(f'auto {self.vecs} = {self.group}.ht_postproc({self.total_sz});') self.context.emitc('printf("ht_postproc: %lld\\n", (chrono::high_resolution_clock::now() - timer).count()); timer = chrono::high_resolution_clock::now();') diff --git a/sdk/aquery.h b/sdk/aquery.h index 42ce09a..13f9e13 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -1,13 +1,13 @@ #ifndef _AQUERY_H #define _AQUERY_H -enum Log_level { +enum Log_level : int { LOG_INFO, LOG_ERROR, LOG_SILENT }; -enum Backend_Type { +enum Backend_Type : int { BACKEND_AQuery, BACKEND_MonetDB, BACKEND_MariaDB diff --git a/server/DataSource_conn.h b/server/DataSource_conn.h index b6a51fc..7ca7fa7 100644 --- a/server/DataSource_conn.h +++ b/server/DataSource_conn.h @@ -14,7 +14,7 @@ struct AQQueryResult { #ifndef __AQBACKEND_TYPE__ #define __AQBACKEND_TYPE__ 1 -enum Backend_Type { +enum Backend_Type : int { BACKEND_AQuery, BACKEND_MonetDB, BACKEND_MariaDB, diff --git a/server/hasher.h b/server/hasher.h index 066fab3..e493f3e 100644 --- a/server/hasher.h +++ b/server/hasher.h @@ -1,3 +1,7 @@ +/* +* (C) Bill Sun 2022 - 2023 +*/ + #pragma once #include @@ -5,6 +9,7 @@ #include #include #include "types.h" +#include "vector_type.hpp" // #include "robin_hood.h" #include "unordered_dense.h" @@ -138,68 +143,145 @@ namespace ankerl::unordered_dense{ struct hash> : public hasher{ }; } +template +class AQHashTable : public ankerl::unordered_dense::set { +public: + uint32_t* reversemap = nullptr, *mapbase = nullptr, *ht_base = nullptr; // null until the sized ctor or init() allocates + AQHashTable() = default; + explicit AQHashTable(uint32_t sz) + : ankerl::unordered_dense::set{} { + this->reserve(sz); + this->m_values.reserve(sz); + reversemap = static_cast(malloc(sizeof(uint32_t) * sz * 2)); + mapbase = reversemap + sz; + ht_base = static_cast(calloc(sz, sizeof(uint32_t))); + } + + void init(uint32_t sz) { + ankerl::unordered_dense::set::reserve(sz); + reversemap = static_cast(malloc(sizeof(uint32_t) * sz * 2)); + mapbase = reversemap + sz; + ht_base = static_cast(calloc(sz, 
sizeof(uint32_t))); + } + + template + inline void hashtable_push_all(Keys_t& ... keys, uint32_t len) { + for(uint32_t i = 0; i < len; ++i) + reversemap[i] = ankerl::unordered_dense::set::hashtable_push(keys[i]...); + for(uint32_t i = 0; i < len; ++i) + ++ht_base[reversemap[i]]; + } + inline void hashtable_push(Key&& k, uint32_t i){ + reversemap[i] = ankerl::unordered_dense::set::hashtable_push(k); + ++ht_base[reversemap[i]]; // do this separately? + } + + auto ht_postproc(uint32_t sz) { + auto& arr_values = this->values(); + const auto& len = this->size(); + + auto vecs = static_cast*>(malloc(sizeof(vector_type) * len)); + if (len > 0) vecs[0].init_from(ht_base[0], mapbase); // nothing to seed when the table is empty + for (uint32_t i = 1; i < len; ++i) { + vecs[i].init_from(ht_base[i], mapbase + ht_base[i - 1]); + ht_base[i] += ht_base[i - 1]; + } + for (uint32_t i = 0; i < sz; ++i) { + auto id = reversemap[i]; + mapbase[--ht_base[id]] = i; + } + return vecs; + } +}; + + template < - typename ValueType = bool, + typename ValueType = uint32_t, int PerfectHashingThreshold = 12 > struct PerfectHashTable { - // static int m_PerfectHashingThreshold = 12; - using key_t = std::conditional_t>>; - - int n_cols, n_rows = 0; - // char bits[32]; - ValueType table[1 << PerfectHashingThreshold]; - // PerfectHashTable(int n_cols, char* bits) { - // this->n_cols = n_cols; - // memcpy(this->bits, bits, 32); - // } - // template class VT> - // PerfectHashTable(VT ... args) { - - // } + constexpr static uint32_t tbl_sz = 1 << PerfectHashingThreshold; template class VT> - // std::enable_if_t, void> - void + static vector_type* construct(VT&... args) { // construct a hash set - ((this->n_cols = args.size), ...); + AQTmr(); + int n_cols = 0, n_rows = 0; + + ((n_cols = args.size), ...); static_assert( (sizeof...(Types) < PerfectHashingThreshold) && - //(sizeof(Types) + ...) < PerfectHashingThreshold && (std::is_integral_v && ...), "Types must be integral and less than 12 wide in total." ); - // this should be an attrib of VT. 
- key_t* // this better be automatically determined by Threshould - hash_values = static_cast( - calloc(this->n_cols, sizeof(key_t)) - ); - //new short[this->n_cols] {0}; // use calloc/delete + key_t* + hash_values = static_cast( + calloc(n_cols, sizeof(key_t)) + ); auto get_hash = [&hash_values](auto& arg, int idx) { - uint32_t i = 0; - if(idx > 0) - for (auto& a : arg) { + + if (idx > 0) { +#pragma omp simd + for (uint32_t i = 0; i < arg.size; ++i) { hash_values[i] = (hash_values[i] << arg.stats.bits) + - (a - arg.stats.minima); - ++i; + (arg.container[i] - arg.stats.minima); } - else - for (auto& a : arg) { - hash_values[i] = a - arg.stats.minima; - ++i; + } + else { +#pragma omp simd + for (uint32_t i = 0; i < arg.size; ++i) { + hash_values[i] = arg.container[i] - arg.stats.minima; + } } - }; + }; int idx = 0; (get_hash(args, idx++), ...); - for (uint32_t i = 0; i < this->n_cols; ++i) { - this->table[hash_values[i]] = true; - // problem: random memory access + uint32_t cnt[tbl_sz]; + uint32_t n_grps = 0; + memset(cnt, 0, sizeof(cnt)); + // no omp simd: rows with equal keys increment the same counter + for (uint32_t i = 0; i < n_cols; ++i) { + ++cnt[hash_values[i]]; + } + ValueType grp_ids[tbl_sz]; + // no omp simd: n_grps is carried from one iteration to the next + for (ValueType i = 0; i < tbl_sz; ++i) { + if (cnt[i] != 0) { + cnt[n_grps] = cnt[i]; + grp_ids[i] = n_grps++; + } + } + uint32_t* idxs = static_cast( + malloc(n_cols * sizeof(uint32_t)) + ); + uint32_t** idxs_ptr = static_cast( + malloc(n_grps * sizeof(uint32_t*)) + ); + if (n_grps > 0) idxs_ptr[0] = idxs; // no groups: idxs_ptr is a zero-size allocation +#ifdef _MSCVER +#pragma omp simd +#endif + for (int i = 1; i < n_grps; ++i) { + idxs_ptr[i] = idxs_ptr[i - 1] + cnt[i - 1]; + } + // no omp simd: rows of one group advance the same cursor + for (int i = 0; i < n_cols; ++i) { + *(idxs_ptr[grp_ids[hash_values[i]]]++) = i; + } + vector_type* idxs_vec = static_cast*>( + malloc(n_grps * sizeof(vector_type)) + ); +#pragma omp simd + for (int i = 0; i < n_grps; ++i) { + idxs_vec[i].container = idxs_ptr[i] - cnt[i]; // the fill loop left each cursor one past its group's last row + idxs_vec[i].size = cnt[i]; } - // delete[] hash_values; - free(hash_values); // 
dispatch to gc + free(hash_values); free(idxs_ptr); // idxs_vec points into idxs, so the cursor array is scratch + return idxs_vec; } }; diff --git a/server/libaquery.h b/server/libaquery.h index d08d130..4be5577 100644 --- a/server/libaquery.h +++ b/server/libaquery.h @@ -57,7 +57,7 @@ char* intToString(T val, char* buf){ } -enum Log_level { +enum Log_level : int { LOG_INFO, LOG_ERROR, LOG_SILENT @@ -65,7 +65,7 @@ enum Log_level { #ifndef __AQBACKEND_TYPE__ #define __AQBACKEND_TYPE__ 1 -enum Backend_Type { +enum Backend_Type : int { BACKEND_AQuery, BACKEND_MonetDB, BACKEND_MariaDB, diff --git a/server/monetdb_ext.c b/server/monetdb_ext.c index 89c7441..8331f35 100644 --- a/server/monetdb_ext.c +++ b/server/monetdb_ext.c @@ -1,3 +1,7 @@ +/* +* (C) Bill Sun 2022 - 2023 +*/ + #include "pch_msc.hpp" // Non-standard Extensions for MonetDBe, may break concurrency control! diff --git a/server/server.cpp b/server/server.cpp index 57a9164..d0c8c00 100644 --- a/server/server.cpp +++ b/server/server.cpp @@ -80,7 +80,7 @@ extern "C" void __DLLEXPORT__ receive_args(int argc, char**argv){ n_recvd = argv; } -enum BinaryInfo_t { +enum BinaryInfo_t : int { // For ABI consistency between compiler MSVC, MSYS, GCC, CLANG, AppleClang }; diff --git a/server/table.h b/server/table.h index 6fadd84..0bbc638 100644 --- a/server/table.h +++ b/server/table.h @@ -35,7 +35,7 @@ struct ColRef_cstorage { void* container; unsigned int size, capacity; const char* name; - int ty; // what if enum is not int? + int ty; }; template