/*
 * (C) Bill Sun 2022 - 2023
 */
#pragma once

#include <cstdint>
#include <cstdlib>
#include <tuple>
#include <type_traits>
#include <vector>

#include "types.h"
#include "vector_type.hpp"
// #include "robin_hood.h"
#include "unordered_dense.h"

template <typename Key, typename Val>
using aq_map = ankerl::unordered_dense::map<Key, Val>;
template <typename Key>
using aq_set = ankerl::unordered_dense::set<Key>;

// only works for 64 bit systems
namespace hasher_consts {
	constexpr size_t _FNV_offset_basis = 14695981039346656037ULL;
	constexpr size_t _FNV_prime = 1099511628211ULL;
}

inline size_t append_bytes(const unsigned char* _First) noexcept {
	size_t _Val = hasher_consts::_FNV_offset_basis;
	for (; *_First; ++_First) {
		_Val ^= static_cast<size_t>(*_First);
		_Val *= hasher_consts::_FNV_prime;
	}
	return _Val;
}

inline size_t append_bytes(const astring_view& view) noexcept {
	return append_bytes(view.str);
}

#ifdef __SIZEOF_INT128__
union int128_struct {
	struct { uint64_t low, high; } __struct;
	__int128_t value = 0;
	__uint128_t uvalue;
	constexpr int128_struct() : value(0) {}
	constexpr int128_struct(const __int128_t& value) noexcept : value(value) {}
	constexpr int128_struct(const __uint128_t& value) noexcept : uvalue(value) {}
	operator __int128_t () const { return value; }
	operator __uint128_t () const { return uvalue; }
	operator __int128_t& () { return value; }
	operator __uint128_t& () { return uvalue; }
};
#endif

// Combines the hashes of all tuple members, recursing over the member index i.
template <typename... Types>
struct hasher {
	template <size_t i>
	typename std::enable_if<i == sizeof...(Types), size_t>::type
	hashi(const std::tuple<Types...>&) const {
		return 534235245539ULL;
	}

	template <size_t i>
	typename std::enable_if<i < sizeof...(Types), size_t>::type
	hashi(const std::tuple<Types...>& record) const {
		using current_type = typename std::decay<
			typename std::tuple_element<i, std::tuple<Types...>>::type>::type;
#ifdef __SIZEOF_INT128__
		// 128-bit integers are hashed through int128_struct instead.
		using _current_type = typename std::conditional_t<
			std::is_same_v<current_type, __int128_t> ||
			std::is_same_v<current_type, __uint128_t>,
			int128_struct, current_type>;
#else
	#define _current_type current_type
#endif
		return ankerl::unordered_dense::hash<_current_type>()(std::get<i>(record)) ^
			hashi<i + 1>(record);
	}

	size_t operator()(const std::tuple<Types...>& record) const {
		return hashi<0>(record);
	}
};

template <typename T>
struct hasher<T> {
	size_t operator()(const std::tuple<T>& record) const {
		return ankerl::unordered_dense::hash<T>()(std::get<0>(record));
	}
};

namespace ankerl::unordered_dense {
	template <>
	struct hash<astring_view> {
		size_t operator()(const astring_view& _Keyval) const noexcept {
			return ankerl::unordered_dense::hash<decltype(_Keyval.rstr)>()(_Keyval.rstr);
			// return append_bytes(_Keyval.str);
		}
	};

	template <>
	struct hash<types::date_t> {
		size_t operator()(const types::date_t& _Keyval) const noexcept {
			return ankerl::unordered_dense::hash<unsigned int>()(*(unsigned int*)(&_Keyval));
		}
	};

	template <>
	struct hash<types::time_t> {
		size_t operator()(const types::time_t& _Keyval) const noexcept {
			return ankerl::unordered_dense::hash<decltype(_Keyval.ms)>()(_Keyval.ms) ^
				ankerl::unordered_dense::hash<decltype(_Keyval.seconds)>()(_Keyval.seconds) ^
				ankerl::unordered_dense::hash<decltype(_Keyval.minutes)>()(_Keyval.minutes) ^
				ankerl::unordered_dense::hash<decltype(_Keyval.hours)>()(_Keyval.hours);
		}
	};

	template <>
	struct hash<types::timestamp_t> {
		size_t operator()(const types::timestamp_t& _Keyval) const noexcept {
			return ankerl::unordered_dense::hash<decltype(_Keyval.date)>()(_Keyval.date) ^
				ankerl::unordered_dense::hash<decltype(_Keyval.time)>()(_Keyval.time);
		}
	};

#ifdef __SIZEOF_INT128__
	template <>
	struct hash<int128_struct> {
		size_t operator()(const int128_struct& _Keyval) const noexcept {
			return ankerl::unordered_dense::hash<uint64_t>()(_Keyval.__struct.low) ^
				ankerl::unordered_dense::hash<uint64_t>()(_Keyval.__struct.high);
		}
	};
#endif

	template <typename... Types>
	struct hash<std::tuple<Types...>> : public hasher<Types...> {};
}

template <typename Key, typename Hash>
class AQHashTable : public ankerl::unordered_dense::set<Key, Hash> {
public:
	uint32_t *reversemap, *mapbase, *ht_base;

	AQHashTable() = default;
	explicit AQHashTable(uint32_t sz)
		: ankerl::unordered_dense::set<Key, Hash>{} {
		this->reserve(sz);
		this->m_values.reserve(sz);
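		// Buffer layout (inferred from ht_postproc below): reversemap and mapbase
		// share one allocation of 2 * sz entries. reversemap[i] records the group
		// id of row i; mapbase later receives each group's row indices laid out
		// contiguously. ht_base holds per-group counts, which ht_postproc turns
		// into per-group offsets.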
		reversemap = static_cast<uint32_t*>(malloc(sizeof(uint32_t) * sz * 2));
		mapbase = reversemap + sz;
		ht_base = static_cast<uint32_t*>(calloc(sz, sizeof(uint32_t)));
	}

	void init(uint32_t sz) {
		ankerl::unordered_dense::set<Key, Hash>::reserve(sz);
		reversemap = static_cast<uint32_t*>(malloc(sizeof(uint32_t) * sz * 2));
		mapbase = reversemap + sz;
		ht_base = static_cast<uint32_t*>(calloc(sz, sizeof(uint32_t)));
	}

	template <typename... Keys_t>
	inline void hashtable_push_all(Keys_t&... keys, uint32_t len) {
#pragma omp simd
		for (uint32_t i = 0; i < len; ++i)
			reversemap[i] = ankerl::unordered_dense::set<Key, Hash>::hashtable_push(keys[i]...);
#pragma omp simd
		for (uint32_t i = 0; i < len; ++i)
			++ht_base[reversemap[i]];
	}

	inline void hashtable_push(Key&& k, uint32_t i) {
		reversemap[i] = ankerl::unordered_dense::set<Key, Hash>::hashtable_push(k);
		++ht_base[reversemap[i]]; // do this separately?
	}

	auto ht_postproc(uint32_t sz) {
		auto& arr_values = this->values();
		const auto& len = this->size();

		auto vecs = static_cast<vector_type<uint32_t>*>(
			malloc(sizeof(vector_type<uint32_t>) * len));
		vecs[0].init_from(ht_base[0], mapbase);
#pragma omp simd
		for (uint32_t i = 1; i < len; ++i) {
			vecs[i].init_from(ht_base[i], mapbase + ht_base[i - 1]);
			ht_base[i] += ht_base[i - 1];
		}
#pragma omp simd
		for (uint32_t i = 0; i < sz; ++i) {
			auto id = reversemap[i];
			mapbase[--ht_base[id]] = i;
		}
		return vecs;
	}
};

template <typename... Types>
struct HashTableComponents {
	uint32_t size;
	std::vector<std::tuple<Types...>>* keys;
	vector_type<uint32_t>* values;
	uint32_t* offsets;
};

template <
	typename ValueType = uint32_t,
	int PerfectHashingThreshold = 18
> // default < 1M table size
struct PerfectHashTable {
	// narrowest unsigned type that can hold a PerfectHashingThreshold-bit packed key
	using key_t = std::conditional_t<
		PerfectHashingThreshold <= 8, uint8_t,
		std::conditional_t<PerfectHashingThreshold <= 16, uint16_t, uint32_t>>;
	constexpr static uint32_t tbl_sz = 1 << PerfectHashingThreshold;

	template <typename... Types, template <typename> class VT>
	static HashTableComponents<Types...> // vector_type*
	construct(VT<Types>&... args) { // construct a hash set
		// AQTmr();
		int n_rows = 0;
		((n_rows = args.size), ...);
		static_assert(
			(sizeof...(Types) < PerfectHashingThreshold) &&
			(std::is_integral_v<Types> && ...),
			"Types must be integral and less than \"PerfectHashingThreshold\" wide in total."
		);

		// Pack all key columns into one integer per row: each column contributes
		// stats.bits bits, offset by its minimum value.
		key_t* hash_values = static_cast<key_t*>(calloc(n_rows, sizeof(key_t)));
		auto get_hash = [&hash_values](auto& arg, int idx) {
			if (idx > 0) {
#pragma omp simd
				for (uint32_t i = 0; i < arg.size; ++i) {
					hash_values[i] = (hash_values[i] << arg.stats.bits) +
						(arg.container[i] - arg.stats.minima);
				}
			}
			else {
#pragma omp simd
				for (uint32_t i = 0; i < arg.size; ++i) {
					hash_values[i] = arg.container[i] - arg.stats.minima;
				}
			}
		};
		int idx = 0;
		(get_hash(args, idx++), ...);

		// Count occurrences of each packed key; cnt_ext keeps one extra leading
		// slot so that it can later serve as the exclusive offset array.
		uint32_t *cnt_ext = static_cast<uint32_t*>(calloc(tbl_sz + 1, sizeof(uint32_t))),
			*cnt = cnt_ext + 1;
		uint32_t n_grps = 0;
#pragma omp simd
		for (uint32_t i = 0; i < n_rows; ++i) {
			++cnt[hash_values[i]];
		}

		// Compact non-empty slots and assign dense group ids.
		ValueType grp_ids[tbl_sz];
#pragma omp simd
		for (ValueType i = 0; i < tbl_sz; ++i) {
			if (cnt[i] != 0) {
				cnt[n_grps] = cnt[i];
				grp_ids[i] = n_grps++;
			}
		}

		std::vector<std::tuple<Types...>>* keys =
			new std::vector<std::tuple<Types...>>(n_grps); // Memory leak here, cleanup after module is done.
		const char bits[] = { 0, args.stats.bits... };
		auto decode = []<typename Ret>(auto& val, const char prev, const char curr) -> Ret {
			val >>= prev;
			const auto mask = (1 << curr) - 1;
			return val & mask;
		};
#pragma omp simd
		for (ValueType i = 0; i < n_grps; ++i) {
			int idx2 = 1;
			ValueType curr_val = grp_ids[i];
			(*keys)[i] = std::make_tuple(
				(decode.template operator()<Types>(curr_val, bits[idx2 - 1], bits[idx2++]))...
			); // requires C++20 for the calls to be executed sequentially.
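			// decode consumes curr_val in place: each call shifts off the field read
			// in the previous step, then masks out the current column's bit width.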
		}

		// Bucket row indices by group (a counting-sort style scatter).
		uint32_t* idxs = static_cast<uint32_t*>(malloc(n_rows * sizeof(uint32_t)));
		uint32_t** idxs_ptr = static_cast<uint32_t**>(malloc(n_grps * sizeof(uint32_t*)));
		idxs_ptr[0] = idxs;
#ifdef _MSC_VER
#pragma omp simd
#endif
		for (int i = 1; i < n_grps; ++i) {
			idxs_ptr[i] = idxs_ptr[i - 1] + cnt[i - 1];
		}
#pragma omp simd
		for (int i = 0; i < n_rows; ++i) {
			*(idxs_ptr[grp_ids[hash_values[i]]]++) = i;
		}

		vector_type<uint32_t>* idxs_vec = static_cast<vector_type<uint32_t>*>(
			malloc(n_grps * sizeof(vector_type<uint32_t>)));
#pragma omp simd
		for (int i = 0; i < n_grps; ++i) {
			idxs_vec[i].container = idxs_ptr[i];
			idxs_vec[i].size = cnt[i];
		}
		GC::gc_handle->reg(hash_values);

#pragma omp simd
		for (int i = 1; i < n_grps; ++i)
			cnt[i] += cnt[i - 1];
		cnt_ext[0] = 0;

		return { .size = n_grps, .keys = keys, .values = idxs_vec, .offsets = cnt_ext };
	}
};

template <typename T>
class ColRef;

template <
	class Key,
	class Hash,
	int PerfectHashingThreshold = 18
>
class HashTableFactory {
public:
	template <typename... Types>
	static HashTableComponents<Types...> get(ColRef<Types>&... cols) {
		// Use the perfect hash table when all key columns are integral and
		// their combined bit width fits in PerfectHashingThreshold bits.
		if constexpr ((std::is_integral_v<Types> && ...)) {
			if ((cols.stats.bits + ...) <= PerfectHashingThreshold) {
				return PerfectHashTable<uint32_t, PerfectHashingThreshold>::construct(cols...);
			}
		}
		// Fallback to regular hash table
		int n_rows = 0;
		((n_rows = cols.size), ...);
		AQHashTable<Key, Hash> ht{ static_cast<uint32_t>(n_rows) };
		ht.template hashtable_push_all<ColRef<Types>...>(cols..., n_rows);
		auto vals = ht.ht_postproc(n_rows);
		return { .size = static_cast<uint32_t>(ht.size()), .keys = &ht.values(),
			.values = vals, .offsets = ht.ht_base };
	}
};
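/*
 * Usage sketch (illustrative only; how ColRef columns are populated and what
 * their stats fields contain is assumed from the rest of the codebase, not
 * defined in this header):
 *
 *   using key_t  = std::tuple<int, int>;
 *   using hash_t = hasher<int, int>;
 *   ColRef<int> a = ..., b = ...;   // two grouping columns of equal length
 *   auto grp = HashTableFactory<key_t, hash_t>::get(a, b);
 *   // grp.size       -- number of distinct (a, b) groups
 *   // grp.keys       -- the distinct key tuples
 *   // grp.values[g]  -- row indices belonging to group g
 *   // grp.offsets    -- per-group offsets into the grouped row-index buffer
 */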