// AQuery/server/hasher.h
#pragma once
#include <cstring>
#include <functional>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <vector>
#include "types.h"
// #include "robin_hood.h"
#include "unordered_dense.h"
// Alias for the hash map type used by this code base: ankerl::unordered_dense::map
// with its default hasher and equality for Key.
template<typename Key, typename Val>
using aq_map = ankerl::unordered_dense::map<Key, Val>;
// Companion alias for the hash set type (ankerl::unordered_dense::set).
template<typename Key>
using aq_set = ankerl::unordered_dense::set<Key>;
// only works for 64 bit systems
// Parameters of the 64-bit FNV hash. Both values need a 64-bit size_t to be
// represented without truncation.
namespace hasher_consts {
    // Initial state of the running hash (14695981039346656037).
    constexpr size_t _FNV_offset_basis = 0xcbf29ce484222325ULL;
    // Multiplier applied after each byte is mixed in (1099511628211).
    constexpr size_t _FNV_prime = 0x100000001b3ULL;
}
// Hashes a zero-terminated byte sequence: starting from the FNV offset basis,
// each byte is XOR-ed into the state, which is then multiplied by the FNV prime.
//
// _First must point at a readable sequence that ends with a zero byte; the
// terminator itself is not hashed. An empty sequence yields the offset basis.
inline size_t append_bytes(const unsigned char* _First) noexcept {
    size_t digest = hasher_consts::_FNV_offset_basis;
    while (*_First != 0) {
        digest = (digest ^ static_cast<size_t>(*_First)) * hasher_consts::_FNV_prime;
        ++_First;
    }
    return digest;
}
// Convenience overload: hashes the bytes reachable through view.str by
// forwarding to another append_bytes overload (presumably the
// const unsigned char* one — confirm against the declared type of astring_view::str).
// NOTE(review): the pointer overload stops at the first zero byte, so this
// appears to require view.str to be zero-terminated — verify in types.h.
inline size_t append_bytes(const astring_view& view) noexcept {
return append_bytes(view.str);
}
#ifdef __SIZEOF_INT128__
// Overlay of a 128-bit integer with two 64-bit words, so that code such as a
// hash specialization can reach the halves individually while the object
// still converts implicitly to and from the compiler's 128-bit integer types.
//
// NOTE(review): reading a member other than the one last written relies on the
// compiler's union type-punning behavior, and the `low`/`high` names match the
// numeric halves only on little-endian targets.
union int128_struct
{
    // The same 16 bytes seen as two 64-bit words.
    struct {
        uint64_t low, high;
    } __struct;
    // Signed view; this is the member that is zeroed by default.
    __int128_t value = 0;
    // Unsigned view of the same storage.
    __uint128_t uvalue;

    // Default construction yields zero.
    constexpr int128_struct() : value(0) {}
    // Implicit wrapping of a signed 128-bit integer.
    constexpr int128_struct(const __int128_t& signed_bits) noexcept : value(signed_bits) {}
    // Implicit wrapping of an unsigned 128-bit integer.
    constexpr int128_struct(const __uint128_t& unsigned_bits) noexcept : uvalue(unsigned_bits) {}

    // By-value conversions, usable on const objects.
    operator __int128_t () const { return value; }
    operator __uint128_t () const { return uvalue; }

    // By-reference conversions, so the wrapper can be assigned through as if
    // it were the underlying integer.
    operator __int128_t& () { return value; }
    operator __uint128_t& () { return uvalue; }
};
#endif
template <class ...Types>
struct hasher {
template <size_t i = 0> typename std::enable_if< i == sizeof...(Types),
size_t>::type hashi(const std::tuple<Types...>&) const {
return 534235245539ULL;
}
template <size_t i = 0> typename std::enable_if < i < sizeof ...(Types),
size_t>::type hashi(const std::tuple<Types...>& record) const {
using current_type = typename std::decay<typename std::tuple_element<i, std::tuple<Types...>>::type>::type;
#ifdef __SIZEOF_INT128__
using _current_type = typename std::conditional_t<
std::is_same_v<current_type, __uint128_t> ||
std::is_same_v<current_type, __int128_t>,
int128_struct, current_type>;
#else
#define _current_type current_type
#endif
return ankerl::unordered_dense::hash<_current_type>()(std::get<i>(record)) ^ hashi<i + 1>(record);
}
size_t operator()(const std::tuple<Types...>& record) const {
return hashi(record);
}
};
template <class T>
struct hasher<T>{
size_t operator()(const std::tuple<T>& record) const {
return ankerl::unordered_dense::hash<T>()(std::get<0>(record));
}
};
namespace ankerl::unordered_dense{
template<>
struct hash<astring_view> {
size_t operator()(const astring_view& _Keyval) const noexcept {
return ankerl::unordered_dense::hash<std::string_view>()(_Keyval.rstr);
//return append_bytes(_Keyval.str);
2 years ago
}
};
// Hash for types::date_t: the first sizeof(unsigned int) bytes of the object
// are read as one unsigned int and hashed with the library's integer hash.
//
// The bytes are copied out with memcpy rather than read through a casted
// pointer: dereferencing a date_t as `unsigned int` breaks the aliasing rules
// and silently discards const. The static_assert makes the size requirement
// that the pointer cast only assumed explicit.
template<>
struct hash<types::date_t> {
    size_t operator() (const types::date_t& _Keyval) const noexcept {
        static_assert(sizeof(types::date_t) >= sizeof(unsigned int),
            "date_t must be at least as large as unsigned int to be hashed by its leading bytes");
        unsigned int leading_bits = 0;
        std::memcpy(&leading_bits, &_Keyval, sizeof(leading_bits));
        return ankerl::unordered_dense::hash<unsigned int>()(leading_bits);
    }
};
// Hash for types::time_t.
//
// The four fields are packed into disjoint bit ranges of one 64-bit word
// (ms in bits 0-31, seconds 32-39, minutes 40-47, hours 48-55) and that word
// is hashed once. Each field is narrowed exactly as before: ms to an unsigned
// 32-bit value, the others to a single byte.
//
// XOR-ing the individual field hashes is avoided on purpose: seconds, minutes
// and hours go through the same byte hash, so equal fields cancel out and
// swapped fields collide (e.g. 10:05:05 and 10:07:07 with the same ms).
template<>
struct hash<types::time_t> {
    size_t operator() (const types::time_t& _Keyval) const noexcept {
        const uint64_t packed =
            (static_cast<uint64_t>(static_cast<unsigned char>(_Keyval.hours))   << 48) |
            (static_cast<uint64_t>(static_cast<unsigned char>(_Keyval.minutes)) << 40) |
            (static_cast<uint64_t>(static_cast<unsigned char>(_Keyval.seconds)) << 32) |
             static_cast<uint64_t>(static_cast<uint32_t>(_Keyval.ms));
        return ankerl::unordered_dense::hash<uint64_t>()(packed);
    }
};
// Hash for types::timestamp_t: the hash of the `date` member XOR-ed with the
// hash of the `time` member, each computed by its own specialization.
template<>
struct hash<types::timestamp_t> {
    size_t operator() (const types::timestamp_t& _Keyval) const noexcept {
        const ankerl::unordered_dense::hash<types::date_t> date_hash{};
        const ankerl::unordered_dense::hash<types::time_t> time_hash{};
        const auto date_digest = date_hash(_Keyval.date);
        const auto time_digest = time_hash(_Keyval.time);
        return date_digest ^ time_digest;
    }
};
#ifdef __SIZEOF_INT128__
template<>
struct hash<int128_struct>{
size_t operator() (const int128_struct& _Keyval) const noexcept {
return ankerl::unordered_dense::hash<uint64_t>()(_Keyval.__struct.low) ^ ankerl::unordered_dense::hash<uint64_t>()(_Keyval.__struct.high);
}
};
#endif
// Lets std::tuple be hashed through ankerl::unordered_dense::hash: the
// specialization adds nothing of its own and inherits operator() from
// hasher<Types...>.
template <class ...Types>
struct hash<std::tuple<Types...>> : public hasher<Types...>{ };
}
// Direct-addressed table for rows whose integral columns can be packed into a
// key of at most PerfectHashingThreshold bits.
//
// Template parameters:
//   ValueType               - element type of the slots (construct() assigns
//                             `true` to every slot it touches).
//   PerfectHashingThreshold - log2 of the number of slots; also selects the
//                             narrowest unsigned integer used for keys.
template <
    typename ValueType = bool,
    int PerfectHashingThreshold = 12
>
struct PerfectHashTable {
    // Narrowest unsigned type with at least PerfectHashingThreshold bits.
    using key_t = std::conditional_t<PerfectHashingThreshold <= 8, uint8_t,
        std::conditional_t<PerfectHashingThreshold <= 16, uint16_t,
            std::conditional_t<PerfectHashingThreshold <= 32, uint32_t,
                uint64_t
    >>>;

    // n_cols is set by construct() to the `size` member of the last column it
    // was given, i.e. it is used as the number of rows despite its name.
    // n_rows is not touched by any code in this struct.
    int n_cols, n_rows = 0;

    // One slot per possible key. Value-initialized so that slots never marked
    // by construct() read as ValueType{} instead of indeterminate memory.
    ValueType table[1 << PerfectHashingThreshold] = {};

    // Marks the slot of every row formed by the given columns.
    //
    // Each column must expose `size`, be iterable, and carry `stats.bits` and
    // `stats.minima`. A row's key is built by taking `value - stats.minima`
    // of the first column and, for every further column, shifting the key
    // left by that column's `stats.bits` and adding its `value - stats.minima`.
    //
    // Preconditions (not checked here):
    //   - all columns hold the same number of elements;
    //   - the `stats.bits` of all columns after the first, plus the range of
    //     the first column, fit into PerfectHashingThreshold bits; otherwise
    //     a key would address a slot outside `table`.
    template <typename ... Types, template <typename> class VT>
    void construct(VT<Types>&... args) {
        static_assert(
            (static_cast<int>(sizeof...(Types)) < PerfectHashingThreshold) &&
            //(sizeof(Types) + ...) < PerfectHashingThreshold &&
            (std::is_integral_v<Types> && ...),
            "Types must be integral and fewer than PerfectHashingThreshold in number."
        );

        // The comma fold leaves the size of the last column in n_cols.
        ((this->n_cols = args.size), ...);
        if (this->n_cols <= 0)
            return; // nothing to mark

        // Scratch keys, one per row, zero-initialized. Owned by a vector so
        // the storage is released on every exit path and an allocation
        // failure is reported instead of dereferencing a null pointer.
        std::vector<key_t> hash_values(static_cast<size_t>(this->n_cols), key_t{0});

        auto get_hash = [&hash_values](auto& arg, int idx) {
            size_t row = 0;
            if (idx > 0) {
                // Later columns: make room for this column's bits, then add.
                for (auto& a : arg) {
                    hash_values[row] =
                        (hash_values[row] << arg.stats.bits) +
                        (a - arg.stats.minima);
                    ++row;
                }
            } else {
                // First column: the key starts as the offset from the minimum.
                for (auto& a : arg) {
                    hash_values[row] = a - arg.stats.minima;
                    ++row;
                }
            }
        };

        // Comma fold: columns are processed strictly left to right.
        int idx = 0;
        (get_hash(args, idx++), ...);

        // Known cost: the keys are unordered, so these writes are random accesses.
        for (const key_t key : hash_values) {
            this->table[key] = true;
        }
    }
};