initial distinct support

dev
Bill 2 years ago
parent ff63be720c
commit ed58c13dc9

@ -248,7 +248,7 @@ def count_behavior(op:OperatorBase, c_code, x, distinct = False):
if not c_code:
return f'{op.sqlname}({"distinct " if distinct else ""}{x})'
elif distinct:
return 'count_'+base62uuid()
return '({x}).distinct_size()'
else:
return '{count()}'

@ -94,6 +94,7 @@ class expr(ast_node):
print(f'Parser Error: {node} has more than 1 dict entry.')
for key, val in node.items():
key = key.lower()
if key in self.operators:
if key in builtin_func:
if self.is_agg_func:
@ -121,9 +122,9 @@ class expr(ast_node):
try:
self.type = op.return_type(*type_vals)
except AttributeError as e:
if type(self.root) is not udf:
if type(self.root.parent) is not udf:
# TODO: do something when this is not an error
# print(f'alert: {e}')
print(f'alert: {e}')
pass
self.type = AnyT
@ -198,7 +199,11 @@ class expr(ast_node):
self.udf_decltypecall = ex_vname.sql
else:
print(f'Undefined expr: {key}{val}')
if 'distinct' in val and key != count:
if self.c_code:
self.sql = 'distinct ' + self.sql
elif self.is_compound:
self.sql = '(' + self.sql + ').distinct()'
if type(node) is str:
if self.is_udfexpr:
curr_udf : udf = self.root.udf
@ -395,4 +400,4 @@ class getrefs(expr):
def consume(self, _):
if self.root == self:
self.rec = self.datasource.rec
self.datasource.rec = None
self.datasource.rec = None

@ -139,7 +139,9 @@ public:
const ColRef<_Ty>& orig;
constexpr Iterator_t(const uint32_t* val, const ColRef<_Ty>& orig) noexcept : val(val), orig(orig) {}
_Ty& operator*() { return orig[*val]; }
bool operator != (const Iterator_t& rhs) { return rhs.val != val; }
bool operator != (const Iterator_t& rhs) const { return rhs.val != val; }
bool operator == (const Iterator_t& rhs) const { return rhs.val == val; }
size_t operator - (const Iterator_t& rhs) const { return val - rhs.val; }
Iterator_t& operator++ () {
++val;
return *this;
@ -180,6 +182,20 @@ public:
subvec[i] = operator[](i);
return subvec;
}
std::unordered_set<_Ty> distinct_common() {
return std::unordered_set<_Ty> {begin(), end()};
}
// Returns the number of distinct values, i.e. the size of the set produced by
// distinct_common(), narrowed to uint32_t.
uint32_t distinct_size(){
	const auto uniq = distinct_common();
	return static_cast<uint32_t>(uniq.size());
}
// Returns a new ColRef holding each distinct value exactly once.
// Values are written in the hash set's iteration order, which is unspecified.
ColRef<_Ty> distinct(){
	const auto uniq = distinct_common();
	ColRef<_Ty> out(uniq.size());
	uint32_t pos = 0;
	for (auto it = uniq.begin(); it != uniq.end(); ++it, ++pos)
		out.container[pos] = *it;
	return out;
}
// Convenience wrapper: forwards to subvec_deep(start, size), i.e. from `start`
// (default 0) up to this object's `size`.
// NOTE(review): presumably yields a deep copy, judging by the callee's name — confirm against subvec_deep.
inline ColRef<_Ty> subvec(uint32_t start = 0) { return subvec_deep(start, size); }
};
template <template <class...> class VT, class T>
@ -408,10 +424,43 @@ struct TableInfo {
applyIntegerSequence<sizeof...(Types), applier>::apply(*this, sep, end, view, fp);
}
TableInfo< Types... >* rename(const char* name) {
// Assigns `name` to this table's `name` member and returns `this`, so the
// call can be chained.
TableInfo<Types...>* rename(const char* name) {
this->name = name;
return this;
}
// Sets the `size` field of every column selected by the index sequence to
// `size`. Only the field is assigned here; no other work is performed.
template <size_t ...Is>
void inline
reserve(std::index_sequence<Is...>, uint32_t size) {
	((get_col<Is>(*this).size = size), ...);
}
// Returns row `i` of the table as a tuple with one element per column
// selected by the index sequence.
//
// The arguments are produced with a pack expansion (`expr...`). A comma fold
// `(expr, ...)` must NOT be used here: it evaluates every operand but yields
// only the last one, which would produce a one-element tuple.
//
// The tuple is built with std::forward_as_tuple, so its elements are
// references to whatever get_col<Is>(*this)[i] returns; copy it (e.g. into a
// tuple_type) before the underlying columns are modified.
template <size_t ...Is>
decltype(auto) inline
get_record(std::index_sequence<Is...>, uint32_t i) {
	return std::forward_as_tuple(get_col<Is>(*this)[i]...);
}
template <size_t ...Is>
void inline
set_record(std::index_sequence<Is...>, tuple_type t) {
const auto& assign_field =
[](auto& l, const auto& r){
l = r;
};
(assign_field(get_col<Is>(*this)[Is], std::get<Is>(t)), ...);
}
TableInfo<Types ...>* distinct() {
std::unordered_set<tuple_type> d_records;
std::make_index_sequence<sizeof...(Types)> seq;
for (uint32_t j = 0; j < colrefs[0].size; ++j) {
d_records.insert(get_record(seq, j));
}
reserve(seq, d_records.size());
for (const auto& dr : d_records) {
set_record(seq, dr);
}
return this;
}
// defined in monetdb_conn.cpp
void monetdb_append_table(void* srv, const char* alt_name = nullptr);
};
@ -419,6 +468,7 @@ struct TableInfo {
template<class ...Types>
struct TableView {
typedef std::tuple<Types...> tuple_type;
const vector_type<uint32_t>* idxs;
const TableInfo<Types...>& info;
constexpr TableView(const vector_type<uint32_t>* idxs, const TableInfo<Types...>& info) noexcept : idxs(idxs), info(info) {}

@ -11,6 +11,8 @@
#include <cstdint>
#include <iterator>
#include <initializer_list>
#include <unordered_set>
#include "hasher.h"
#include "types.h"
#pragma pack(push, 1)
@ -107,6 +109,34 @@ public:
return *this;
}
// Copies the elements in [container, container + size) into a hash set,
// which collapses duplicates. Iteration order of the result is unspecified.
inline std::unordered_set<value_t> distinct_common(){
	const auto first = container;
	const auto last = first + size;
	return std::unordered_set<value_t>(first, last);
}
// Deduplicates this vector in place and returns *this.
//
// The distinct values are first gathered into a separate hash set (so the
// write-back cannot clobber values that are still to be read), then written
// to the front of `container`. `size` is shrunk to the number of distinct
// values so the stale elements past that point are no longer part of the
// vector. The resulting element order is unspecified.
vector_type<_Ty>& distinct_inplace(){
	const auto uniq = distinct_common();
	uint32_t i = 0;
	for(const auto& v : uniq){
		container[i++] = v;
	}
	size = i;
	return *this;
}
// Returns a new vector, constructed with the distinct-value count, that holds
// each distinct value once. This vector is left untouched. Element order
// follows the hash set's iteration order and is unspecified.
vector_type<_Ty> distinct_copy(){
	const auto uniq = distinct_common();
	vector_type<_Ty> out(uniq.size());
	uint32_t slot = 0;
	for (auto it = uniq.begin(); it != uniq.end(); ++it)
		out[slot++] = *it;
	return out;
}
// Returns how many distinct values the vector contains, i.e. the size of the
// set built by distinct_common(), narrowed to uint32_t.
uint32_t distinct_size(){
	const auto uniq = distinct_common();
	return static_cast<uint32_t>(uniq.size());
}
// Returns a vector of the distinct values, by value.
// With a zero `capacity` the result comes from distinct_copy() and this
// vector is not modified; otherwise the vector is deduplicated in place via
// distinct_inplace() and a copy of it is returned.
// NOTE(review): presumably capacity == 0 marks storage this vector does not own — confirm.
vector_type<_Ty> distinct(){
	if (!capacity)
		return distinct_copy();
	return distinct_inplace();
}
inline void grow() {
if (size >= capacity) { // geometric growth
uint32_t new_capacity = size + 1 + (size >> 1);

Loading…
Cancel
Save