From ed58c13dc93948e6a003aca8088c2d27bfd16fe1 Mon Sep 17 00:00:00 2001 From: Bill Date: Tue, 20 Sep 2022 22:17:56 +0800 Subject: [PATCH] initial distinct support --- engine/types.py | 2 +- reconstruct/expr.py | 13 ++++++---- server/table.h | 54 ++++++++++++++++++++++++++++++++++++++++-- server/vector_type.hpp | 30 +++++++++++++++++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/engine/types.py b/engine/types.py index d99fc56..de80c7d 100644 --- a/engine/types.py +++ b/engine/types.py @@ -248,7 +248,7 @@ def count_behavior(op:OperatorBase, c_code, x, distinct = False): if not c_code: return f'{op.sqlname}({"distinct " if distinct else ""}{x})' elif distinct: - return 'count_'+base62uuid() + return '({x}).distinct_size()' else: return '{count()}' diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 91ed2f9..ce5ea4f 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -94,6 +94,7 @@ class expr(ast_node): print(f'Parser Error: {node} has more than 1 dict entry.') for key, val in node.items(): + key = key.lower() if key in self.operators: if key in builtin_func: if self.is_agg_func: @@ -121,9 +122,9 @@ class expr(ast_node): try: self.type = op.return_type(*type_vals) except AttributeError as e: - if type(self.root) is not udf: + if type(self.root.parent) is not udf: # TODO: do something when this is not an error - # print(f'alert: {e}') + print(f'alert: {e}') pass self.type = AnyT @@ -198,7 +199,11 @@ class expr(ast_node): self.udf_decltypecall = ex_vname.sql else: print(f'Undefined expr: {key}{val}') - + if 'distinct' in val and key != count: + if self.c_code: + self.sql = 'distinct ' + self.sql + elif self.is_compound: + self.sql = '(' + self.sql + ').distinct()' if type(node) is str: if self.is_udfexpr: curr_udf : udf = self.root.udf @@ -395,4 +400,4 @@ class getrefs(expr): def consume(self, _): if self.root == self: self.rec = self.datasource.rec - self.datasource.rec = None \ No newline at end of file + self.datasource.rec = None diff --git a/server/table.h b/server/table.h index 36b653a..f96c0e6 100644 --- a/server/table.h +++ b/server/table.h @@ -139,7 +139,9 @@ public: const ColRef<_Ty>& orig; constexpr Iterator_t(const uint32_t* val, const ColRef<_Ty>& orig) noexcept : val(val), orig(orig) {} _Ty& operator*() { return orig[*val]; } - bool operator != (const Iterator_t& rhs) { return rhs.val != val; } + bool operator != (const Iterator_t& rhs) const { return rhs.val != val; } + bool operator == (const Iterator_t& rhs) const { return rhs.val == val; } + size_t operator - (const Iterator_t& rhs) const { return val - rhs.val; } Iterator_t& operator++ () { ++val; return *this; @@ -180,6 +182,20 @@ public: subvec[i] = operator[](i); return subvec; } + std::unordered_set<_Ty> distinct_common() { + return std::unordered_set<_Ty> {begin(), end()}; + } + uint32_t distinct_size(){ + return distinct_common().size(); + } + ColRef<_Ty> distinct(){ + auto set = distinct_common(); + ColRef<_Ty> ret(set.size()); + uint32_t i = 0; + for (auto& val : set) + ret.container[i++] = val; + return ret; + } inline ColRef<_Ty> subvec(uint32_t start = 0) { return subvec_deep(start, size); } }; template