From 613941ce064d8b5dd1025327afa7da3032dde9d4 Mon Sep 17 00:00:00 2001 From: Bill Date: Sun, 18 Sep 2022 20:55:43 +0800 Subject: [PATCH] bug fixes --- .gitignore | 1 + data/network.csv | 7 +++++++ engine/types.py | 9 +++++---- prompt.py | 2 +- reconstruct/ast.py | 8 ++++++-- reconstruct/expr.py | 20 ++++++++++++-------- server/aggregations.h | 8 +------- server/hasher.h | 39 +++++++++++++++++++++++++++++++++++---- tests/network.a | 2 +- tests/strings.a | 2 +- 10 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 data/network.csv diff --git a/.gitignore b/.gitignore index 3b53a07..a2ad2b0 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ test*.c* !test2.csv !moving_avg.csv !nyctx100.csv +!network.csv *.out *.asm !mmw.so diff --git a/data/network.csv b/data/network.csv new file mode 100644 index 0000000..ac4d7b0 --- /dev/null +++ b/data/network.csv @@ -0,0 +1,7 @@ +src, dst, len, time +s1, s2, 250, 1 +s1, s2, 270, 20 +s1, s2, 235, 141 +s2, s1, 330, 47 +s2, s1, 280, 150 +s2, s1, 305, 155 diff --git a/engine/types.py b/engine/types.py index d65b1d4..477934d 100644 --- a/engine/types.py +++ b/engine/types.py @@ -88,9 +88,9 @@ class TypeCollection: type_table = dict() AnyT = Types(-1) LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '') -LazyT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE') -LazyT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME') -LazyT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP') +DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE') +TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME') +TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP') DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True) LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True) FloatT = Types(16, name = 'float', cname = 'float', sqlname = 'REAL', @@ -137,7 +137,8 @@ def _ty_make_dict(fn : str, *ty : Types): int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, ShortT, IntT) uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT) fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT) -builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types} +temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT) +builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types} def get_int128_support(): for t in int_types.values(): diff --git a/prompt.py b/prompt.py index 9310735..aadbeb7 100644 --- a/prompt.py +++ b/prompt.py @@ -357,7 +357,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): cxt = xengine.exec(state.stmts, cxt, keep) this_udf = cxt.finalize_udf() - if False and this_udf: + if this_udf: with open('udf.hpp', 'wb') as outfile: outfile.write(this_udf.encode('utf-8')) diff --git a/reconstruct/ast.py b/reconstruct/ast.py index df91959..7d5b0c8 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -233,12 +233,16 @@ class projection(ast_node): out_typenames[key] = decltypestring else: out_typenames[key] = val[0].cname - if (type(val[2].udf_called) is udf and + if (type(val[2].udf_called) is udf and # should bulkret also be colref? val[2].udf_called.return_pattern == udf.ReturnPattern.elemental_return or - self.group_node and self.group_node.use_sp_gb and + self.group_node and + (self.group_node.use_sp_gb and val[2].cols_mentioned.intersection( self.datasource.all_cols.difference(self.group_node.refs)) + ) and val[2].is_compound # compound val not in key + # or + # (not self.group_node and val[2].is_compound) ): out_typenames[key] = f'ColRef<{out_typenames[key]}>' diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 885eef4..b636667 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -66,7 +66,6 @@ class expr(ast_node): def init(self, _): from reconstruct.ast import projection parent = self.parent - self.isvector = parent.isvector if type(parent) is expr else False self.is_compound = parent.is_compound if type(parent) is expr else False if type(parent) in [projection, expr]: self.datasource = parent.datasource @@ -75,13 +74,16 @@ class expr(ast_node): self.udf_map = parent.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max'] + self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last'] def produce(self, node): from engine.utils import enlist from reconstruct.ast import udf if type(node) is dict: + if len(node) > 1: + print(f'Parser Error: {node} has more than 1 dict entry.') + for key, val in node.items(): if key in self.operators: if key in builtin_func: @@ -96,6 +98,11 @@ class expr(ast_node): exp_vals = [expr(self, v, c_code = self.c_code) for v in val] str_vals = [e.sql for e in exp_vals] type_vals = [e.type for e in exp_vals] + is_compound = any([e.is_compound for e in exp_vals]) + if key in self.ext_aggfuncs: + self.is_compound = False + else: + self.is_compound = is_compound try: self.type = op.return_type(*type_vals) except AttributeError as e: @@ -107,7 +114,7 @@ class expr(ast_node): self.sql = op(self.c_code, *str_vals) special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), - "maxs", "mins", "avgs", "sums", "deltas", "last"] + "maxs", "mins", "avgs", "sums", "deltas"] if self.context.special_gb: special_func = [*special_func, *self.ext_aggfuncs] @@ -203,10 +210,6 @@ class expr(ast_node): # get the column from the datasource in SQL context else: - p = self.parent - while type(p) is expr and not p.isvector: - p.isvector = True - p = p.parent if self.datasource is not None: self.raw_col = self.datasource.parse_col_names(node) self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None @@ -214,6 +217,7 @@ class expr(ast_node): self.is_ColExpr = True self.sql = self.raw_col.name self.type = self.raw_col.type + self.is_compound = True else: self.sql = node self.type = StrT @@ -234,7 +238,7 @@ class expr(ast_node): self.type = IntT elif type(node) is float: self.type = DoubleT - + def finalize(self, override = False): from reconstruct.ast import udf if self.codebuf is None or override: diff --git a/server/aggregations.h b/server/aggregations.h index e3442d0..0e41fc9 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -172,13 +172,7 @@ decayed_t deltas(const VT& arr) { template class VT> T last(const VT& arr) { const uint32_t& len = arr.size; - decayed_t ret(len); - uint32_t i = 0; - if (len) - ret[i++] = arr[0]; - for (; i < len; ++i) - ret[i] = arr[i-1]; - return ret; + return arr[arr.size - 1]; } // wrong behavior with count(0) diff --git a/server/hasher.h b/server/hasher.h index 8e3f510..2de5555 100644 --- a/server/hasher.h +++ b/server/hasher.h @@ -17,6 +17,39 @@ inline size_t append_bytes(const unsigned char* _First) noexcept { return _Val; } +namespace std{ + template<> + struct hash { + size_t operator()(const astring_view& _Keyval) const noexcept { + return append_bytes(_Keyval.str); + } + }; + template<> + struct hash { + size_t operator() (const types::date_t& _Keyval) const noexcept { + return std::hash()(*(unsigned int*)(&_Keyval)); + } + }; + template<> + struct hash { + size_t operator() (const types::time_t& _Keyval) const noexcept { + return std::hash()(_Keyval.ms) ^ + std::hash()(_Keyval.seconds) ^ + std::hash()(_Keyval.minutes) ^ + std::hash()(_Keyval.hours) + ; + } + }; + template<> + struct hash{ + size_t operator() (const types::timestamp_t& _Keyval) const noexcept { + return std::hash()(_Keyval.date) ^ + std::hash()(_Keyval.time); + } + }; + +} + inline size_t append_bytes(const astring_view& view) noexcept { return append_bytes(view.str); } @@ -32,10 +65,8 @@ struct hasher { template typename std::enable_if< i < sizeof ...(Types), size_t>::type hashi(const std::tuple& record) const { using current_type = typename std::decay>::type>::type; - if constexpr (is_cstr()) - return append_bytes((const unsigned char*)std::get(record)) ^ hashi(record); - else - return std::hash()(std::get(record)) ^ hashi(record); + + return std::hash()(std::get(record)) ^ hashi(record); } size_t operator()(const std::tuple& record) const { return hashi(record); diff --git a/tests/network.a b/tests/network.a index a6238ad..78ecc48 100644 --- a/tests/network.a +++ b/tests/network.a @@ -5,7 +5,7 @@ LOAD DATA INFILE "data/network.csv" INTO TABLE network FIELDS TERMINATED BY "," -SELECT src, dst, avg(len) +SELECT src, dst, avg(len) FROM network ASSUMING ASC src, ASC dst, ASC _time GROUP BY src, dst, sums (deltas(_time) > 120) diff --git a/tests/strings.a b/tests/strings.a index e38630f..e0fd643 100644 --- a/tests/strings.a +++ b/tests/strings.a @@ -6,4 +6,4 @@ FIELDS TERMINATED BY "," select names, val * 10000 + id from types_test -create table date_time(id int, _date date, _time time, _timestamp timestamp); + create table date_time(id int, _date date, _time time, _timestamp timestamp);