From a9b0c185e1c87bff1f24eecd8323876b4374283b Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 15 Oct 2022 04:12:17 +0800 Subject: [PATCH] pack(*), truncate, bug fixes --- engine/types.py | 7 ++++--- prompt.py | 2 +- reconstruct/ast.py | 25 +++++++++++++++++++++---- reconstruct/expr.py | 19 +++++++++++++++++-- server/aggregations.h | 22 ++++++++++++++++++++++ server/io.cpp | 11 ++++++++++- server/table.h | 2 ++ server/types.h | 21 +++++++++++++++++---- server/vector_type.hpp | 4 ++-- 9 files changed, 96 insertions(+), 17 deletions(-) diff --git a/engine/types.py b/engine/types.py index 106494a..c719e06 100644 --- a/engine/types.py +++ b/engine/types.py @@ -124,7 +124,7 @@ class VectorT(Types): return 'BIGINT' @property def cname(self) -> str: - return self.name + return f'{self.vector_type}<{self.inner_type.cname}>' @property def fp_type(self) -> Types: return VectorT(self.inner_type.fp_type, self.vector_type) @@ -287,7 +287,7 @@ def pack_behavior(op: OperatorBase, c_code, *x): if not c_code: return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})' else: - return f'decltype({x[0]})::pack(len(x) + 1, {", ".join([f"{xx}.s()" for xx in x])})' + return f'decltype({x[0]})::pack({len(x)}, {", ".join([f"{xx}.s()" for xx in x])})' # arithmetic opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior) @@ -337,6 +337,7 @@ spnull = OperatorBase('missing', 1, logical, cname = "", sqlname = "", call = is # cstdlib # If in aggregation functions, using monetdb builtins. If in nested agg, inside udfs, using cstdlib. +fntrunc = OperatorBase('truncate', 2, ty_clamp(as_is, 0, 1), cname = 'truncate', sqlname = 'TRUNCATE', call = fn_behavior) fnsqrt = OperatorBase('sqrt', 1, lambda *_ : DoubleT, cname = 'sqrt', sqlname = 'SQRT', call = fn_behavior) fnlog = OperatorBase('log', 2, lambda *_ : DoubleT, cname = 'log', sqlname = 'LOG', call = fn_behavior) fnsin = OperatorBase('sin', 1, lambda *_ : DoubleT, cname = 'sin', sqlname = 'SIN', call = fn_behavior) @@ -357,7 +358,7 @@ builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow) builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnratios, fnlast, fnfirst, fnsums, fnavgs, fncnt, - fnpack) + fnpack, fntrunc) user_module_func = {} builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, diff --git a/prompt.py b/prompt.py index a495eca..58eea0a 100644 --- a/prompt.py +++ b/prompt.py @@ -590,7 +590,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): state.stmts = parser.parse(og_q.strip()) cxt.Info(state.stmts) state.currstats.parse_time = state.currstats.stop() - except (ParseException, KeyError) as e: + except (ParseException) as e: print(e) continue except (ValueError, FileNotFoundError) as e: diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 31ae861..72d91bd 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -274,10 +274,17 @@ class projection(ast_node): # Create table into context out_typenames = [None] * len(proj_map) + def get_proj_name(proj_name): + if '*' in proj_name: + lst_names = self.datasource.get_cols(proj_name) + return ', '.join([self.pyname2cname[n.name] for n in lst_names]) + else: + return self.pyname2cname[proj_name] + for key, val in proj_map.items(): if type(val[1]) is str: x = True - y = lambda t: self.pyname2cname[t] + y = get_proj_name count = lambda : '0' if vid2cname: count = lambda : f'{vid2cname[0]}.size' @@ -286,7 +293,7 @@ class projection(ast_node): val[1] = val[1](False) if val[0] == LazyT: - decltypestring = val[2].eval(x,y,gettype=True)(True) + decltypestring = val[2].eval(x,y,gettype=True,c_code=True)(True) decltypestring = f'value_type>' out_typenames[key] = decltypestring else: @@ -740,8 +747,18 @@ class join(ast_node): print(f'Error: table {node} not found.') def get_cols(self, colExpr: str) -> Optional[ColRef]: - if colExpr == '*': - return self.all_cols(ordered = True, stripped = True) + if '*' in colExpr: + if colExpr == '*': + return self.all_cols(ordered = True, stripped = True) + elif colExpr.endswith('.*'): + tbl = colExpr.split('.') + if len(tbl) > 2: + raise KeyError(f'Invalid expression: {colExpr}') + if tbl[0] in self.tables_dir: + tbl : TableInfo= self.tables_dir[tbl[0]] + return tbl.all_cols(ordered = True) + else: + raise KeyError(f'Invalid table name: {colExpr}') for t in self.tables: if colExpr in t.columns_byname: col = t.columns_byname[colExpr] diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 4f1e9e3..403ec42 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -124,8 +124,22 @@ class expr(ast_node): if key == 'count' and type(val) is dict and 'distinct' in val: count_distinct = True val = val['distinct'] + val = enlist(val) - exp_vals = [expr(self, v, c_code = self.c_code) for v in val] + exp_vals = [] + for v in val: + if ( + type(v) is str and + '*' in v and + key != 'count' + ): + cols = self.datasource.get_cols(v) + if cols: + for c in cols: + exp_vals.append(expr(self, c.name, c_code=self.c_code)) + else: + exp_vals.append(expr(self, v, c_code=self.c_code)) + self.children = exp_vals self.opname = key @@ -151,7 +165,8 @@ class expr(ast_node): self.sql = op(self.c_code, *str_vals) special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), - "maxs", "mins", "avgs", "sums", "deltas", "last", "first", "ratios"] + "maxs", "mins", "avgs", "sums", "deltas", "last", "first", + "ratios", "pack", "truncate"] if self.context.special_gb: special_func = [*special_func, *self.ext_aggfuncs] diff --git a/server/aggregations.h b/server/aggregations.h index 77b5cf5..b3aecb7 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -37,6 +37,27 @@ VT sqrt(const VT& v) { } return ret; } +template +T truncate(const T& v, const uint32_t precision) { + auto multiplier = pow(10, precision); + if (v >= std::numeric_limits::max()/multiplier || + aq_fp_precision <= precision) + return v; + else + return round(v * multiplier)/multiplier; +} +template class VT> +VT truncate(const VT& v, const uint32_t precision) { + if (aq_fp_precision <= precision) + return v.subvec_memcpy(); + auto multiplier = pow(10, precision); + auto max_truncate = std::numeric_limits::max()/multiplier; + VT ret{ v.size }; + for (uint32_t i = 0; i < v.size; ++i) { // round or trunc?? + ret[i] = v[i] < max_truncate ? round(v[i] * multiplier)/multiplier : v[i]; + } + return ret; +} template class VT> T max(const VT& v) { @@ -207,6 +228,7 @@ T first(const VT& arr) { return arr[0]; } + #define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \ template constexpr inline T NAME(const T& v) { return RET; } diff --git a/server/io.cpp b/server/io.cpp index 694c7ce..878c0b6 100644 --- a/server/io.cpp +++ b/server/io.cpp @@ -241,7 +241,16 @@ std::ostream& operator<<(std::ostream& os, types::timestamp_t & v) print_datetime(v); return os; } - +std::ostream& operator<<(std::ostream& os, int8_t & v) +{ + os<(v); + return os; +} +std::ostream& operator<<(std::ostream& os, uint8_t & v) +{ + os<(v); + return os; +} std::string base62uuid(int l) { using namespace std; diff --git a/server/table.h b/server/table.h index 107e2ae..56c7a4b 100644 --- a/server/table.h +++ b/server/table.h @@ -46,6 +46,8 @@ std::ostream& operator<<(std::ostream& os, __int128& v); std::ostream& operator<<(std::ostream& os, __uint128_t& v); #endif +std::ostream& operator<<(std::ostream& os, int8_t& v); +std::ostream& operator<<(std::ostream& os, uint8_t& v); std::ostream& operator<<(std::ostream& os, types::date_t& v); std::ostream& operator<<(std::ostream& os, types::time_t& v); std::ostream& operator<<(std::ostream& os, types::timestamp_t& v); diff --git a/server/types.h b/server/types.h index 95dc36c..3ae14b6 100644 --- a/server/types.h +++ b/server/types.h @@ -53,12 +53,12 @@ constexpr bool aqis_same = aqis_same_impl::value; namespace types { enum Type_t { AINT32, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, AINT64, AINT128, AINT16, ADATE, ATIME, AINT8, - AUINT32, AUINT64, AUINT128, AUINT16, AUINT8, ABOOL, VECTOR, ATIMESTAMP, NONE, ERROR + AUINT32, AUINT64, AUINT128, AUINT16, AUINT8, ABOOL, VECTOR, ATIMESTAMP, ACHAR, NONE, ERROR }; - static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%c", - "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "NULL", "ERROR" }; + static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%hhd", + "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "%c", "NULL", "ERROR" }; static constexpr const char* SQL_Type[] = { "INT", "REAL", "TEXT", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", - "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "HUGEINT", "TIMESTAMP", "NULL", "ERROR"}; + "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "HUGEINT", "TIMESTAMP", "CHAR", "NULL", "ERROR"}; // TODO: deal with data/time <=> str/uint conversion @@ -434,6 +434,19 @@ struct nullval_impl { constexpr static float value = -std::numeric_limits template<> struct nullval_impl { constexpr static double value = -std::numeric_limits::quiet_NaN(); }; +template +constexpr uint32_t my_rlog10_approx(T v){ + uint32_t r = 0; + while (v + std::numeric_limits::epsilon() < 1){ + v *= 10; + r++; + } + return r; +} +template +inline constexpr uint32_t aq_fp_precision = std::is_floating_point_v ? + my_rlog10_approx(std::numeric_limits::epsilon()) : 0; + constexpr size_t sum_type(size_t a[], size_t sz) { size_t ret = 0; for (int i = 0; i < sz; ++i) diff --git a/server/vector_type.hpp b/server/vector_type.hpp index 7705e52..9b03e89 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -290,8 +290,8 @@ public: return subvec; } inline vector_type<_Ty> subvec(uint32_t start = 0) { return subvec(start, size); } - inline vector_type<_Ty> subvec_memcpy(uint32_t start = 0) { return subvec_memcpy(start, size); } - inline vector_type<_Ty> subvec_deep(uint32_t start = 0) { return subvec_deep(start, size); } + inline vector_type<_Ty> subvec_memcpy(uint32_t start = 0) const { return subvec_memcpy(start, size); } + inline vector_type<_Ty> subvec_deep(uint32_t start = 0) const { return subvec_deep(start, size); } vector_type<_Ty> getRef() { return vector_type<_Ty>(container, size); } ~vector_type() { if (capacity > 0) free(container);