From fb870c9977f852e88787f92ca8cd769b93963fd9 Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 23 Sep 2022 16:08:24 +0800 Subject: [PATCH] fixed wildcard compound cols, ratios, etc. --- build.py | 2 +- engine/types.py | 17 +++++++++---- engine/utils.py | 4 ++++ prompt.py | 4 ++-- reconstruct/ast.py | 46 +++++++++++++++++++++++++----------- reconstruct/expr.py | 9 +++---- reconstruct/new_expr.py | 4 ++-- sdk/aquery.h | 5 ++++ server/aggregations.h | 34 ++++++++++++++++++++++++++ server/table.h | 1 + server/table_ext_monetdb.hpp | 4 ++-- server/types.h | 29 +++++++++++++++++++++-- tests/best_profit.a | 3 +-- 13 files changed, 129 insertions(+), 33 deletions(-) diff --git a/build.py b/build.py index 3312a0b..aa10a7a 100644 --- a/build.py +++ b/build.py @@ -69,7 +69,7 @@ class checksums: class build_manager: sourcefiles = [ - 'build.py', + 'build.py', 'Makefile', 'server/server.cpp', 'server/io.cpp', 'server/monetdb_conn.cpp', 'server/threading.cpp', 'server/winhelper.cpp' diff --git a/engine/types.py b/engine/types.py index 3083795..6e49eea 100644 --- a/engine/types.py +++ b/engine/types.py @@ -106,7 +106,9 @@ ULongT = Types(8, name = 'uint64', sqlname = 'UINT64', fp_type=DoubleT) UIntT = Types(7, name = 'uint32', sqlname = 'UINT32', long_type=ULongT, fp_type=FloatT) UShortT = Types(6, name = 'uint16', sqlname = 'UINT16', long_type=ULongT, fp_type=FloatT) UByteT = Types(5, name = 'uint8', sqlname = 'UINT8', long_type=ULongT, fp_type=FloatT) -StrT = Types(200, name = 'str', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::ASTR') +StrT = Types(200, name = 'str', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') +TextT = Types(200, name = 'text', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') +VarcharT = Types(200, name = 'varchar', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::ASTR') VoidT = Types(200, name = 'void', cname = 'void', sqlname='Null', ctype_name = 'types::None') class VectorT(Types): @@ -138,7 +140,10 @@ int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT) fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT) temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT) -builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types} +builtin_types : Dict[str, Types] = { + 'string' : StrT, + **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT), + **int_types, **fp_types, **temporal_types} def get_int128_support(): for t in int_types.values(): @@ -267,7 +272,9 @@ def windowed_fn_behavor(op: OperatorBase, c_code, *x): # arithmetic opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior) -opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) +# monetdb wont extend int division to fp type +# opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) +opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior) opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior) opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior) opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior) @@ -288,7 +295,9 @@ opdistinct = OperatorBase('distinct', 1, as_is, cname = '.distinct()', sqlname = fnmax = OperatorBase('max', 1, as_is, cname = 'max', sqlname = 'MAX', call = fn_behavior) fnmin = OperatorBase('min', 1, as_is, cname = 'min', sqlname = 'MIN', call = fn_behavior) fndeltas = OperatorBase('deltas', 1, as_is, cname = 'deltas', sqlname = 'DELTAS', call = fn_behavior) +fnratios = OperatorBase('ratios', [1, 2], fp(ty_clamp(as_is, -1)), cname = 'ratios', sqlname = 'RATIOS', call = windowed_fn_behavor) fnlast = OperatorBase('last', 1, as_is, cname = 'last', sqlname = 'LAST', call = fn_behavior) +fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', call = fn_behavior) #fnsum = OperatorBase('sum', 1, ext(auto_extension), cname = 'sum', sqlname = 'SUM', call = fn_behavior) #fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior) fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior) @@ -324,7 +333,7 @@ builtin_unary_logical = _op_make_dict(opnot) builtin_unary_arith = _op_make_dict(opneg) builtin_unary_special = _op_make_dict(spnull, opdistinct) builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow) -builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnlast, fnsums, fnavgs, fncnt) +builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnratios, fnlast, fnfirst, fnsums, fnavgs, fncnt) user_module_func = {} builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, diff --git a/engine/utils.py b/engine/utils.py index 1a8b403..995d354 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -137,3 +137,7 @@ def add_dll_dir(dll: str): os.environ['PATH'] = os.path.abspath(dll) + os.pathsep + os.environ['PATH'] nullstream = open(os.devnull, 'w') + + +def clamp(val, minval, maxval): + return min(max(val, minval), maxval) \ No newline at end of file diff --git a/prompt.py b/prompt.py index 0f59d33..91fbbe2 100644 --- a/prompt.py +++ b/prompt.py @@ -29,7 +29,7 @@ prompt_help = '''\ ******** AQuery Prompt Help ********* help: - print out this message + print this help message help commandline: print help message for AQuery Commandline : @@ -420,7 +420,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): continue elif q == 'format' or q == 'fmt': subprocess.call(['clang-format', 'out.cpp']) - elif q == 'exit': + elif q == 'exit' or q == 'exit()': rm(state) exit() elif q == 'r': # build and run diff --git a/reconstruct/ast.py b/reconstruct/ast.py index d6293c4..6ed4e63 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -133,7 +133,7 @@ class projection(ast_node): sql_expr = expr(self, e, c_code=False) this_type = proj_expr.type name = proj_expr.sql - compound = True # compound column + compound = [proj_expr.is_compound > 1] # compound column proj_expr.cols_mentioned = self.datasource.rec alias = '' if 'name' in proj: # renaming column by AS keyword @@ -142,23 +142,28 @@ class projection(ast_node): if not proj_expr.is_special: if proj_expr.node == '*': name = [c.get_full_name() for c in self.datasource.rec] + this_type = [c.type for c in self.datasource.rec] + compound = [c.compound for c in self.datasource.rec] + proj_expr = [expr(self, c.name) for c in self.datasource.rec] else: y = lambda x:x count = lambda : 'count(*)' name = enlist(sql_expr.eval(False, y, count=count)) - for n in name: + this_type = enlist(this_type) + proj_expr = enlist(proj_expr) + for t, n, pexpr in zip(this_type, name, proj_expr): offset = len(col_exprs) if n not in self.var_table: self.var_table[n] = offset - if proj_expr.is_ColExpr and type(proj_expr.raw_col) is ColRef: - for _alias in (proj_expr.raw_col.table.alias): + if pexpr.is_ColExpr and type(pexpr.raw_col) is ColRef: + for _alias in (pexpr.raw_col.table.alias): self.var_table[f'{_alias}.'+n] = offset - proj_map[i] = [this_type, offset, proj_expr] + proj_map[i] = [t, offset, pexpr] col_expr = n + ' AS ' + alias if alias else n if alias: self.var_table[alias] = offset - col_exprs.append((col_expr, proj_expr.type)) + col_exprs.append((col_expr, t)) i += 1 else: self.context.headers.add('"./server/aggregations.h"') @@ -169,7 +174,8 @@ class projection(ast_node): i += 1 name = enlist(name) disp_name = [get_legal_name(alias if alias else n) for n in name] - + this_type = enlist(this_type) + elif type(proj) is str: col = self.datasource.get_col(proj) this_type = col.type @@ -178,8 +184,8 @@ class projection(ast_node): # name = col.name self.datasource.rec = None # TODO: Type deduction in Python - for n in disp_name: - cols.append(ColRef(this_type, self.out_table, None, n, len(cols), compound=compound)) + for t, n, c in zip(this_type, disp_name, compound): + cols.append(ColRef(t, self.out_table, None, n, len(cols), compound=c)) self.out_table.add_cols(cols, new = False) @@ -213,7 +219,7 @@ class projection(ast_node): self.add(self.group_node.sql) if self.col_ext or self.group_node and self.group_node.use_sp_gb: - self.use_postproc = True + self.has_postproc = True o = self.assumptions if 'orderby' in node: @@ -223,7 +229,7 @@ class projection(ast_node): if 'outfile' in node: self.outfile = outfile(self, node['outfile'], sql = self.sql) - if not self.use_postproc: + if not self.has_postproc: self.sql += self.outfile.sql else: self.outfile = None @@ -279,11 +285,12 @@ class projection(ast_node): val[2].cols_mentioned.intersection( self.datasource.all_cols().difference(self.group_node.refs)) ) and val[2].is_compound # compound val not in key - # or + or + val[2].is_compound > 1 # (not self.group_node and val[2].is_compound) ): out_typenames[key] = f'ColRef<{out_typenames[key]}>' - + self.out_table.columns[key].compound = True outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in self.out_table.columns]) self.outtable_col_names = 'names_' + base62uuid(4) self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};') @@ -729,6 +736,17 @@ class create_table(ast_node): name = 'create_table' first_order = name def init(self, node): + node = node[self.name] + if 'query' in node: + if 'name' not in node: + raise ValueError("Table name not specified") + projection_node = node['query'] + projection_node['into'] = node['name'] + projection(None, projection_node, self.context) + self.produce = lambda *_: None + self.spawn = lambda *_: None + self.consume = lambda *_: None + return if self.parent is None: self.context.sql_begin() self.sql = 'CREATE TABLE ' @@ -851,7 +869,7 @@ class outfile(ast_node): def init(self, _): assert(isinstance(self.parent, projection)) - if not self.parent.use_postproc: + if not self.parent.has_postproc: if self.context.dialect == 'MonetDB': self.produce = self.produce_monetdb else: diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 504ab8e..e5980d4 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -80,7 +80,7 @@ class expr(ast_node): self.udf_map = parent.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last'] + self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first'] def produce(self, node): from engine.utils import enlist @@ -114,9 +114,9 @@ class expr(ast_node): str_vals = [e.sql for e in exp_vals] type_vals = [e.type for e in exp_vals] - is_compound = any([e.is_compound for e in exp_vals]) + is_compound = max([e.is_compound for e in exp_vals]) if key in self.ext_aggfuncs: - self.is_compound = False + self.is_compound = max(0, is_compound - 1) else: self.is_compound = is_compound try: @@ -134,7 +134,7 @@ class expr(ast_node): self.sql = op(self.c_code, *str_vals) special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), - "maxs", "mins", "avgs", "sums", "deltas", "last"] + "maxs", "mins", "avgs", "sums", "deltas", "last", "first", "ratios"] if self.context.special_gb: special_func = [*special_func, *self.ext_aggfuncs] @@ -259,6 +259,7 @@ class expr(ast_node): self.sql = table_name + self.raw_col.name self.type = self.raw_col.type self.is_compound = True + self.is_compound += self.raw_col.compound self.opname = self.raw_col else: self.sql = '\'' + node + '\'' if node != '*' else '*' diff --git a/reconstruct/new_expr.py b/reconstruct/new_expr.py index d12ef56..b6b02cf 100644 --- a/reconstruct/new_expr.py +++ b/reconstruct/new_expr.py @@ -16,7 +16,7 @@ class expr_base(ast_node, metaclass = abc.ABCMeta): self.udf_map = self.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.narrow_funcs = ['sum', 'avg', 'count', 'min', 'max', 'last'] + self.narrow_funcs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first'] def get_variable(self): pass @@ -56,7 +56,7 @@ class expr_base(ast_node, metaclass = abc.ABCMeta): raise ValueError(f'Parse Error: more than 1 entry in {node}.') key, val = next(iter(node.items())) if key in self.operators: - self.child_exprs = [__class__(self, v) for v in val] + self.child_exprs = [self.__class__(self, v) for v in val] self.process_child_nodes() else: self.process_non_operator(key, val) diff --git a/sdk/aquery.h b/sdk/aquery.h index 3ef5bb6..2fe0404 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -75,7 +75,12 @@ extern void register_memory(void* ptr, deallocator_t deallocator); __AQEXPORT__(void) init_session(Context* cxt); #define __AQ_NO_SESSION__ __AQEXPORT__(void) init_session(Context*) {} + +#ifdef _MSC_VER +void* _cdecl memcpy(void*, void*, size_t); +#else void* memcpy(void*, const void*, unsigned long long); +#endif struct ColRef_storage { void* container; unsigned int capacity, size; diff --git a/server/aggregations.h b/server/aggregations.h index 514e4a6..48a4b35 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -106,6 +106,27 @@ decayed_t maxw(uint32_t w, const VT& arr) { return ret; } +template class VT> +decayed_t> ratiow(uint32_t w, const VT& arr) { + typedef std::decay_t> FPType; + uint32_t len = arr.size; + if (arr.size <= w) + len = 1; + w = w > len ? len : w; + decayed_t ret(arr.size); + ret[0] = 0; + for (uint32_t i = 0; i < w; ++i) + ret[i] = arr[i] / (FPType)arr[0]; + for (uint32_t i = w; i < arr.size; ++i) + ret[i] = arr[i] / (FPType) arr[i - w]; + return ret; +} + +template class VT> +decayed_t> ratios(const VT& arr) { + return ratiow(1, arr); +} + template class VT> decayed_t> sums(const VT& arr) { const uint32_t& len = arr.size; @@ -171,10 +192,21 @@ decayed_t deltas(const VT& arr) { template class VT> T last(const VT& arr) { + if(!arr.size) return 0; const uint32_t& len = arr.size; return arr[arr.size - 1]; } +template class VT> +T first(const VT& arr) { + if(!arr.size) return 0; + const uint32_t& len = arr.size; + return arr[0]; +} + +#define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \ +template constexpr inline T NAME(const T& v) { return RET; } + // wrong behavior with count(0) template constexpr inline T count(const T& v) { return 1; } template constexpr inline T max(const T& v) { return v; } @@ -185,9 +217,11 @@ template constexpr inline T maxw(uint32_t, const T& v) { return v; } template constexpr inline T minw(uint32_t, const T& v) { return v; } template constexpr inline T avgw(uint32_t, const T& v) { return v; } template constexpr inline T sumw(uint32_t, const T& v) { return v; } +template constexpr inline T ratiow(uint32_t, const T& v) { return 1; } template constexpr inline T maxs(const T& v) { return v; } template constexpr inline T mins(const T& v) { return v; } template constexpr inline T avgs(const T& v) { return v; } template constexpr inline T sums(const T& v) { return v; } template constexpr inline T last(const T& v) { return v; } template constexpr inline T daltas(const T& v) { return 0; } +template constexpr inline T ratios(const T& v) { return 1; } diff --git a/server/table.h b/server/table.h index 1e07968..e5502af 100644 --- a/server/table.h +++ b/server/table.h @@ -395,6 +395,7 @@ struct TableInfo { + num_time * types::time_t::string_length() + num_date * types::date_t::string_length() + num_timestamp * types::timestamp_t::string_length() + + 1 // padding for msvc not allowing empty arrays ]; setgbuf(cbuf); if(view) diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp index e74cfef..66c925a 100644 --- a/server/table_ext_monetdb.hpp +++ b/server/table_ext_monetdb.hpp @@ -44,8 +44,8 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { puts("getcols done"); for(int i = 0; i < sizeof...(Ts); ++i) { - printf("no:%d name: %s count:%d data: %p \n", - i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data); + printf("no:%d name: %s count:%d data: %p type:%d \n", + i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type); } std::string create_table_str = "CREATE TABLE "; create_table_str += alt_name; diff --git a/server/types.h b/server/types.h index d4a5656..13d1b72 100644 --- a/server/types.h +++ b/server/types.h @@ -29,7 +29,7 @@ namespace types { static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%c", "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "NULL", "ERROR" }; static constexpr const char* SQL_Type[] = { "INT", "REAL", "TEXT", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", - "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BIGINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; + "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; // TODO: deal with data/time <=> str/uint conversion @@ -167,8 +167,33 @@ namespace types { }; template using GetLongType = typename GetLongTypeImpl::type>::type; + + + template + struct GetLongerTypeImpl { + using type = Cond( + + __U(T), Cond(__Eq(char), unsigned short, + Cond(__Eq(short), unsigned int, + Cond(__Eq(int), unsigned long long, + ULL_Type + ))), + + Cond(Fp(T), double, + + Cond(__Eq(char), short, + Cond(__Eq(short), int, + Cond(__Eq(int), long, + LL_Type + )))) + + ); + }; + template + using GetLongerType = typename GetLongerTypeImpl::type>::type; } + struct astring_view { const unsigned char* str = 0; constexpr astring_view(const char* str) : @@ -200,7 +225,7 @@ struct astring_view { return reinterpret_cast(str); } operator const signed char* () const { - return reinterpret_cast(str); + return reinterpret_cast(str); } }; diff --git a/tests/best_profit.a b/tests/best_profit.a index f6f3bf5..4d242ec 100644 --- a/tests/best_profit.a +++ b/tests/best_profit.a @@ -43,5 +43,4 @@ SELECT ID, avgs(10, ClosePrice) FROM td NATURAL JOIN HistoricQuotes ASSUMING ASC TradeDate -GROUP BY ID -ORDER BY ID \ No newline at end of file +GROUP BY ID \ No newline at end of file