diff --git a/.gitignore b/.gitignore
index a2ad2b0..3ef09c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+tests/datagen_jose/histgen
+tests/datagen_jose/tickgen
+datagen
 *.dSYM
 testmain.lib
 testmain.exp
diff --git a/aquery_config.py b/aquery_config.py
index 0470d20..3330b6e 100644
--- a/aquery_config.py
+++ b/aquery_config.py
@@ -2,7 +2,7 @@
 
 ## GLOBAL CONFIGURATION FLAGS
 
-version_string = '0.4.4a'
+version_string = '0.4.5a'
 add_path_to_ldpath = True
 rebuild_backend = False
 run_backend = True
diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py
index 479081b..5ae05bf 100644
--- a/aquery_parser/keywords.py
+++ b/aquery_parser/keywords.py
@@ -44,6 +44,7 @@ LEFT = keyword("left")
 LIKE = keyword("like")
 LIMIT = keyword("limit").suppress()
 MINUS = keyword("minus")
+NATURAL = keyword("natural")
 OFFSET = keyword("offset").suppress()
 ON = keyword("on").suppress()
 ORDER = keyword("order").suppress()
@@ -145,7 +146,7 @@ VIEW = keyword("view")
 
 joins = (
     (
-        Optional(CROSS | OUTER | INNER | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
+        Optional(CROSS | OUTER | INNER | NATURAL | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
         + JOIN
         + Optional(LATERAL)
    )
@@ -214,6 +215,7 @@ RESERVED = MatchFirst([
    LIKE,
    LIMIT,
    MINUS,
+   NATURAL,
    NOCASE,
    NOT,
    NULL,
@@ -253,6 +255,7 @@ EQ = Char("=").suppress()
 
 join_keywords = {
    "join",
+   "natural join",
    "full join",
    "cross join",
    "inner join",
diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py
index c07aea3..45bbe28 100644
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@@ -323,9 +323,12 @@ def parser(literal_string, ident, sqlserver=False):
 
     table_source = Forward()
 
+    assumption = Group((ASC|DESC) ("sort") + var_name("value"))
+    assumptions = Optional(ASSUMING.suppress() + Group(delimited_list(assumption)))
+
     join = (
         Group(joins)("op")
-        + table_source("join")
+        + (table_source )("join")
         + Optional((ON + expr("on")) | (USING + expr("using")))
         | (
             Group(WINDOW)("op")
@@ -403,7 +406,12 @@ def parser(literal_string, ident, sqlserver=False):
         | selection
         + Optional(INTO + table_source("into"))
         + Optional(
-            (FROM + delimited_list(table_source) + ZeroOrMore(join))("from")
+            (
+                FROM
+                + (delimited_list(table_source)
+                + ZeroOrMore(join))("table_source")
+                + Optional(assumptions) ("assumptions")
+            )("from")
             + Optional(WHERE + expr("where"))
             + Optional(GROUP_BY + delimited_list(Group(named_column))("groupby"))
             + Optional(HAVING + expr("having"))
@@ -443,12 +451,8 @@ def parser(literal_string, ident, sqlserver=False):
         + RB,
     )
 
-    assumption = Group((ASC|DESC) ("sort") + var_name("value"))
-    assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions"))
-
     table_source << Group(
         ((LB + query + RB) | stack | call_function | var_name)("value")
-        + Optional(assumptions)
         + Optional(flag("with ordinality"))
         + Optional(tablesample)
         + alias
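[Note — editor's illustration, not part of the patch: with the grammar change above, ASSUMING now binds to the whole FROM clause instead of each table_source, and the parse result gains 'table_source' and 'assumptions' entries under 'from'. A minimal sketch of the shape the engine code below expects; the exact dict layout is an assumption inferred from the pyparsing result names:]

    # Hypothetical parse result for:
    #   SELECT price FROM stock ASSUMING ASC timestamp
    parsed = {
        'select': {'value': 'price'},
        'from': {
            'table_source': 'stock',
            'assumptions': [{'sort': 'asc', 'value': 'timestamp'}],
        },
    }
    from_clause = parsed['from']['table_source']  # what projection.spawn reads below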
diff --git a/build.py b/build.py
index b6d4006..3312a0b 100644
--- a/build.py
+++ b/build.py
@@ -21,7 +21,10 @@ class checksums:
             server = 'server.so'
         ):
         from platform import machine
-        self.env = aquery_config.os_platform + machine() + aquery_config.build_driver
+        self.env = (aquery_config.os_platform +
+                    machine() +
+                    aquery_config.build_driver
+        )
         for key in self.__dict__.keys():
             try:
                 with open(eval(key), 'rb') as file:
@@ -37,12 +40,13 @@ class checksums:
             except FileNotFoundError:
                 print('missing component: ' + key)
                 self.sources[key] = None
+
    def __ne__(self, __o: 'checksums') -> 'checksums':
        ret = checksums()
        for key in self.__dict__.keys():
            try:
                ret.__dict__[key] = (
-                    self.__dict__[key] and __o.__dict__[key] and
+                    not (self.__dict__[key] and __o.__dict__[key]) or
                    self.__dict__[key] != __o.__dict__[key]
                )
            except KeyError:
@@ -54,7 +58,7 @@ class checksums:
        for key in self.__dict__.keys():
            try:
                ret.__dict__[key] = (
-                    not (self.__dict__[key] and __o.__dict__[key]) or
+                    self.__dict__[key] and __o.__dict__[key] and
                    self.__dict__[key] == __o.__dict__[key]
                )
            except KeyError:
@@ -82,16 +86,19 @@ class build_manager:
            self.mgr = mgr
            self.build_cmd = []
        def libaquery_a(self) :
-            pass
+            return False
        def pch(self):
-            pass
+            return False
        def build(self, stdout = sys.stdout, stderr = sys.stderr):
+            ret = True
            for c in self.build_cmd:
                if c:
                    try:
-                        subprocess.call(c, stdout = stdout, stderr = stderr)
+                        ret = subprocess.call(c, stdout = stdout, stderr = stderr) == 0 and ret
                    except (FileNotFoundError):
+                        ret = False
                        pass
+            return ret
 
    class MakefileDriver(DriverBase):
        def __init__(self, mgr : 'build_manager') -> None:
@@ -132,7 +139,7 @@
            loc = os.path.abspath('./msc-plugin/libaquery.vcxproj')
            self.get_flags()
            self.build_cmd = [['del', 'libaquery.lib'], [aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
        def pch(self):
            pass
@@ -141,24 +148,17 @@
            loc = os.path.abspath('./msc-plugin/server.vcxproj')
            self.get_flags()
            self.build_cmd = [['del', 'server.so'], [aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
        def snippet(self):
            loc = os.path.abspath('./msc-plugin/msc-plugin.vcxproj')
            self.get_flags()
            self.build_cmd = [[aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
    #class PythonDriver(DriverBase):
    #    def __init__(self, mgr : 'build_manager') -> None:
-    #        super().__init__(mgr)
-
-    #@property
-    #def MSBuild(self):
-    #    return MSBuildDriver(self)
-    #@property
-    #def Makefile(self):
-    #    return MakefileDriver(self)
+    #        super().__init__(mgr)
 
    def __init__(self) -> None:
        self.method = 'make'
@@ -181,7 +181,9 @@
    def build_caches(self, force = False):
        cached = checksums()
        current = checksums()
-        libaquery_a = 'libaquery.lib' if aquery_config.os_platform else 'libaquery.a'
+        libaquery_a = 'libaquery.a'
+        if aquery_config.os_platform == 'win':
+            libaquery_a = 'libaquery.lib'
        current.calc(libaquery_a)
        try:
            with open('.cached', 'rb') as cache_sig:
@@ -190,18 +192,25 @@
            pass
        self.cache_status = current != cached
 
+        success = True
        if force or self.cache_status.sources:
            self.driver.pch()
            self.driver.libaquery_a()
            self.driver.server()
        else:
            if self.cache_status.libaquery_a:
-                self.driver.libaquery_a()
+                success = self.driver.libaquery_a() and success
            if self.cache_status.pch_hpp_gch:
-                self.driver.pch()
+                success = self.driver.pch() and success
            if self.cache_status.server:
-                self.driver.server()
-        current.calc(libaquery_a)
-        with open('.cached', 'wb') as cache_sig:
-            cache_sig.write(pickle.dumps(current))
+                success = self.driver.server() and success
+        if success:
+            current.calc(libaquery_a)
+            with open('.cached', 'wb') as cache_sig:
+                cache_sig.write(pickle.dumps(current))
+        else:
+            try:
+                os.remove('./.cached')
+            except:
+                pass
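[Note — editor's illustration, not part of the patch: after the swap above, __ne__ reports a component as changed when either side is missing or the checksums differ, and __eq__ is its De Morgan dual. A standalone sketch of the per-key logic:]

    # Per-key comparison semantics used by checksums.__ne__ / __eq__.
    def key_ne(a, b):
        # a missing checksum on either side counts as "changed"
        return not (a and b) or a != b

    def key_eq(a, b):
        # present on both sides and identical
        return bool(a and b and a == b)

    assert key_ne(None, b'1234') is True      # missing component forces a rebuild
    assert key_ne(b'1234', b'1234') is False
    assert key_eq(b'1234', b'1234') is True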
+#include "./server/utils.h" +#include "./server/types.h" +#include +types::date_t rand_date(){ + unsigned char d = ui(engine) % 28 + 1; + unsigned char m = ui(engine) % 12 + 1; + short y = ui(engine) % 40 + 1990; + if (ui(engine) % 2) return types::date_t((unsigned char)10, (unsigned char)1, 2003); + return types::date_t{d, m, y}; +} +int gen_stock_data(int argc, char* argv[]){ + using std::string; + using namespace types; + int n_stocks = 5; + int n_data = 1000; + string* IDs = new string[n_stocks + 1]; + string* names = new string[n_stocks + 1]; + for(int i = 0; i < n_stocks; ++i){ + IDs[i] = base62uuid(); + names[i] = base62uuid(); + } + IDs[n_stocks] = "S"; + names[n_stocks] = "x"; + FILE* fp = fopen("./data/stock.csv", "w"); + fprintf(fp, "ID, timestamp, tradeDate, price\n"); + char date_str_buf [types::date_t::string_length()]; + int* timestamps = new int[n_data]; + for(int i = 0; i < n_data; ++i) timestamps[i] = i+1; + permutation(timestamps, n_data); + for(int i = 0; i < n_data; ++i){ + auto date = rand_date().toString(date_str_buf + date_t::string_length()); + fprintf(fp, "%s,%d,%s,%d\n", IDs[ui(engine)%(n_stocks + 1)].c_str(), timestamps[i], date, ui(engine) % 1000); + } + fclose(fp); + fp = fopen("./data/base.csv", "w"); + fprintf(fp, "ID, name\n"); + for(int i = 0; i < n_stocks + 1; ++ i){ + fprintf(fp, "%s,%s\n", IDs[i].c_str(), names[i].c_str()); + } + fclose(fp); +} + +int main(int argc, char* argv[]){ + gen_stock_data(argc, argv); +} diff --git a/engine/projection.py b/engine/projection.py index fa199ed..f813005 100644 --- a/engine/projection.py +++ b/engine/projection.py @@ -29,7 +29,7 @@ class projection(ast_node): def spawn(self, node): self.datasource = None if 'from' in node: - from_clause = node['from'] + from_clause = node['from']['table_source'] if type(from_clause) is list: # from joins join(self, from_clause) @@ -47,8 +47,8 @@ class projection(ast_node): self.datasource = self.context.tables_byname[value] if 'name' in value: self.datasource.add_alias(value['name']) - if 'assumptions' in from_clause: - self.assumptions = enlist(from_clause['assumptions']) + if 'assuming' in node['from']: + self.assumptions = enlist(node['from']['assuming']) elif type(from_clause) is str: self.datasource = self.context.tables_byname[from_clause] diff --git a/engine/types.py b/engine/types.py index 477934d..de80c7d 100644 --- a/engine/types.py +++ b/engine/types.py @@ -1,5 +1,5 @@ from copy import deepcopy -from engine.utils import defval +from engine.utils import base62uuid, defval from aquery_config import have_hge from typing import Dict, List @@ -244,6 +244,14 @@ def fn_behavior(op:OperatorBase, c_code, *x): name = op.cname if c_code else op.sqlname return f'{name}({", ".join([f"{xx}" for xx in x])})' +def count_behavior(op:OperatorBase, c_code, x, distinct = False): + if not c_code: + return f'{op.sqlname}({"distinct " if distinct else ""}{x})' + elif distinct: + return '({x}).distinct_size()' + else: + return '{count()}' + def windowed_fn_behavor(op: OperatorBase, c_code, *x): if not c_code: return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})' @@ -282,7 +290,7 @@ fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlna fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor) fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor) fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), 
diff --git a/engine/projection.py b/engine/projection.py
index fa199ed..f813005 100644
--- a/engine/projection.py
+++ b/engine/projection.py
@@ -29,7 +29,7 @@ class projection(ast_node):
     def spawn(self, node):
         self.datasource = None
         if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
             if type(from_clause) is list:
                 # from joins
                 join(self, from_clause)
@@ -47,8 +47,8 @@
                     self.datasource = self.context.tables_byname[value]
                 if 'name' in value:
                     self.datasource.add_alias(value['name'])
-            if 'assumptions' in from_clause:
-                self.assumptions = enlist(from_clause['assumptions'])
+            if 'assuming' in node['from']:
+                self.assumptions = enlist(node['from']['assuming'])
 
         elif type(from_clause) is str:
             self.datasource = self.context.tables_byname[from_clause]
diff --git a/engine/types.py b/engine/types.py
index 477934d..de80c7d 100644
--- a/engine/types.py
+++ b/engine/types.py
@@ -1,5 +1,5 @@
 from copy import deepcopy
-from engine.utils import defval
+from engine.utils import base62uuid, defval
 from aquery_config import have_hge
 from typing import Dict, List
 
@@ -244,6 +244,14 @@ def fn_behavior(op:OperatorBase, c_code, *x):
     name = op.cname if c_code else op.sqlname
     return f'{name}({", ".join([f"{xx}" for xx in x])})'
 
+def count_behavior(op:OperatorBase, c_code, x, distinct = False):
+    if not c_code:
+        return f'{op.sqlname}({"distinct " if distinct else ""}{x})'
+    elif distinct:
+        return '({x}).distinct_size()'
+    else:
+        return '{count()}'
+
 def windowed_fn_behavor(op: OperatorBase, c_code, *x):
     if not c_code:
         return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})'
@@ -282,7 +290,7 @@ fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlna
 fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor)
 fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor)
 fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'avgs', sqlname = 'AVGS', call = windowed_fn_behavor)
-fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = fn_behavior)
+fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior)
 # special
 def is_null_call_behavior(op:OperatorBase, c_code : bool, x : str):
     if c_code :
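[Note — editor's illustration, not part of the patch: count_behavior switches on the target language and on DISTINCT. The two c_code branches deliberately return deferred placeholders — '{count()}' and '({x}).distinct_size()' are plain strings, not f-strings; they are evaluated later by expr.eval(), which supplies count (and, via var_inject, x) once the surrounding group-by variable is known. Expected outputs, assuming a column named price:]

    from engine.types import fncnt, count_behavior

    assert count_behavior(fncnt, False, 'price') == 'COUNT(price)'
    assert count_behavior(fncnt, False, 'price', True) == 'COUNT(distinct price)'
    # C++ side: deferred placeholders, filled in by expr.eval() later
    assert count_behavior(fncnt, True, 'price') == '{count()}'
    assert count_behavior(fncnt, True, 'price', True) == '({x}).distinct_size()'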
diff --git a/prompt.py b/prompt.py
index aadbeb7..0f59d33 100644
--- a/prompt.py
+++ b/prompt.py
@@ -479,6 +479,10 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                 print(e)
                 continue
             except (ValueError, FileNotFoundError) as e:
+                try:
+                    os.remove('./.cached')
+                except:
+                    pass
                 print(e)
             except (KeyboardInterrupt):
                 break
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index 7d5b0c8..a66da39 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -73,7 +73,7 @@ class projection(ast_node):
             self.datasource = join(self, [], self.context) # datasource is Join instead of TableInfo
         self.assumptions = []
         if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
             self.datasource = join(self, from_clause)
             if 'assumptions' in from_clause:
                 self.assumptions = enlist(from_clause['assumptions'])
@@ -129,12 +129,17 @@
                 if not proj_expr.is_special:
                     y = lambda x:x
                     name = eval('f\'' + name + '\'')
+                    offset = len(col_exprs)
                     if name not in self.var_table:
-                        self.var_table[name] = len(col_exprs)
-                    proj_map[i] = [this_type, len(col_exprs), proj_expr]
+                        self.var_table[name] = offset
+                        if proj_expr.is_ColExpr and type(proj_expr.raw_col) is ColRef:
+                            for n in (proj_expr.raw_col.table.alias):
+                                self.var_table[f'{n}.'+name] = offset
+                    proj_map[i] = [this_type, offset, proj_expr]
                     col_expr = name + ' AS ' + alias if alias else name
                     if alias:
-                        self.var_table[alias] = len(col_exprs)
+                        self.var_table[alias] = offset
+
                     col_exprs.append((col_expr, proj_expr.type))
             else:
                 self.context.headers.add('"./server/aggregations.h"')
@@ -164,10 +169,12 @@
             self.add(', '.join([c[0] for c in col_exprs] + col_ext_names))
 
         _base_offset = len(col_exprs)
-        for i, col in enumerate(col_ext_names):
-            if col not in self.var_table:
-                self.var_table[col] = i + _base_offset
-
+        for i, col in enumerate(self.col_ext):
+            if col.name not in self.var_table:
+                offset = i + _base_offset
+                self.var_table[col.name] = offset
+                for n in (col.table.alias):
+                    self.var_table[f'{n}.'+col.name] = offset
 
         def finialize(astnode:ast_node):
             if(astnode is not None):
@@ -223,12 +230,15 @@
                 if type(val[1]) is str:
                     x = True
                     y = lambda t: self.pyname2cname[t]
-                    val[1] = val[2].eval(x, y, gettype=True)
+                    count = lambda : '0'
+                    if vid2cname:
+                        count = lambda : f'{vid2cname[0]}.size'
+                    val[1] = val[2].eval(x, y, count=count)
                     if callable(val[1]):
-                        val[1] = val[1](True)
-                    decltypestring = val[1]
+                        val[1] = val[1](False)
 
                 if val[0] == LazyT:
+                    decltypestring = val[2].eval(x,y,gettype=True)(True)
                     decltypestring = f'value_type<decays<decltype({decltypestring})>>'
                     out_typenames[key] = decltypestring
                 else:
@@ -461,7 +471,8 @@ class groupby_c(ast_node):
                     return get_var_names(sql_code)
                 else:
                     return varex.eval(c_code=True, y = get_var_names,
-                        materialize_builtin = materialize_builtin)
+                        materialize_builtin = materialize_builtin,
+                        count=lambda:f'{val_var}.size')
 
         for ce in cexprs:
             ex = ce[1]
@@ -544,24 +555,25 @@ class join(ast_node):
         self.tables_dir = dict()
         self.rec = None
         self.top_level = self.parent and type(self.parent) is projection
+        self.have_sep = False
         # self.tmp_name = 'join_' + base62uuid(4)
         # self.datasource = TableInfo(self.tmp_name, [], self.context)
 
     def append(self, tbls, __alias = ''):
-        alias = lambda t : '(' + t + ') ' + __alias if len(__alias) else t
+        alias = lambda t : t + ' ' + __alias if len(__alias) else t
         if type(tbls) is join:
-            self.joins.append(alias(tbls.__str__()))
+            self.joins.append((alias(tbls.__str__()), tbls.have_sep))
             self.tables += tbls.tables
             self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
 
         elif type(tbls) is TableInfo:
-            self.joins.append(alias(tbls.table_name))
+            self.joins.append((alias(tbls.table_name), False))
             self.tables.append(tbls)
             self.tables_dir[tbls.table_name] = tbls
             for a in tbls.alias:
                 self.tables_dir[a] = tbls
 
         elif type(tbls) is projection:
-            self.joins.append(alias(tbls.finalize()))
+            self.joins.append((alias(tbls.finalize()), False))
 
     def produce(self, node):
         if type(node) is list:
@@ -585,13 +597,14 @@
                 tbl.add_alias(node['name'])
             self.append(tbl, alias)
         else:
-            keys = node.keys()
+            keys = list(node.keys())
             if keys[0].lower().endswith('join'):
+                self.have_sep = True
                 j = join(self, node[keys[0]])
                 tablename = f' {keys[0]} {j}'
-                if keys[1].lower() == 'on':
+                if len(keys) > 1 and keys[1].lower() == 'on':
                     tablename += f' on {expr(self, node[keys[1]])}'
-                self.joins.append(tablename)
+                self.joins.append((tablename, self.have_sep))
                 self.tables += j.tables
                 self.tables_dir = {**self.tables_dir, **j.tables_dir}
 
@@ -618,18 +631,27 @@
         if datasource is None:
             raise ValueError(f'Table name/alias not defined{parsedColExpr[0]}')
         else:
-            return datasource.parse_col_names(parsedColExpr[1])
+            datasource.rec = self.rec
+            ret = datasource.parse_col_names(parsedColExpr[1])
+            datasource.rec = None
+            return ret
+
     @property
     def all_cols(self):
         return set([c for t in self.tables for c in t.columns])
     def consume(self, node):
-        self.sql = ', '.join(self.joins)
+        self.sql = ''
+        for j in self.joins:
+            if not self.sql or j[1]:
+                self.sql += j[0]
+            else:
+                self.sql += ', ' + j[0]
         if node and self.sql and self.top_level:
             self.sql = ' FROM ' + self.sql
         return super().consume(node)
 
     def __str__(self):
-        return ', '.join(self.joins)
+        return self.sql
     def __repr__(self):
         return self.__str__()
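[Note — editor's illustration, not part of the patch: join.joins entries are now (sql, have_sep) pairs, so consume() only inserts a comma before sources that did not arrive through an explicit JOIN keyword (which already carries its own separator text). A tiny standalone sketch of that separator rule:]

    # Separator logic from join.consume(): explicit joins carry their own keyword.
    def render(joins):
        sql = ''
        for text, have_sep in joins:
            if not sql or have_sep:
                sql += text
            else:
                sql += ', ' + text
        return sql

    assert render([('stock s', False), (' natural join base b', True)]) \
           == 'stock s natural join base b'
    assert render([('stock', False), ('base', False)]) == 'stock, base'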
diff --git a/reconstruct/expr.py b/reconstruct/expr.py
index b636667..ce5ea4f 100644
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@@ -26,7 +26,13 @@ class expr(ast_node):
 
     def __init__(self, parent, node, *, c_code = None, supress_undefined = False):
         from reconstruct.ast import projection, udf
-
+        # gen2 expr have multi-passes
+        # first pass parse json into expr tree
+        # generate target code in later passes upon need
+        self.children = []
+        self.opname = ''
+        self.curr_code = ''
+        self.counts = {}
         self.type = None
         self.raw_col = None
         self.udf : Optional[udf] = None
@@ -81,106 +87,124 @@ class expr(ast_node):
         from reconstruct.ast import udf
 
         if type(node) is dict:
-            if len(node) > 1:
-                print(f'Parser Error: {node} has more than 1 dict entry.')
+            if 'literal' in node:
+                node = node['literal']
+            else:
+                if len(node) > 1:
+                    print(f'Parser Error: {node} has more than 1 dict entry.')
 
-            for key, val in node.items():
-                if key in self.operators:
-                    if key in builtin_func:
-                        if self.is_agg_func:
-                            self.root.is_special = True # Nested Aggregation
-                        else:
-                            self.is_agg_func = True
-
-                    op = self.operators[key]
-
-                    val = enlist(val)
-                    exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
-                    str_vals = [e.sql for e in exp_vals]
-                    type_vals = [e.type for e in exp_vals]
-                    is_compound = any([e.is_compound for e in exp_vals])
-                    if key in self.ext_aggfuncs:
-                        self.is_compound = False
-                    else:
-                        self.is_compound = is_compound
-                    try:
-                        self.type = op.return_type(*type_vals)
-                    except AttributeError as e:
-                        if type(self.root) is not udf:
-                            # TODO: do something when this is not an error
-                            # print(f'alert: {e}')
-                            pass
-                        self.type = AnyT
+                for key, val in node.items():
+                    key = key.lower()
+                    if key in self.operators:
+                        if key in builtin_func:
+                            if self.is_agg_func:
+                                self.root.is_special = True # Nested Aggregation
+                            else:
+                                self.is_agg_func = True
 
-                    self.sql = op(self.c_code, *str_vals)
-                    special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
-                                    "maxs", "mins", "avgs", "sums", "deltas"]
-                    if self.context.special_gb:
-                        special_func = [*special_func, *self.ext_aggfuncs]
+                        op = self.operators[key]
+                        count_distinct = False
+                        if key == 'count' and type(val) is dict and 'distinct' in val:
+                            count_distinct = True
+                            val = val['distinct']
+                        val = enlist(val)
+                        exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
+                        self.children = exp_vals
+                        self.opname = key
 
-                    if key in special_func and not self.is_special:
-                        self.is_special = True
-                        if key in self.context.udf_map:
-                            self.root.udf_called = self.context.udf_map[key]
-                            if self.is_udfexpr and key == self.root.udf.name:
-                                self.root.is_recursive_call_inudf = True
-                        elif key in user_module_func.keys():
-                            udf.try_init_udf(self.context)
-                        # TODO: make udf_called a set!
-                        p = self.parent
-                        while type(p) is expr and not p.udf_called:
-                            p.udf_called = self.udf_called
-                            p = p.parent
-                        p = self.parent
-                        while type(p) is expr and not p.is_special:
-                            p.is_special = True
-                            p = p.parent
+                        str_vals = [e.sql for e in exp_vals]
+                        type_vals = [e.type for e in exp_vals]
+                        is_compound = any([e.is_compound for e in exp_vals])
+                        if key in self.ext_aggfuncs:
+                            self.is_compound = False
+                        else:
+                            self.is_compound = is_compound
+                        try:
+                            self.type = op.return_type(*type_vals)
+                        except AttributeError as e:
+                            if type(self.root.parent) is not udf:
+                                # TODO: do something when this is not an error
+                                print(f'alert: {e}')
+                                pass
+                            self.type = AnyT
+
+                        if count_distinct: # inject distinct col later
+                            self.sql = f'{{{op(self.c_code, *str_vals, True)}}}'
+                        else:
+                            self.sql = op(self.c_code, *str_vals)
+
+                        special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
+                                        "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                        if self.context.special_gb:
+                            special_func = [*special_func, *self.ext_aggfuncs]
+
+                        if key in special_func and not self.is_special:
+                            self.is_special = True
+                            if key in self.context.udf_map:
+                                self.root.udf_called = self.context.udf_map[key]
+                                if self.is_udfexpr and key == self.root.udf.name:
+                                    self.root.is_recursive_call_inudf = True
+                            elif key in user_module_func.keys():
+                                udf.try_init_udf(self.context)
+                            # TODO: make udf_called a set!
+                            p = self.parent
+                            while type(p) is expr and not p.udf_called:
+                                p.udf_called = self.udf_called
+                                p = p.parent
+                            p = self.parent
+                            while type(p) is expr and not p.is_special:
+                                p.is_special = True
+                                p = p.parent
 
-                    need_decltypestr = any([e.need_decltypestr for e in exp_vals])
-                    if need_decltypestr or (self.udf_called and type(op) is udf):
-                        decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
-                        self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
+                        need_decltypestr = any([e.need_decltypestr for e in exp_vals])
+                        if need_decltypestr or (self.udf_called and type(op) is udf):
+                            decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
+                            self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
 
-                        if self.udf_called and type(op) is udf:
-                            self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
-
-                elif self.is_udfexpr:
-                    var_table = self.root.udf.var_table
-                    vec = key.split('.')
-                    _vars = [*var_table, *self.builtin_vars]
-                    def get_vname (node):
-                        if node in self.builtin_vars:
-                            self.root.udf.builtin[node].enabled = True
-                            self.builtin_var = node
-                            return node
+                            if self.udf_called and type(op) is udf:
+                                self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
+
+                    elif self.is_udfexpr:
+                        var_table = self.root.udf.var_table
+                        vec = key.split('.')
+                        _vars = [*var_table, *self.builtin_vars]
+                        def get_vname (node):
+                            if node in self.builtin_vars:
+                                self.root.udf.builtin[node].enabled = True
+                                self.builtin_var = node
+                                return node
+                            else:
+                                return var_table[node]
+                        if vec[0] not in _vars:
+                            # print(f'Use of undefined variable {vec[0]}')
+                            # TODO: do something when this is not an error
+                            pass
                         else:
-                            return var_table[node]
-                    if vec[0] not in _vars:
-                        # print(f'Use of undefined variable {vec[0]}')
-                        # TODO: do something when this is not an error
-                        pass
+                            vname = get_vname(vec[0])
+                            val = enlist(val)
+                            if(len(val) > 2):
+                                print('Warning: more than 2 indexes found for subvec operator.')
+                            ex = [expr(self, v, c_code = self.c_code) for v in val]
+                            idxs = ', '.join([e.sql for e in ex])
+                            self.sql = f'{vname}.subvec({idxs})'
+                            if any([e.need_decltypestr for e in ex]):
+                                self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
+                            if key == 'get' and len(val) > 1:
+                                ex_vname = expr(self, val[0], c_code=self.c_code)
+                                self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
+                                if hasattr(ex_vname, 'builtin_var'):
+                                    if not hasattr(self, 'builtin_var'):
+                                        self.builtin_var = []
+                                    self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
+                                    self.udf_decltypecall = ex_vname.sql
                    else:
-                        vname = get_vname(vec[0])
-                        val = enlist(val)
-                        if(len(val) > 2):
-                            print('Warning: more than 2 indexes found for subvec operator.')
-                        ex = [expr(self, v, c_code = self.c_code) for v in val]
-                        idxs = ', '.join([e.sql for e in ex])
-                        self.sql = f'{vname}.subvec({idxs})'
-                        if any([e.need_decltypestr for e in ex]):
-                            self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
-                        if key == 'get' and len(val) > 1:
-                            ex_vname = expr(self, val[0], c_code=self.c_code)
-                            self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
-                            if hasattr(ex_vname, 'builtin_var'):
-                                if not hasattr(self, 'builtin_var'):
-                                    self.builtin_var = []
-                                self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
-                                self.udf_decltypecall = ex_vname.sql
-                    else:
-                        print(f'Undefined expr: {key}{val}')
-
-        elif type(node) is str:
+                        print(f'Undefined expr: {key}{val}')
+                if 'distinct' in val and key != 'count':
+                    if self.c_code:
+                        self.sql = 'distinct ' + self.sql
+                    elif self.is_compound:
+                        self.sql = '(' + self.sql + ').distinct()'
+        if type(node) is str:
             if self.is_udfexpr:
                 curr_udf : udf = self.root.udf
                 var_table = curr_udf.var_table
@@ -215,22 +239,41 @@
             self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
             if self.raw_col is not None:
                 self.is_ColExpr = True
-                self.sql = self.raw_col.name
+                table_name = ''
+                if '.' in node:
+                    table_name = self.raw_col.table.table_name
+                    if self.raw_col.table.alias:
+                        alias = iter(self.raw_col.table.alias)
+                        try:
+                            a = next(alias)
+                            while(not a or a == table_name):
+                                a = next(alias)
+                            if (a and a != table_name):
+                                table_name = a
+                        except StopIteration:
+                            pass
+                if table_name:
+                    table_name = table_name + '.'
+                self.sql = table_name + self.raw_col.name
                 self.type = self.raw_col.type
                 self.is_compound = True
+                self.opname = self.raw_col
             else:
-                self.sql = node
+                self.sql = '\'' + node + '\''
                 self.type = StrT
+                self.opname = self.sql
             if self.c_code and self.datasource is not None:
                 self.sql = f'{{y(\"{self.sql}\")}}'
 
         elif type(node) is bool:
             self.type = BoolT
+            self.opname = node
             if self.c_code:
                 self.sql = '1' if node else '0'
             else:
                 self.sql = 'TRUE' if node else 'FALSE'
-        else:
+        elif type(node) is not dict:
             self.sql = f'{node}'
+            self.opname = node
             if type(node) is int:
                 if (node >= 2**63 - 1 or node <= -2**63):
                     self.type = LongT
@@ -252,6 +295,12 @@
                 self.codebuf += c.finalize(override=override)
         return self.codebuf
 
+    def codegen(self, delegate):
+        self.curr_code = ''
+        for c in self.children:
+            self.curr_code += c.codegen(delegate)
+        return self.curr_code
+
     def __str__(self):
         return self.sql
     def __repr__(self):
 
     # builtins is readonly, so it's okay to set default value as an object
     # eval is only called at root expr.
-    def eval(self, c_code = None, y = lambda t: t, materialize_builtin = False, _decltypestr = False, *, gettype = False):
+    def eval(self, c_code = None, y = lambda t: t,
+             materialize_builtin = False, _decltypestr = False,
+             count = lambda : 'count', var_inject = None,
+             *,
+             gettype = False):
         assert(self.is_root)
         def call(decltypestr = False) -> str:
-            nonlocal c_code, y, materialize_builtin
+            nonlocal c_code, y, materialize_builtin, count, var_inject
+            if var_inject:
+                for k, v in var_inject.items():
+                    locals()[k] = v
             if self.udf_called is not None:
                 loc = locals()
                 builtin_vars = self.udf_called.builtin_used
@@ -344,4 +400,4 @@ class getrefs(expr):
     def consume(self, _):
         if self.root == self:
             self.rec = self.datasource.rec
-            self.datasource.rec = None
\ No newline at end of file
+            self.datasource.rec = None
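[Note — editor's illustration, not part of the patch: the '{...}' wrappers produced above (e.g. for COUNT DISTINCT) are deferred f-string fragments. At materialization time the generated string is evaluated as an f-string with y, count, and any var_inject entries in scope, which is how the count lambdas passed from groupby_c and projection get injected. A minimal sketch of that mechanism; the 'g1' name is hypothetical:]

    # Deferred f-string evaluation, as used when materializing generated code.
    sql = '{count()}'                       # placeholder emitted by count_behavior
    count = lambda: 'g1.size'               # injected by the caller (e.g. groupby_c)
    materialized = eval("f'" + sql + "'")   # -> 'g1.size'
    assert materialized == 'g1.size'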
diff --git a/server/aggregations.h b/server/aggregations.h
index ac15d8e..2add603 100644
--- a/server/aggregations.h
+++ b/server/aggregations.h
@@ -16,7 +16,8 @@ constexpr static inline size_t count(const T&) { return 1; }
 
 // TODO: Specializations for dt/str/none
 template <class T, template<typename ...> class VT>
-types::GetLongType<T> sum(const VT<T>& v) {
+types::GetLongType<T>
+sum(const VT<T>& v) {
     types::GetLongType<T> ret = 0;
     for (const auto& _v : v)
         ret += _v;
diff --git a/server/io.cpp b/server/io.cpp
index 9a527d4..c34dc42 100644
--- a/server/io.cpp
+++ b/server/io.cpp
@@ -181,7 +181,7 @@ namespace types {
         return !operator==(other);
     }
     bool time_t::validate() const{
-        return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000;
+        return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000000;
     }
 
     timestamp_t::timestamp_t(const char* str) { fromString(str); }
@@ -244,13 +244,13 @@ std::ostream& operator<<(std::ostream& os, types::timestamp_t & v)
 
 using std::string;
 
-string base62uuid(int l = 8) {
+string base62uuid(int l) {
     using namespace std;
     constexpr static const char* base62alp = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
     static mt19937_64 engine(chrono::system_clock::now().time_since_epoch().count());
     static uniform_int_distribution<uint64_t> u(0x10000, 0xfffff);
     uint64_t uuid = (u(engine) << 32ull) + (chrono::system_clock::now().time_since_epoch().count() & 0xffffffff);
-    printf("%llu\n", uuid);
+    //printf("%llu\n", uuid);
     string ret;
     while (uuid && l-- >= 0) {
         ret = string("") + base62alp[uuid % 62] + ret;
diff --git a/server/server.cpp b/server/server.cpp
index b5b8195..dd63e3e 100644
--- a/server/server.cpp
+++ b/server/server.cpp
@@ -100,10 +100,10 @@ void Context::end_session(){
 
 void* Context::get_module_function(const char* fname){
     auto fmap = static_cast<std::unordered_map<std::string, void*>*> (this->module_function_maps);
-    printf("%p\n", fmap->find("mydiv")->second);
-    for (const auto& [key, value] : *fmap){
-        printf("%s %p\n", key.c_str(), value);
-    }
+    // printf("%p\n", fmap->find("mydiv")->second);
+    // for (const auto& [key, value] : *fmap){
+    //     printf("%s %p\n", key.c_str(), value);
+    // }
     auto ret = fmap->find(fname);
     return ret == fmap->end() ? nullptr : ret->second;
 }
@@ -188,9 +188,9 @@ int dll_main(int argc, char** argv, Context* cxt){
         case 'F': // Register Function in Module
         {
             auto fname = n_recvd[i] + 1;
-            printf("F:: %s: %p, %p\n", fname, user_module_handle, dlsym(user_module_handle, fname));
+            //printf("F:: %s: %p, %p\n", fname, user_module_handle, dlsym(user_module_handle, fname));
             module_fn_map->insert_or_assign(fname, dlsym(user_module_handle, fname));
-            printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
+            //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
         }
         break;
         case 'U': // Unload Module
module_fn_map->find("mydiv")->second : nullptr); + //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr); } break; case 'U': // Unload Module diff --git a/server/table.h b/server/table.h index 36b653a..f96c0e6 100644 --- a/server/table.h +++ b/server/table.h @@ -139,7 +139,9 @@ public: const ColRef<_Ty>& orig; constexpr Iterator_t(const uint32_t* val, const ColRef<_Ty>& orig) noexcept : val(val), orig(orig) {} _Ty& operator*() { return orig[*val]; } - bool operator != (const Iterator_t& rhs) { return rhs.val != val; } + bool operator != (const Iterator_t& rhs) const { return rhs.val != val; } + bool operator == (const Iterator_t& rhs) const { return rhs.val == val; } + size_t operator - (const Iterator_t& rhs) const { return val - rhs.val; } Iterator_t& operator++ () { ++val; return *this; @@ -180,6 +182,20 @@ public: subvec[i] = operator[](i); return subvec; } + std::unordered_set<_Ty> distinct_common() { + return std::unordered_set<_Ty> {begin(), end()}; + } + uint32_t distinct_size(){ + return distinct_common().size(); + } + ColRef<_Ty> distinct(){ + auto set = distinct_common(); + ColRef<_Ty> ret(set.size()); + uint32_t i = 0; + for (auto& val : set) + ret.container[i++] = val; + return ret; + } inline ColRef<_Ty> subvec(uint32_t start = 0) { return subvec_deep(start, size); } }; template