diff --git a/.gitignore b/.gitignore index d544b90..948a9a8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ out.k k *.so *.pdf +test*.c* +*.csv +*.out \ No newline at end of file diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py index 75f3198..c948a78 100644 --- a/aquery_parser/keywords.py +++ b/aquery_parser/keywords.py @@ -72,6 +72,7 @@ REFERENCES = keyword("references").suppress() RECURSIVE = keyword("recursive").suppress() VALUES = keyword("values").suppress() WINDOW = keyword("window") +INTO = keyword("into").suppress() PRIMARY_KEY = Group(PRIMARY + KEY).set_parser_name("primary_key") FOREIGN_KEY = Group(FOREIGN + KEY).set_parser_name("foreign_key") @@ -226,6 +227,7 @@ RESERVED = MatchFirst([ WINDOW, WITH, WITHIN, + INTO, ]) L_INLINE = Literal("").suppress() R_INLINE = Literal("").suppress() diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py index fe0ebd9..073731c 100644 --- a/aquery_parser/sql_parser.py +++ b/aquery_parser/sql_parser.py @@ -29,7 +29,7 @@ def common_parser(): ansi_ident | mysql_backtick_ident | simple_ident, separator=".", combine=True, )).set_parser_name("identifier") - return parser(ansi_string, combined_ident) + return parser(ansi_string | mysql_doublequote_string, combined_ident) def mysql_parser(): @@ -436,6 +436,19 @@ def parser(literal_string, ident, sqlserver=False): & Optional(assign("limit", expr)) ) + outfile = Optional( + ( + INTO + + keyword("outfile").suppress() + + literal_string ("loc") + + Optional ( + keyword("fields") + + keyword("terminated") + + keyword("by") + + literal_string ("term") + ) + )("outfile") + ) ordered_sql = ( ( (unordered_sql | (LB + query + RB)) @@ -448,6 +461,7 @@ def parser(literal_string, ident, sqlserver=False): )("union") + Optional(ORDER_BY + delimited_list(Group(sort_column))("orderby")) + limit + + outfile ).set_parser_name("ordered sql") / to_union_call with_expr = delimited_list(Group( @@ -605,9 +619,27 @@ def parser(literal_string, ident, sqlserver=False): + Optional(assign("where", expr)) ) / to_json_call + load = ( + keyword("load")("op") + + keyword("data").suppress() + + keyword("infile")("loc") + + literal_string ("file") + + INTO + + keyword("table").suppress() + + var_name ("table") + + Optional( + keyword("fields").suppress() + + keyword("terminated").suppress() + + keyword("by").suppress() + + literal_string ("term") + ) + ) ("load") + + + sql_stmts = delimited_list( ( query - | (insert | update | delete) + | (insert | update | delete | load) | (create_table | create_view | create_cache | create_index) | (drop_table | drop_view | drop_index) )("stmts"), ";") @@ -617,6 +649,10 @@ def parser(literal_string, ident, sqlserver=False): | udf ) ("stmts") - stmts = ZeroOrMore(sql_stmts|other_stmt) + stmts = ZeroOrMore( + sql_stmts + |other_stmt + | keyword(";").suppress() # empty stmt + ) return stmts.finalize() diff --git a/aquery_parser/utils.py b/aquery_parser/utils.py index 6578c3a..6aeaec5 100644 --- a/aquery_parser/utils.py +++ b/aquery_parser/utils.py @@ -522,6 +522,7 @@ def to_union_call(tokens): output["limit"] = tokens["limit"] output["offset"] = tokens["offset"] output["fetch"] = tokens["fetch"] + output["outfile"] = tokens["outfile"] return output diff --git a/engine/agg.py b/engine/agg.py new file mode 100644 index 0000000..e69de29 diff --git a/engine/ast.py b/engine/ast.py index fb48b03..86adaaf 100644 --- a/engine/ast.py +++ b/engine/ast.py @@ -4,13 +4,15 @@ from engine.utils import base62uuid # replace column info with this later. class ColRef: - def __init__(self, k9name, type, cobj, cnt, table): + def __init__(self, k9name, _ty, cobj, cnt, table, name, id): self.k9name = k9name - self.type = type + self.type = _ty self.cobj = cobj self.cnt = cnt self.table = table - self.__arr__ = (k9name, type, cobj, cnt, table) + self.name = name + self.id = id + self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id) def __getitem__(self, key): return self.__arr__[key] @@ -28,6 +30,7 @@ class TableInfo: self.columns = [] self.cxt = cxt self.views = set() + self.rec = None for c in cols: self.add_col(c) @@ -48,7 +51,7 @@ class TableInfo: # root.cnt += 1 # column: (k9name, type, original col_object, dup_count) - col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self) + col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns)) self.cxt.k9cols_byname[k9name] = col_object self.columns_byname[c['name']] = col_object @@ -62,7 +65,11 @@ class TableInfo: return len(self.columns) def get_k9colname(self, col_name): - return self.columns_byname[col_name].k9name + col = self.columns_byname[col_name] + if type(self.rec) is list: + self.rec.append(col) + return col.k9name + def add_alias(self, alias): # TODO: Exception when alias already defined. # TODO: Scoping of alias should be constrainted in the query. @@ -158,5 +165,5 @@ class ast_node: def include(objs): import inspect for _, cls in inspect.getmembers(objs): - if inspect.isclass(cls) and issubclass(cls, ast_node): + if inspect.isclass(cls) and issubclass(cls, ast_node) and not cls.name.startswith('_'): ast_node.types[cls.name] = cls \ No newline at end of file diff --git a/engine/ddl.py b/engine/ddl.py index 2fd8110..84fc205 100644 --- a/engine/ddl.py +++ b/engine/ddl.py @@ -1,7 +1,7 @@ # code-gen for data decl languages -from engine.ast import TableInfo, ast_node, include - +from engine.ast import ColRef, TableInfo, ast_node, include +from engine.utils import base62uuid class create_table(ast_node): name = 'create_table' def produce(self, node): @@ -27,6 +27,34 @@ class insert(ast_node): else: # subquery, dispatch to select astnode pass + +class k9(ast_node): + name='k9' + def produce(self, node): + self.emit(node[self.name]) +class load(ast_node): + name="load" + def produce(self, node): + node = node[self.name] + tablename = 'l'+base62uuid(7) + keys = 'k'+base62uuid(7) + self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"") + self.emit(f"{keys}:!{tablename}") + table:TableInfo = self.context.tables_byname[node['table']] + + for i, c in enumerate(table.columns): + c:ColRef + self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]') + +class outfile(ast_node): + name="_outfile" + def produce(self, node): + out_table:TableInfo = self.parent.out_table + self.emit_no_ln(f"\"{node['loc']['literal']}\"1:`csv@[[]") + for i, c in enumerate(out_table.columns): + self.emit_no_ln(f"{c.name}:{c.k9name}{';' if i < len(out_table.columns) - 1 else ''}") + self.emit(']') + import sys include(sys.modules[__name__]) \ No newline at end of file diff --git a/engine/expr.py b/engine/expr.py index 9aa61c6..2d62c89 100644 --- a/engine/expr.py +++ b/engine/expr.py @@ -6,10 +6,12 @@ class expr(ast_node): builtin_func_maps = { 'max': 'max', 'min': 'min', - 'avg':'avg', - 'sum':'sum', - 'mins': 'mins', - 'maxs': 'maxs' + 'avg': 'avg', + 'sum': 'sum', + 'mins': ['mins', 'minsw'], + 'maxs': ['maxs', 'maxsw'], + 'avgs': ['avgs', 'avgsw'], + 'sums': ['sums', 'sumsw'], } binary_ops = { 'sub':'-', @@ -22,6 +24,10 @@ class expr(ast_node): 'gt':'>', 'lt':'<', } + compound_ops = { + 'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'], + 'le' : [2, lambda x: f'~({x[0]}>{x[1]})'], + } unary_ops = { 'neg' : '-', 'not' : '~' @@ -45,19 +51,32 @@ class expr(ast_node): if type(node) is dict: for key, val in node.items(): if key in self.func_maps: - self.k9expr += f"{self.func_maps[key]}(" # if type(val) in [dict, str]: - self.k9expr += expr(self, val).k9expr - self.k9expr += ')' + if type(val) is list and len(val) > 1: + k9func = self.func_maps[key] + k9func = k9func[len(val) - 1] if type(k9func) is list else k9func + self.k9expr += f"{k9func}[" + for i, p in enumerate(val): + self.k9expr += expr(self, p).k9expr + (';'if i 1: - self.emit(f'+{disp_varname}') + self.emit(f"+{disp_varname}") else: self.emit(f'+,(,{disp_varname})') + if flatten: + self.emit(f'{disp_varname}') + if flatten: + self.out_table.columns = cols + outfile(self, node['outfile']) if self.datasource_changed: self.context.datasource = self.prev_datasource diff --git a/engine/utils.py b/engine/utils.py index 9c58764..283a80f 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -1,12 +1,19 @@ import uuid +base62alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + def base62uuid(crop=8): - alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' id = uuid.uuid4().int ret = '' while id: - ret = alp[id % 62] + ret + ret = base62alp[id % 62] + ret id //= 62 - return ret[:crop] if len(ret) else '0' \ No newline at end of file + return ret[:crop] if len(ret) else '0' + +def enlist(l): + return l if type(l) is list else [l] + +def seps(s, i, l): + return s if i < len(l) - 1 else '' \ No newline at end of file diff --git a/header.k b/header.k index 0d1cc4c..287f913 100644 --- a/header.k +++ b/header.k @@ -1,2 +1,28 @@ +import`csv + maxs:{[L]{max(x, y)}\L} mins:{[L]{min(x, y)}\L} +sums:{[L]{(x + y)}\L} + +avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]} +avgs:{[L] avgsimpl[sums[L];0]} + +maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} +maxsw:{[w;L]maxswimp[L; w; 1]} + +minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} +minsw:{[w;L]minswimp[L;w;1]} + +avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]} +avgsw:{[w;L] avgswimp[L;w;0;0]} + +sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]} +sumsw:{[w;L] sumswimp[L;w;0;0]} + +groupbyi:{[L;GV;i] + k:(,(L[i]));gvk:GV[k][0]; + found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV]; + cg:(,L[i])!$[found;,gvk[0],i;,(,i)]; + $[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]} +groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]} +groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]} diff --git a/moving_avg.csv b/moving_avg.csv index f6b2570..8016053 100644 --- a/moving_avg.csv +++ b/moving_avg.csv @@ -1,6 +1,6 @@ Month,sales 1,100 2,120 -4,140 3,140 +4,140 5,130 diff --git a/prompt.py b/prompt.py index 1395edc..bdf0abf 100644 --- a/prompt.py +++ b/prompt.py @@ -3,6 +3,13 @@ import aquery_parser as parser import engine import subprocess +import sys +if sys.platform != 'win32': + import readline + +# else: +# import pyreadline3 + test_parser = True # code to test parser @@ -37,8 +44,8 @@ while test_parser: print(stmts) continue trimed = ws.sub(' ', q.lower()).split(' ') - if trimed[0] == 'file': - fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \ + if trimed[0].startswith('f'): + fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \ else trimed[1] with open(fn, 'r') as file: @@ -47,6 +54,6 @@ while test_parser: continue stmts = parser.parse(q) print(stmts) - except ValueError as e: + except (ValueError) as e: print(type(e), e) diff --git a/q1.sql b/q1.sql index e69de29..72b1d2a 100644 --- a/q1.sql +++ b/q1.sql @@ -0,0 +1,9 @@ +CREATE TABLE test(a INT, b INT, c INT, d INT) + +LOAD DATA INFILE "test.csv" +INTO TABLE test +FIELDS TERMINATED BY "," + +SELECT sum(c), b, d +FROM test +group by a,b,d \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 92b3841..52da39d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ mo-future mo-dots==8.20.21357 mo-parsing mo-imports +readline; sys_platform != 'win32' \ No newline at end of file diff --git a/stock.a b/stock.a index a5578ba..f55ae52 100644 --- a/stock.a +++ b/stock.a @@ -17,11 +17,13 @@ INSERT INTO stocks VALUES(14,5) INSERT INTO stocks VALUES(15,2) INSERT INTO stocks VALUES(16,5) -SELECT max(price-min(timestamp)) FROM stocks - -SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100) + "q1" +SELECT max(price-min(timestamp)) FROM stocks + "q2" +SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100); + "q3" SELECT max(price-mins(price)) FROM stocks ASSUMING ASC timestamp