diff --git a/.gitignore b/.gitignore
index d544b90..948a9a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,6 @@ out.k
k
*.so
*.pdf
+test*.c*
+*.csv
+*.out
\ No newline at end of file
diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py
index 75f3198..c948a78 100644
--- a/aquery_parser/keywords.py
+++ b/aquery_parser/keywords.py
@@ -72,6 +72,7 @@ REFERENCES = keyword("references").suppress()
RECURSIVE = keyword("recursive").suppress()
VALUES = keyword("values").suppress()
WINDOW = keyword("window")
+INTO = keyword("into").suppress()
PRIMARY_KEY = Group(PRIMARY + KEY).set_parser_name("primary_key")
FOREIGN_KEY = Group(FOREIGN + KEY).set_parser_name("foreign_key")
@@ -226,6 +227,7 @@ RESERVED = MatchFirst([
WINDOW,
WITH,
WITHIN,
+ INTO,
])
L_INLINE = Literal("").suppress()
R_INLINE = Literal("").suppress()
diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py
index fe0ebd9..073731c 100644
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@@ -29,7 +29,7 @@ def common_parser():
ansi_ident | mysql_backtick_ident | simple_ident, separator=".", combine=True,
)).set_parser_name("identifier")
- return parser(ansi_string, combined_ident)
+ return parser(ansi_string | mysql_doublequote_string, combined_ident)
def mysql_parser():
@@ -436,6 +436,19 @@ def parser(literal_string, ident, sqlserver=False):
& Optional(assign("limit", expr))
)
+ outfile = Optional(
+ (
+ INTO
+ + keyword("outfile").suppress()
+ + literal_string ("loc")
+ + Optional (
+ keyword("fields")
+ + keyword("terminated")
+ + keyword("by")
+ + literal_string ("term")
+ )
+ )("outfile")
+ )
ordered_sql = (
(
(unordered_sql | (LB + query + RB))
@@ -448,6 +461,7 @@ def parser(literal_string, ident, sqlserver=False):
)("union")
+ Optional(ORDER_BY + delimited_list(Group(sort_column))("orderby"))
+ limit
+ + outfile
).set_parser_name("ordered sql") / to_union_call
with_expr = delimited_list(Group(
@@ -605,9 +619,27 @@ def parser(literal_string, ident, sqlserver=False):
+ Optional(assign("where", expr))
) / to_json_call
+ load = (
+ keyword("load")("op")
+ + keyword("data").suppress()
+ + keyword("infile")("loc")
+ + literal_string ("file")
+ + INTO
+ + keyword("table").suppress()
+ + var_name ("table")
+ + Optional(
+ keyword("fields").suppress()
+ + keyword("terminated").suppress()
+ + keyword("by").suppress()
+ + literal_string ("term")
+ )
+ ) ("load")
+
+
+
sql_stmts = delimited_list( (
query
- | (insert | update | delete)
+ | (insert | update | delete | load)
| (create_table | create_view | create_cache | create_index)
| (drop_table | drop_view | drop_index)
)("stmts"), ";")
@@ -617,6 +649,10 @@ def parser(literal_string, ident, sqlserver=False):
| udf
) ("stmts")
- stmts = ZeroOrMore(sql_stmts|other_stmt)
+ stmts = ZeroOrMore(
+ sql_stmts
+ |other_stmt
+ | keyword(";").suppress() # empty stmt
+ )
return stmts.finalize()
diff --git a/aquery_parser/utils.py b/aquery_parser/utils.py
index 6578c3a..6aeaec5 100644
--- a/aquery_parser/utils.py
+++ b/aquery_parser/utils.py
@@ -522,6 +522,7 @@ def to_union_call(tokens):
output["limit"] = tokens["limit"]
output["offset"] = tokens["offset"]
output["fetch"] = tokens["fetch"]
+ output["outfile"] = tokens["outfile"]
return output
diff --git a/engine/agg.py b/engine/agg.py
new file mode 100644
index 0000000..e69de29
diff --git a/engine/ast.py b/engine/ast.py
index fb48b03..86adaaf 100644
--- a/engine/ast.py
+++ b/engine/ast.py
@@ -4,13 +4,15 @@ from engine.utils import base62uuid
# replace column info with this later.
class ColRef:
- def __init__(self, k9name, type, cobj, cnt, table):
+ def __init__(self, k9name, _ty, cobj, cnt, table, name, id):
self.k9name = k9name
- self.type = type
+ self.type = _ty
self.cobj = cobj
self.cnt = cnt
self.table = table
- self.__arr__ = (k9name, type, cobj, cnt, table)
+ self.name = name
+ self.id = id
+ self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)
def __getitem__(self, key):
return self.__arr__[key]
@@ -28,6 +30,7 @@ class TableInfo:
self.columns = []
self.cxt = cxt
self.views = set()
+ self.rec = None
for c in cols:
self.add_col(c)
@@ -48,7 +51,7 @@ class TableInfo:
# root.cnt += 1
# column: (k9name, type, original col_object, dup_count)
- col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
+ col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns))
self.cxt.k9cols_byname[k9name] = col_object
self.columns_byname[c['name']] = col_object
@@ -62,7 +65,11 @@ class TableInfo:
return len(self.columns)
def get_k9colname(self, col_name):
- return self.columns_byname[col_name].k9name
+ col = self.columns_byname[col_name]
+ if type(self.rec) is list:
+ self.rec.append(col)
+ return col.k9name
+
def add_alias(self, alias):
# TODO: Exception when alias already defined.
# TODO: Scoping of alias should be constrainted in the query.
@@ -158,5 +165,5 @@ class ast_node:
def include(objs):
import inspect
for _, cls in inspect.getmembers(objs):
- if inspect.isclass(cls) and issubclass(cls, ast_node):
+ if inspect.isclass(cls) and issubclass(cls, ast_node) and not cls.name.startswith('_'):
ast_node.types[cls.name] = cls
\ No newline at end of file
diff --git a/engine/ddl.py b/engine/ddl.py
index 2fd8110..84fc205 100644
--- a/engine/ddl.py
+++ b/engine/ddl.py
@@ -1,7 +1,7 @@
# code-gen for data decl languages
-from engine.ast import TableInfo, ast_node, include
-
+from engine.ast import ColRef, TableInfo, ast_node, include
+from engine.utils import base62uuid
class create_table(ast_node):
name = 'create_table'
def produce(self, node):
@@ -27,6 +27,34 @@ class insert(ast_node):
else:
# subquery, dispatch to select astnode
pass
+
+class k9(ast_node):
+ name='k9'
+ def produce(self, node):
+ self.emit(node[self.name])
+class load(ast_node):
+ name="load"
+ def produce(self, node):
+ node = node[self.name]
+ tablename = 'l'+base62uuid(7)
+ keys = 'k'+base62uuid(7)
+ self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"")
+ self.emit(f"{keys}:!{tablename}")
+ table:TableInfo = self.context.tables_byname[node['table']]
+
+ for i, c in enumerate(table.columns):
+ c:ColRef
+ self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]')
+
+class outfile(ast_node):
+ name="_outfile"
+ def produce(self, node):
+ out_table:TableInfo = self.parent.out_table
+ self.emit_no_ln(f"\"{node['loc']['literal']}\"1:`csv@[[]")
+ for i, c in enumerate(out_table.columns):
+ self.emit_no_ln(f"{c.name}:{c.k9name}{';' if i < len(out_table.columns) - 1 else ''}")
+ self.emit(']')
+
import sys
include(sys.modules[__name__])
\ No newline at end of file
diff --git a/engine/expr.py b/engine/expr.py
index 9aa61c6..2d62c89 100644
--- a/engine/expr.py
+++ b/engine/expr.py
@@ -6,10 +6,12 @@ class expr(ast_node):
builtin_func_maps = {
'max': 'max',
'min': 'min',
- 'avg':'avg',
- 'sum':'sum',
- 'mins': 'mins',
- 'maxs': 'maxs'
+ 'avg': 'avg',
+ 'sum': 'sum',
+ 'mins': ['mins', 'minsw'],
+ 'maxs': ['maxs', 'maxsw'],
+ 'avgs': ['avgs', 'avgsw'],
+ 'sums': ['sums', 'sumsw'],
}
binary_ops = {
'sub':'-',
@@ -22,6 +24,10 @@ class expr(ast_node):
'gt':'>',
'lt':'<',
}
+ compound_ops = {
+ 'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'],
+ 'le' : [2, lambda x: f'~({x[0]}>{x[1]})'],
+ }
unary_ops = {
'neg' : '-',
'not' : '~'
@@ -45,19 +51,32 @@ class expr(ast_node):
if type(node) is dict:
for key, val in node.items():
if key in self.func_maps:
- self.k9expr += f"{self.func_maps[key]}("
# if type(val) in [dict, str]:
- self.k9expr += expr(self, val).k9expr
- self.k9expr += ')'
+ if type(val) is list and len(val) > 1:
+ k9func = self.func_maps[key]
+ k9func = k9func[len(val) - 1] if type(k9func) is list else k9func
+ self.k9expr += f"{k9func}["
+ for i, p in enumerate(val):
+ self.k9expr += expr(self, p).k9expr + (';'if i 1:
- self.emit(f'+{disp_varname}')
+ self.emit(f"+{disp_varname}")
else:
self.emit(f'+,(,{disp_varname})')
+ if flatten:
+ self.emit(f'{disp_varname}')
+ if flatten:
+ self.out_table.columns = cols
+ outfile(self, node['outfile'])
if self.datasource_changed:
self.context.datasource = self.prev_datasource
diff --git a/engine/utils.py b/engine/utils.py
index 9c58764..283a80f 100644
--- a/engine/utils.py
+++ b/engine/utils.py
@@ -1,12 +1,19 @@
import uuid
+base62alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
def base62uuid(crop=8):
- alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
id = uuid.uuid4().int
ret = ''
while id:
- ret = alp[id % 62] + ret
+ ret = base62alp[id % 62] + ret
id //= 62
- return ret[:crop] if len(ret) else '0'
\ No newline at end of file
+ return ret[:crop] if len(ret) else '0'
+
+def enlist(l):
+ return l if type(l) is list else [l]
+
+def seps(s, i, l):
+ return s if i < len(l) - 1 else ''
\ No newline at end of file
diff --git a/header.k b/header.k
index 0d1cc4c..287f913 100644
--- a/header.k
+++ b/header.k
@@ -1,2 +1,28 @@
+import`csv
+
maxs:{[L]{max(x, y)}\L}
mins:{[L]{min(x, y)}\L}
+sums:{[L]{(x + y)}\L}
+
+avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]}
+avgs:{[L] avgsimpl[sums[L];0]}
+
+maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
+maxsw:{[w;L]maxswimp[L; w; 1]}
+
+minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, minswimp[L; w; i + 1];curr]}
+minsw:{[w;L]minswimp[L;w;1]}
+
+avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]}
+avgsw:{[w;L] avgswimp[L;w;0;0]}
+
+sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]}
+sumsw:{[w;L] sumswimp[L;w;0;0]}
+
+groupbyi:{[L;GV;i]
+ k:(,(L[i]));gvk:GV[k][0];
+ found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV];
+ cg:(,L[i])!$[found;,gvk[0],i;,(,i)];
+ $[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]}
+groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]}
+groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]}
diff --git a/moving_avg.csv b/moving_avg.csv
index f6b2570..8016053 100644
--- a/moving_avg.csv
+++ b/moving_avg.csv
@@ -1,6 +1,6 @@
Month,sales
1,100
2,120
-4,140
3,140
+4,140
5,130
diff --git a/prompt.py b/prompt.py
index 1395edc..bdf0abf 100644
--- a/prompt.py
+++ b/prompt.py
@@ -3,6 +3,13 @@ import aquery_parser as parser
import engine
import subprocess
+import sys
+if sys.platform != 'win32':
+ import readline
+
+# else:
+# import pyreadline3
+
test_parser = True
# code to test parser
@@ -37,8 +44,8 @@ while test_parser:
print(stmts)
continue
trimed = ws.sub(' ', q.lower()).split(' ')
- if trimed[0] == 'file':
- fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \
+ if trimed[0].startswith('f'):
+ fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
else trimed[1]
with open(fn, 'r') as file:
@@ -47,6 +54,6 @@ while test_parser:
continue
stmts = parser.parse(q)
print(stmts)
- except ValueError as e:
+ except (ValueError) as e:
print(type(e), e)
diff --git a/q1.sql b/q1.sql
index e69de29..72b1d2a 100644
--- a/q1.sql
+++ b/q1.sql
@@ -0,0 +1,9 @@
+CREATE TABLE test(a INT, b INT, c INT, d INT)
+
+LOAD DATA INFILE "test.csv"
+INTO TABLE test
+FIELDS TERMINATED BY ","
+
+SELECT sum(c), b, d
+FROM test
+group by a,b,d
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 92b3841..52da39d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ mo-future
mo-dots==8.20.21357
mo-parsing
mo-imports
+readline; sys_platform != 'win32'
\ No newline at end of file
diff --git a/stock.a b/stock.a
index a5578ba..f55ae52 100644
--- a/stock.a
+++ b/stock.a
@@ -17,11 +17,13 @@ INSERT INTO stocks VALUES(14,5)
INSERT INTO stocks VALUES(15,2)
INSERT INTO stocks VALUES(16,5)
-SELECT max(price-min(timestamp)) FROM stocks
-
-SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100)
+ "q1"
+SELECT max(price-min(timestamp)) FROM stocks
+ "q2"
+SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100);
+ "q3"
SELECT max(price-mins(price))
FROM stocks
ASSUMING ASC timestamp