Bill Sun 3 years ago
parent 3b2dfb295e
commit 277dad6b3e

.gitignore

@@ -18,3 +18,6 @@ out.k
 k
 *.so
 *.pdf
+test*.c*
+*.csv
+*.out

@@ -72,6 +72,7 @@ REFERENCES = keyword("references").suppress()
 RECURSIVE = keyword("recursive").suppress()
 VALUES = keyword("values").suppress()
 WINDOW = keyword("window")
+INTO = keyword("into").suppress()

 PRIMARY_KEY = Group(PRIMARY + KEY).set_parser_name("primary_key")
 FOREIGN_KEY = Group(FOREIGN + KEY).set_parser_name("foreign_key")

@@ -226,6 +227,7 @@ RESERVED = MatchFirst([
     WINDOW,
     WITH,
     WITHIN,
+    INTO,
 ])

 L_INLINE = Literal("<k>").suppress()
 R_INLINE = Literal("</k>").suppress()
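Note: L_INLINE/R_INLINE appear to delimit raw k9 snippets embedded between SQL statements; the stock.a script at the bottom of this commit uses them to tag queries, e.g.:

    <k> "q1" </k>
    SELECT max(price-min(timestamp)) FROM stocks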

@@ -29,7 +29,7 @@ def common_parser():
         ansi_ident | mysql_backtick_ident | simple_ident, separator=".", combine=True,
     )).set_parser_name("identifier")

-    return parser(ansi_string, combined_ident)
+    return parser(ansi_string | mysql_doublequote_string, combined_ident)


 def mysql_parser():
@@ -436,6 +436,19 @@ def parser(literal_string, ident, sqlserver=False):
         & Optional(assign("limit", expr))
     )

+    outfile = Optional(
+        (
+            INTO
+            + keyword("outfile").suppress()
+            + literal_string("loc")
+            + Optional(
+                keyword("fields")
+                + keyword("terminated")
+                + keyword("by")
+                + literal_string("term")
+            )
+        )("outfile")
+    )
+
     ordered_sql = (
         (
             (unordered_sql | (LB + query + RB))

@@ -448,6 +461,7 @@ def parser(literal_string, ident, sqlserver=False):
         )("union")
         + Optional(ORDER_BY + delimited_list(Group(sort_column))("orderby"))
         + limit
+        + outfile
     ).set_parser_name("ordered sql") / to_union_call

     with_expr = delimited_list(Group(
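Note: a sketch of input the new outfile branch should accept (MySQL-style SELECT ... INTO OUTFILE; the file name and terminator here are illustrative). The file name binds to "loc" and the terminator to "term":

    SELECT Month, sales FROM stocks
    INTO OUTFILE "out.csv"
    FIELDS TERMINATED BY ","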
@@ -605,9 +619,27 @@ def parser(literal_string, ident, sqlserver=False):
         + Optional(assign("where", expr))
     ) / to_json_call

+    load = (
+        keyword("load")("op")
+        + keyword("data").suppress()
+        + keyword("infile")("loc")
+        + literal_string("file")
+        + INTO
+        + keyword("table").suppress()
+        + var_name("table")
+        + Optional(
+            keyword("fields").suppress()
+            + keyword("terminated").suppress()
+            + keyword("by").suppress()
+            + literal_string("term")
+        )
+    )("load")
+
     sql_stmts = delimited_list((
         query
-        | (insert | update | delete)
+        | (insert | update | delete | load)
         | (create_table | create_view | create_cache | create_index)
         | (drop_table | drop_view | drop_index)
     )("stmts"), ";")
@@ -617,6 +649,10 @@ def parser(literal_string, ident, sqlserver=False):
         | udf
     )("stmts")

-    stmts = ZeroOrMore(sql_stmts | other_stmt)
+    stmts = ZeroOrMore(
+        sql_stmts
+        | other_stmt
+        | keyword(";").suppress()  # empty stmt
+    )

     return stmts.finalize()
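Note: the extra alternative makes stray semicolons parse as empty statements instead of errors. A minimal check, assuming the top-level parse entry point used by the REPL later in this commit:

    import aquery_parser as parser

    # one statement plus a trailing empty statement; previously a parse failure
    stmts = parser.parse('SELECT Month FROM stocks;;')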

@@ -522,6 +522,7 @@ def to_union_call(tokens):
     output["limit"] = tokens["limit"]
     output["offset"] = tokens["offset"]
     output["fetch"] = tokens["fetch"]
+    output["outfile"] = tokens["outfile"]
     return output

@@ -4,13 +4,15 @@ from engine.utils import base62uuid

 # replace column info with this later.
 class ColRef:
-    def __init__(self, k9name, type, cobj, cnt, table):
+    def __init__(self, k9name, _ty, cobj, cnt, table, name, id):
         self.k9name = k9name
-        self.type = type
+        self.type = _ty
         self.cobj = cobj
         self.cnt = cnt
         self.table = table
-        self.__arr__ = (k9name, type, cobj, cnt, table)
+        self.name = name
+        self.id = id
+        self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)

     def __getitem__(self, key):
         return self.__arr__[key]
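Note: ColRef now also carries the column's source name and positional id, and both are mirrored into __arr__ so tuple-style indexing stays consistent. A hypothetical construction (cobj and tbl stand in for a real column dict and TableInfo):

    c = ColRef('t0price', 'int', cobj, 1, tbl, 'price', 0)
    c.name   # 'price'
    c[5]     # 'price' as well, via __getitem__ reading through __arr__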
@@ -28,6 +30,7 @@ class TableInfo:
         self.columns = []
         self.cxt = cxt
         self.views = set()
+        self.rec = None
         for c in cols:
             self.add_col(c)
@@ -48,7 +51,7 @@ class TableInfo:
         # root.cnt += 1
         # column: (k9name, type, original col_object, dup_count)
-        col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
+        col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self, c['name'], len(self.columns))
         self.cxt.k9cols_byname[k9name] = col_object
         self.columns_byname[c['name']] = col_object
@@ -62,7 +65,11 @@ class TableInfo:
         return len(self.columns)

     def get_k9colname(self, col_name):
-        return self.columns_byname[col_name].k9name
+        col = self.columns_byname[col_name]
+        if type(self.rec) is list:
+            self.rec.append(col)
+        return col.k9name

     def add_alias(self, alias):
         # TODO: Exception when alias already defined.
         # TODO: Scoping of alias should be constrained to the query.
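Note: the rec hook turns column resolution into a recorder: while rec is a list, every column resolved through get_k9colname is appended to it. The new groupby node later in this commit uses exactly this to collect the columns a grouping expression touches:

    table.rec = []                         # start recording
    k9name = table.get_k9colname('price')  # resolves and records the ColRef
    referenced = table.rec                 # every column touched meanwhile
    table.rec = None                       # stop recording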
@@ -158,5 +165,5 @@ class ast_node:
 def include(objs):
     import inspect
     for _, cls in inspect.getmembers(objs):
-        if inspect.isclass(cls) and issubclass(cls, ast_node):
+        if inspect.isclass(cls) and issubclass(cls, ast_node) and not cls.name.startswith('_'):
             ast_node.types[cls.name] = cls

@@ -1,7 +1,7 @@
 # code-gen for data decl languages

-from engine.ast import TableInfo, ast_node, include
+from engine.ast import ColRef, TableInfo, ast_node, include
 from engine.utils import base62uuid

 class create_table(ast_node):
     name = 'create_table'
     def produce(self, node):
@@ -28,5 +28,33 @@ class insert(ast_node):
         # subquery, dispatch to select astnode
         pass

+class k9(ast_node):
+    name = 'k9'
+    def produce(self, node):
+        self.emit(node[self.name])
+
+class load(ast_node):
+    name = "load"
+    def produce(self, node):
+        node = node[self.name]
+        tablename = 'l' + base62uuid(7)
+        keys = 'k' + base62uuid(7)
+        self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"")
+        self.emit(f"{keys}:!{tablename}")
+        table: TableInfo = self.context.tables_byname[node['table']]
+        for i, c in enumerate(table.columns):
+            c: ColRef
+            self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]')
+
+class outfile(ast_node):
+    name = "_outfile"
+    def produce(self, node):
+        out_table: TableInfo = self.parent.out_table
+        self.emit_no_ln(f"\"{node['loc']['literal']}\"1:`csv@[[]")
+        for i, c in enumerate(out_table.columns):
+            self.emit_no_ln(f"{c.name}:{c.k9name}{';' if i < len(out_table.columns) - 1 else ''}")
+        self.emit(']')
+
 import sys
 include(sys.modules[__name__])
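Note: for a table test(a, b) loaded from "test.csv", the load node would emit k9 roughly as below (the l/k identifiers are random base62uuid suffixes shown as placeholders, and a0/b0 are illustrative column k9 names):

    lXXXXXXX:`csv ? 1:"test.csv"
    kXXXXXXX:!lXXXXXXX
    a0:lXXXXXXX[(kXXXXXXX)[0]]
    b0:lXXXXXXX[(kXXXXXXX)[1]]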

@@ -6,10 +6,12 @@ class expr(ast_node):
     builtin_func_maps = {
         'max': 'max',
         'min': 'min',
-        'avg':'avg',
-        'sum':'sum',
-        'mins': 'mins',
-        'maxs': 'maxs'
+        'avg': 'avg',
+        'sum': 'sum',
+        'mins': ['mins', 'minsw'],
+        'maxs': ['maxs', 'maxsw'],
+        'avgs': ['avgs', 'avgsw'],
+        'sums': ['sums', 'sumsw'],
     }

     binary_ops = {
         'sub':'-',
binary_ops = {
'sub':'-',
@@ -22,6 +24,10 @@ class expr(ast_node):
         'gt':'>',
         'lt':'<',
     }
+    compound_ops = {
+        'ge': [2, lambda x: f'~({x[0]}<{x[1]})'],
+        'le': [2, lambda x: f'~({x[0]}>{x[1]})'],
+    }
     unary_ops = {
         'neg' : '-',
         'not' : '~'
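Note: compound_ops lowers >= and <= through negation of the primitive comparisons, so a >= b is emitted as ~(a<b). Each entry is [arity, rewriter]:

    arity, rewrite = expr.compound_ops['ge']
    rewrite(['price', '100'])   # -> '~(price<100)'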
@@ -45,19 +51,32 @@ class expr(ast_node):
         if type(node) is dict:
             for key, val in node.items():
                 if key in self.func_maps:
-                    self.k9expr += f"{self.func_maps[key]}("
                     # if type(val) in [dict, str]:
-                    self.k9expr += expr(self, val).k9expr
-                    self.k9expr += ')'
+                    if type(val) is list and len(val) > 1:
+                        k9func = self.func_maps[key]
+                        k9func = k9func[len(val) - 1] if type(k9func) is list else k9func
+                        self.k9expr += f"{k9func}["
+                        for i, p in enumerate(val):
+                            self.k9expr += expr(self, p).k9expr + (';' if i < len(val) - 1 else '')
+                    else:
+                        self.k9expr += f"{self.func_maps[key]}["
+                        self.k9expr += expr(self, val).k9expr
+                    self.k9expr += ']'
                 elif key in self.binary_ops:
                     l = expr(self, val[0]).k9expr
                     r = expr(self, val[1]).k9expr
                     self.k9expr += f'({l}{self.binary_ops[key]}{r})'
+                elif key in self.compound_ops:
+                    x = []
+                    if type(val) is list:
+                        for v in val:
+                            x.append(expr(self, v).k9expr)
+                    self.k9expr = self.compound_ops[key][1](x)
                 elif key in self.unary_ops:
                     self.k9expr += f'({expr(self, val).k9expr}{self.unary_ops[key]})'
                 else:
                     print(f'Undefined expr: {key}{val}')
         elif type(node) is str:
             p = self.parent
             while type(p) is expr and not p.isvector:
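Note: the list check is what selects the windowed variants added to func_maps above: a one-argument call keeps index 0, a two-argument call picks index len(val)-1 == 1. Illustratively:

    mins(price)      ->  mins[price]
    mins(3, price)   ->  minsw[3;price]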

@@ -0,0 +1,37 @@
+from engine.ast import ast_node
+from engine.utils import base62uuid
+from engine.expr import expr
+
+class groupby(ast_node):
+    name = '_groupby'
+    def init(self, _):
+        self.group = 'g' + base62uuid(7)
+        self.datasource = self.parent.datasource
+        self.datasource.rec = []
+
+    def produce(self, node):
+        if type(node) is not list:
+            node = [node]
+        g_contents = '('
+        for i, g in enumerate(node):
+            v = g['value']
+            e = expr(self, v).k9expr
+            # if v is compound expr, create tmp cols
+            if type(v) is not str:
+                tmpcol = 't' + base62uuid(7)
+                self.emit(f'{tmpcol}:{e}')
+                e = tmpcol
+            g_contents += e + (';' if i < len(node) - 1 else '')
+        self.emit(f'{self.group}:' + g_contents + ')')
+        if len(node) <= 1:
+            self.emit(f'{self.group}:={self.group}')
+        else:
+            self.emit(f'{self.group}:groupby[{self.group}[0];+{self.group}]')
+
+    def consume(self, _):
+        self.referenced = self.datasource.rec
+        self.datasource.rec = None
+        return super().consume(_)
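Note: for GROUP BY a, b over columns with k9 names a0 and b0, produce emits roughly the following (g... is a random identifier; a single grouping key takes the := path instead of the groupby call):

    gXXXXXXX:(a0;b0)
    gXXXXXXX:groupby[gXXXXXXX[0];+gXXXXXXX]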

@@ -1,9 +1,10 @@
-from engine.ast import TableInfo, ast_node, Context, include
+from engine.ast import ColRef, TableInfo, ast_node, Context, include
+from engine.groupby import groupby
 from engine.join import join
 from engine.expr import expr
 from engine.scan import filter
-from engine.utils import base62uuid
+from engine.utils import base62uuid, enlist, base62alp
+from engine.ddl import outfile

 class projection(ast_node):
     name = 'select'
     def __init__(self, parent: ast_node, node, context: Context = None, outname = None, disp = True):
@@ -35,11 +36,13 @@ class projection(ast_node):
                 projection(self, from_clause, disp=False)
             else:
                 # TODO: from func over table
-                print(f"from func over table{node}")
+                print(f'from func over table{node}')
         elif type(value) is str:
             self.datasource = self.context.tables_byname[value]

         if 'assumptions' in from_clause:
-            ord = from_clause['assumptions']['ord'] == 'asc'
+            for assumption in enlist(from_clause['assumptions']):
+                ord = assumption['ord'] == 'asc'
+                attrib = assumption['attrib']
+                ord = '^' if ord else '|^'
                 # TODO: generate view of table by order
@@ -57,26 +60,70 @@ class projection(ast_node):
             self.datasource = filter(self, node['where'], True).output
             self.context.datasource = self.datasource

+        if 'groupby' in node:
+            self.group_node = groupby(self, node['groupby'])
+        else:
+            self.group_node = None

-    def consume(self, _):
+    def consume(self, node):
         disp_varname = 'd' + base62uuid(7)
+        pcolrefs = []
+        if type(self.group_node) is groupby:
+            grp_table = self.group_node.group
+            grp_refs = self.group_node.referenced
+            for i, proj in enumerate(self.projections):
+                self.datasource.rec = []
+                cname = ''
+                if type(proj) is dict:
+                    if 'value' in proj:
+                        e = proj['value']
+                        if type(e) is str:
+                            cname = self.datasource.parse_tablenames(proj['value'])
+                        elif type(e) is dict:
+                            cname = expr(self, e).k9expr
+                        cname = ''.join([a if a in base62alp else '' for a in cname])
+                pcolrefs.append(self.datasource.rec)
+            self.datasource.rec = None
+            keys = 'k' + base62uuid(7)
+            self.emit(f'{keys}:!{grp_table}')
+            fn = 'fn' + base62uuid(6)
+            # self.emit
+
         self.emit_no_ln(f'{disp_varname}:(')
+        flatten = False
+        cols = []
+        self.out_table = TableInfo('out_' + base62uuid(4), [], self.context)
+        if 'outfile' in node:
+            flatten = True
         for i, proj in enumerate(self.projections):
+            cname = ''
             if type(proj) is dict:
                 if 'value' in proj:
                     e = proj['value']
                     if type(e) is str:
-                        self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])}")
+                        cname = self.datasource.parse_tablenames(proj['value'])
+                        self.emit_no_ln(f"{cname}")
                     elif type(e) is dict:
-                        self.emit_no_ln(f"{expr(self, e).k9expr}")
+                        cname = expr(self, e).k9expr
+                        self.emit_no_ln(f"{cname}")
+                    cname = ''.join([a if a in base62alp else '' for a in cname])
             self.emit_no_ln(';' if i < len(self.projections) - 1 else '')
+            cols.append(ColRef(f'(+{disp_varname})[{i}]', 'generic', self.out_table, 0, None, cname, i))
         self.emit(')')
-        if self.disp:
+        if flatten:
+            self.emit_no_ln(f'{disp_varname}:' if flatten else '')
+        if flatten or self.disp:
             if len(self.projections) > 1:
-                self.emit(f'+{disp_varname}')
+                self.emit(f"+{disp_varname}")
             else:
                 self.emit(f'+,(,{disp_varname})')
+            if flatten:
+                self.emit(f'{disp_varname}')
+        if flatten:
+            self.out_table.columns = cols
+            outfile(self, node['outfile'])

         if self.datasource_changed:
             self.context.datasource = self.prev_datasource
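Note: a hedged end-to-end trace for SELECT a, b FROM test INTO OUTFILE "out.csv" (d... is the random display variable, a0/b0 illustrative column k9 names): the projection tuple is built, flattened, echoed, then handed to the outfile node above:

    dXXXXXXX:(a0;b0)
    dXXXXXXX:+dXXXXXXX
    dXXXXXXX
    "out.csv"1:`csv@[[]a:(+dXXXXXXX)[0];b:(+dXXXXXXX)[1]]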

@@ -1,12 +1,19 @@
 import uuid

+base62alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
 def base62uuid(crop=8):
-    alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
     id = uuid.uuid4().int
     ret = ''

     while id:
-        ret = alp[id % 62] + ret
+        ret = base62alp[id % 62] + ret
         id //= 62

     return ret[:crop] if len(ret) else '0'
+
+def enlist(l):
+    return l if type(l) is list else [l]
+
+def seps(s, i, l):
+    return s if i < len(l) - 1 else ''
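Note: quick sanity examples for the helpers (outputs illustrative):

    base62uuid(7)         # e.g. 'aZ3kQ9p', a 7-char random suffix for generated names
    enlist('asc')         # ['asc']; lets callers treat single items and lists alike
    seps(';', 0, [1, 2])  # ';' between elements, '' after the last one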

@@ -1,2 +1,28 @@
 import`csv

+maxs:{[L]{max(x, y)}\L}
+mins:{[L]{min(x, y)}\L}
+sums:{[L]{(x + y)}\L}
+avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]}
+avgs:{[L] avgsimpl[sums[L];0]}
+maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
+maxsw:{[w;L]maxswimp[L; w; 1]}
+minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, minswimp[L; w; i + 1];curr]}
+minsw:{[w;L]minswimp[L;w;1]}
+avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]}
+avgsw:{[w;L] avgswimp[L;w;0;0]}
+sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]}
+sumsw:{[w;L] sumswimp[L;w;0;0]}
+groupbyi:{[L;GV;i]
+  k:(,(L[i]));gvk:GV[k][0];
+  found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV];
+  cg:(,L[i])!$[found;,gvk[0],i;,(,i)];
+  $[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]}
+groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]}
+groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]}
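Note: a minimal Python model of the running and windowed aggregates above, assuming a trailing window of width w clamped at index 0 (which is what (i-w)+!w with |0 computes):

    def maxs(L):
        # running maximum: the k9 scan {max(x,y)}\L
        out, cur = [], None
        for x in L:
            cur = x if cur is None else max(cur, x)
            out.append(cur)
        return out

    def maxsw(w, L):
        # max over the trailing w elements, window clipped at the start
        return [max(L[max(i - w + 1, 0):i + 1]) for i in range(len(L))]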

@@ -1,6 +1,6 @@
 Month,sales
 1,100
 2,120
-4,140
+3,140
 4,140
 5,130


@@ -3,6 +3,13 @@ import aquery_parser as parser
 import engine
 import subprocess
 import sys

+if sys.platform != 'win32':
+    import readline
+# else:
+#     import pyreadline3
+
 test_parser = True

 # code to test parser
@@ -37,8 +44,8 @@ while test_parser:
             print(stmts)
             continue
         trimed = ws.sub(' ', q.lower()).split(' ')
-        if trimed[0] == 'file':
-            fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \
+        if trimed[0].startswith('f'):
+            fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
                 else trimed[1]
             with open(fn, 'r') as file:

@@ -47,6 +54,6 @@ while test_parser:
             continue
         stmts = parser.parse(q)
         print(stmts)
-    except ValueError as e:
+    except (ValueError) as e:
         print(type(e), e)

@@ -0,0 +1,9 @@
+CREATE TABLE test(a INT, b INT, c INT, d INT)
+
+LOAD DATA INFILE "test.csv"
+INTO TABLE test
+FIELDS TERMINATED BY ","
+
+SELECT sum(c), b, d
+FROM test
+GROUP BY a, b, d

@@ -2,3 +2,4 @@ mo-future
 mo-dots==8.20.21357
 mo-parsing
 mo-imports
+readline; sys_platform != 'win32'

@@ -17,11 +17,13 @@ INSERT INTO stocks VALUES(14,5)
 INSERT INTO stocks VALUES(15,2)
 INSERT INTO stocks VALUES(16,5)

-SELECT max(price-min(timestamp)) FROM stocks
-SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100)
+<k> "q1" </k>
+SELECT max(price-min(timestamp)) FROM stocks
+<k> "q2" </k>
+SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100);
+<k> "q3" </k>
+SELECT max(price-mins(price))
+FROM stocks
+ASSUMING ASC timestamp
