Added filter, basic aggregations.

Fixed bugs with var length, etc.
Branch: dev
Bill Sun, 3 years ago
parent 137d670da2
commit 3b2dfb295e

.gitignore

@@ -14,4 +14,7 @@ vendor/
 .DS_Store
 .eggs
 .vscode
 out.k
+k
+*.so
+*.pdf

engine/ast.py
@@ -1,60 +1,111 @@
 from typing import List
+from engine.utils import base62uuid
 
 # replace column info with this later.
 class ColRef:
-    def __init__(self, k9name, type, cobj, cnt):
+    def __init__(self, k9name, type, cobj, cnt, table):
         self.k9name = k9name
         self.type = type
         self.cobj = cobj
         self.cnt = cnt
+        self.table = table
+        self.__arr__ = [k9name, type, cobj, cnt, table] # list (not tuple) so __setitem__ can mutate it
+    def __getitem__(self, key):
+        return self.__arr__[key]
+    def __setitem__(self, key, value):
+        self.__arr__[key] = value
+
 class TableInfo:
     def __init__(self, table_name, cols, cxt:'Context'):
         # statics
         self.table_name = table_name
+        self.alias = set([table_name])
         self.columns_byname = dict() # column_name, type
         self.columns = []
+        self.cxt = cxt
+        self.views = set()
         for c in cols:
-            k9name = self.table_name + c['name']
-            if k9name in cxt.k9cols_byname: # duplicate names?
-                root = cxt.k9cols_byname[k9name]
-                k9name = k9name + root[3]
-                root[3] += 1
-            # column: (k9name, type, original col_object, dup_count)
-            col_object = (k9name, (list(c['type'].keys()))[0], c, 1)
-            cxt.k9cols_byname[k9name] = col_object
-            self.columns_byname[c['name']] = col_object
-            self.columns.append(col_object)
+            self.add_col(c)
         # runtime
         self.n_rows = 0 # number of rows
         self.order = [] # assumptions
         cxt.tables_byname[self.table_name] = self # construct reverse map
+    def add_col(self, c):
+        if type(c) is ColRef:
+            c = c.cobj
+        k9name = 'c' + base62uuid(7)
+        # k9name = self.table_name + c['name']
+        # if k9name in self.cxt.k9cols_byname: # duplicate names?
+        #     root = self.cxt.k9cols_byname[k9name]
+        #     k9name = k9name + root.cnt
+        #     root.cnt += 1
+        # column: (k9name, type, original col_object, dup_count)
+        col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
+        self.cxt.k9cols_byname[k9name] = col_object
+        self.columns_byname[c['name']] = col_object
+        self.columns.append(col_object)
+    def construct(self):
+        for c in self.columns:
+            self.cxt.emit(f'{c.k9name}:()')
     @property
     def n_cols(self):
         return len(self.columns)
     def get_k9colname(self, col_name):
-        return self.columns_byname[col_name][0]
-    def parse_tablenames(self, str):
-        # TODO: deal with alias
-        return self.get_k9colname(str)
+        return self.columns_byname[col_name].k9name
+    def add_alias(self, alias):
+        # TODO: Exception when alias already defined.
+        # TODO: Scoping of alias should be constrained within the query.
+        self.cxt.tables_byname[alias] = self
+        self.alias.add(alias)
+    def parse_tablenames(self, colExpr):
+        parsedColExpr = colExpr.split('.')
+        if len(parsedColExpr) <= 1:
+            return self.get_k9colname(colExpr)
+        else:
+            datasource = self.cxt.tables_byname.get(parsedColExpr[0]) # .get so the None check below can trigger
+            if datasource is None:
+                raise ValueError(f'Table name/alias not defined: {parsedColExpr[0]}')
+            else:
+                return datasource.get_k9colname(parsedColExpr[1])
+
+class View:
+    def __init__(self, context, table = None, tmp = True):
+        self.table: TableInfo = table
+        self.name = 'v'+base62uuid(7)
+        if type(table) is TableInfo:
+            table.views.add(self)
+        self.context = context
+    def construct(self):
+        self.context.emit(f'{self.name}:()')
+
 class Context:
     def __init__(self):
         self.tables:List[TableInfo] = []
         self.tables_byname = dict()
         self.k9cols_byname = dict()
         self.udf_map = dict()
+        # read header
         self.k9code = ''
+        with open('header.k', 'r') as outfile:
+            self.k9code = outfile.read()
+        # datasource will be available after the `from' clause is parsed
+        # and will be deactivated when the `from' is out of scope
+        self.datasource = None
     def add_table(self, table_name, cols):
         tbl = TableInfo(table_name, cols, self)
@@ -63,7 +114,7 @@ class Context:
     def gen_tmptable(self):
         from engine.utils import base62uuid
-        return f'tmp{base62uuid()}'
+        return f't{base62uuid(7)}'
     def emit(self, codelet):
         self.k9code += codelet + '\n'
@@ -76,6 +127,7 @@ class ast_node:
     types = dict()
     def __init__(self, parent:"ast_node", node, context:Context = None):
         self.context = parent.context if context is None else context
+        self.parent = parent
         self.init(node)
         self.produce(node)
         self.spawn(node)
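
A quick way to read the TableInfo changes above: an alias is just an extra key in cxt.tables_byname pointing at the same table object, so parse_tablenames can resolve either `table.column` or `alias.column`, while bare names resolve against the table it is called on (the current datasource). A self-contained sketch of that lookup, with illustrative class and variable names rather than the module's API:

    # Minimal sketch of alias-based column resolution, mirroring add_alias/parse_tablenames above.
    class Tbl:
        def __init__(self, name, cols):
            self.name, self.cols = name, cols      # cols: SQL column name -> generated k9 name
        def colname(self, c):
            return self.cols[c]

    tables = {}                                    # plays the role of cxt.tables_byname
    stocks = Tbl('stocks', {'price': 'cAbc1234', 'timestamp': 'cDef5678'})
    tables['stocks'] = stocks
    tables['s'] = stocks                           # add_alias('s'): the alias maps to the same object

    def parse_tablenames(col_expr, default_table):
        parts = col_expr.split('.')
        if len(parts) <= 1:                        # unqualified name: use the current datasource
            return default_table.colname(col_expr)
        src = tables.get(parts[0])
        if src is None:
            raise ValueError(f'Table name/alias not defined: {parts[0]}')
        return src.colname(parts[1])

    print(parse_tablenames('price', stocks))       # cAbc1234
    print(parse_tablenames('s.timestamp', stocks)) # cDef5678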

engine/expr.py
@@ -8,29 +8,38 @@ class expr(ast_node):
         'min': 'min',
         'avg':'avg',
         'sum':'sum',
+        'mins': 'mins',
+        'maxs': 'maxs'
     }
     binary_ops = {
         'sub':'-',
         'add':'+',
         'mul':'*',
         'div':'%',
+        'mod':'mod',
+        'and':'&',
+        'or':'|',
+        'gt':'>',
+        'lt':'<',
     }
     unary_ops = {
         'neg' : '-',
+        'not' : '~'
     }
     def __init__(self, parent, node):
+        ast_node.__init__(self, parent, node, None)
+
+    def init(self, _):
         from engine.projection import projection
+        parent = self.parent
+        self.isvector = parent.isvector if type(parent) is expr else False
         if type(parent) in [projection, expr]:
             self.datasource = parent.datasource
         else:
-            self.datasource = None
+            self.datasource = self.context.datasource
         self.udf_map = parent.context.udf_map
         self.k9expr = ''
         self.func_maps = {**self.udf_map, **self.builtin_func_maps}
-        ast_node.__init__(self, parent, node, None)
 
     def produce(self, node):
         if type(node) is dict:
@@ -39,7 +48,6 @@ class expr(ast_node):
                 self.k9expr += f"{self.func_maps[key]}("
                 # if type(val) in [dict, str]:
                 self.k9expr += expr(self, val).k9expr
                 self.k9expr += ')'
             elif key in self.binary_ops:
                 l = expr(self, val[0]).k9expr
@@ -51,7 +59,13 @@ class expr(ast_node):
             else:
                 print(f'Undefined expr: {key}{val}')
         elif type(node) is str:
+            p = self.parent
+            while type(p) is expr and not p.isvector:
+                p.isvector = True
+                p = p.parent
             self.k9expr = self.datasource.parse_tablenames(node)
+        elif type(node) is bool:
+            self.k9expr = '1' if node else '0'
         else:
             self.k9expr = f'{node}'
 
     def __str__(self):
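
The new binary_ops entries map SQL-level operators straight onto k9 operators, and produce() walks the parser's nested dicts to stitch a single k9 expression string together. A rough standalone sketch of that recursion under the same operator table, illustrative only; the real expr class also resolves column names through its datasource and tracks isvector:

    # Sketch of the operator-table dispatch used by expr.produce (illustrative helper, not the class itself).
    binary_ops = {'sub': '-', 'add': '+', 'mul': '*', 'div': '%',
                  'mod': 'mod', 'and': '&', 'or': '|', 'gt': '>', 'lt': '<'}

    def to_k9(node, colmap):
        # node mirrors the parser output, e.g. {'gt': [{'sub': ['price', 'timestamp']}, 1]}
        if isinstance(node, dict):
            (op, args), = node.items()
            l, r = (to_k9(a, colmap) for a in args)
            return f'({l}){binary_ops[op]}({r})'
        if isinstance(node, str):                  # column reference -> generated k9 column name
            return colmap[node]
        return str(node)                           # numeric literal

    print(to_k9({'gt': [{'sub': ['price', 'timestamp']}, 1]},
                {'price': 'cP', 'timestamp': 'cT'}))   # ((cP)-(cT))>(1)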

engine/projection.py
@@ -1,6 +1,7 @@
 from engine.ast import TableInfo, ast_node, Context, include
 from engine.join import join
 from engine.expr import expr
+from engine.scan import filter
 from engine.utils import base62uuid
 
 class projection(ast_node):
@@ -15,7 +16,7 @@ class projection(ast_node):
     def produce(self, node):
         p = node['select']
-        self.projections = p if type(projection) == list else [p]
+        self.projections = p if type(p) is list else [p]
         print(node)
 
     def spawn(self, node):
@@ -47,27 +48,37 @@ class projection(ast_node):
         if self.datasource is None:
             raise ValueError('spawn error: from clause')
+        if self.datasource is not None:
+            self.datasource_changed = True
+            self.prev_datasource = self.context.datasource
+            self.context.datasource = self.datasource
         if 'where' in node:
-            # apply filter
-            pass
+            self.datasource = filter(self, node['where'], True).output
+            self.context.datasource = self.datasource
 
-    def consume(self, node):
-        disp_varname = 'disptmp' + base62uuid()
+    def consume(self, _):
+        disp_varname = 'd'+base62uuid(7)
         self.emit_no_ln(f'{disp_varname}:(')
-        for proj in self.projections:
+        for i, proj in enumerate(self.projections):
             if type(proj) is dict:
                 if 'value' in proj:
                     e = proj['value']
                     if type(e) is str:
-                        self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])};")
+                        self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])}")
                     elif type(e) is dict:
-                        self.emit_no_ln(f"{expr(self, e).k9expr};")
+                        self.emit_no_ln(f"{expr(self, e).k9expr}")
+            self.emit_no_ln(';' if i < len(self.projections)-1 else '')
         self.emit(')')
         if self.disp:
-            self.emit(disp_varname)
+            if len(self.projections) > 1:
+                self.emit(f'+{disp_varname}')
+            else:
+                self.emit(f'+,(,{disp_varname})')
+        if self.datasource_changed:
+            self.context.datasource = self.prev_datasource
 
 import sys
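
In consume() the enumerate index only controls separators: each projection snippet lands between `{disp_varname}:(` and `)`, with ';' between items and nothing after the last one. The effect is the same as a join; a compact illustration with made-up snippet values:

    # Equivalent of the emit loop above: ';' separators only between projections.
    projections = ['cP', '(cP)-(cT)']              # already-generated k9 snippets (hypothetical)
    code = 'd1:(' + ';'.join(projections) + ')'    # 'd1' stands in for the generated disp_varname
    print(code)                                    # d1:(cP;(cP)-(cT))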

engine/scan.py
@@ -0,0 +1,107 @@
+from xmlrpc.client import Boolean
+from engine.ast import ColRef, TableInfo, View, ast_node
+from engine.utils import base62uuid
+from engine.expr import expr
+
+class scan(ast_node):
+    name = 'scan'
+
+class filter(ast_node):
+    name = 'filter'
+    def __init__(self, parent: "ast_node", node, materialize = False, context = None):
+        self.materialize = materialize
+        super().__init__(parent, node, context)
+    def init(self, _):
+        self.datasource = self.context.datasource
+        self.view = View(self.context, self.datasource)
+        self.value = None
+    def spawn(self, node):
+        # TODO: deal with subqueries
+        return super().spawn(node)
+    def __materialize__(self):
+        if self.materialize:
+            cols = [] if self.datasource is None else self.datasource.columns
+            self.output = TableInfo('tn'+base62uuid(6), cols, self.context)
+            self.output.construct()
+            if type(self.value) is View: # cond filtered on tables.
+                self.emit(f'{self.value.name}:&{self.value.name}')
+                for o, c in zip(self.output.columns, self.value.table.columns):
+                    self.emit(f'{o.k9name}:{c.k9name}[{self.value.name}]')
+            elif self.value is not None: # cond is scalar
+                tmpVar = 't'+base62uuid(7)
+                self.emit(f'{tmpVar}:{self.value}')
+                for o, c in zip(self.output.columns, self.datasource.columns):
+                    self.emit(f'{o.k9name}:$[{tmpVar};{c.k9name};()]')
+    def consume(self, node):
+        # TODO: optimizations after converting expr to cnf
+        if type(node) is bool and node and self.materialize:
+            self.output = self.context.datasource if node else None
+            self.value = '1' if node else '0'
+        else:
+            if type(node) is dict:
+                def short_circuit(op, idx, inv = True):
+                    v = filter(self, node[op][idx]).value
+                    inv_filter = lambda x: not x if inv else x
+                    if type(v) is bool and inv_filter(v):
+                        self.value = inv_filter(v)
+                        self.__materialize__()
+                        return None
+                    return v
+                def binary(l, r, _ty = '&'):
+                    if type(l) is bool:
+                        self.value = r
+                    elif type(r) is bool:
+                        self.value = l
+                    elif type(l) is View:
+                        if type(r) is View:
+                            self.emit(f"{l.name}: {l.name} {_ty} {r.name if type(r) is View else f'({r})'}")
+                        self.value = l
+                    elif type(l) is str:
+                        if type(r) is str:
+                            self.value = f'({l}){_ty}({r})'
+                        else:
+                            self.emit(f'{r.name}:{r.name} {_ty} ({l})')
+                            self.value = r
+                if 'and' in node:
+                    l = short_circuit('and', 0)
+                    if l is not None:
+                        r = short_circuit('and', 1)
+                        if r is not None:
+                            binary(l, r)
+                elif 'or' in node:
+                    l = short_circuit('or', 0, False)
+                    if l is not None:
+                        r = short_circuit('or', 1, False)
+                        if r is not None:
+                            binary(l, r, '|')
+                elif 'not' in node:
+                    v = filter(self, node['not']).value
+                    if type(v) is bool:
+                        self.value = not v
+                        self.__materialize__()
+                    elif type(v) is View:
+                        if len(v.table.columns) > 0:
+                            all_rows = View(self.context, v.table)
+                            self.emit(f'{all_rows.name}:(#{v.table.columns[0].k9name})#1')
+                            self.emit(f'{v.name}:{all_rows.name}-{v.name}')
+                        self.value = v
+                    else:
+                        self.value = '~(' + v + ')'
+                # TODO: arithmetic ops connecting logical ops.
+            else:
+                e = expr(self, node)
+                if e.isvector:
+                    v = View(self.context, self.datasource)
+                    v.construct()
+                    self.emit(f'{v.name}:{e.k9expr}')
+                    self.value = v
+                else:
+                    self.value = e.k9expr
+            self.__materialize__()
+
+        print(node)
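
filter.consume folds constant where-clauses before emitting anything: for AND a False operand decides the result immediately, for OR a True operand does, NOT of a constant just flips it, and only non-constant operands reach binary(). The same constant folding on the parser's dict nodes, as a standalone sketch with no Views or code emission, assuming two-operand and/or nodes:

    # Constant-folding of where-clause trees, mirroring the and/or/not short-circuit logic above.
    def fold(node):
        if isinstance(node, bool):
            return node
        if isinstance(node, dict):
            (op, arg), = node.items()
            if op == 'and':
                l = fold(arg[0])
                if l is False: return False        # False and x -> False
                r = fold(arg[1])
                if r is False: return False
                if l is True: return r             # True and x -> x
                if r is True: return l
                return {'and': [l, r]}
            if op == 'or':
                l = fold(arg[0])
                if l is True: return True          # True or x -> True
                r = fold(arg[1])
                if r is True: return True
                if l is False: return r            # False or x -> x
                if r is False: return l
                return {'or': [l, r]}
            if op == 'not':
                v = fold(arg)
                return (not v) if isinstance(v, bool) else {'not': v}
        return node                                # comparison leaf: left for expr/View handling

    print(fold({'and': [True, {'gt': ['price', 1]}]}))   # {'gt': ['price', 1]}
    print(fold({'or':  [True, {'gt': ['price', 1]}]}))   # True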

engine/utils.py
@@ -4,7 +4,7 @@ def base62uuid(crop=8):
     alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
     id = uuid.uuid4().int
     ret = ''
 
     while id:
         ret = alp[id % 62] + ret
         id //= 62
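
gen_tmptable and the new column/view names all come from base62uuid: a v4 UUID's 128-bit integer is re-encoded with the base-62 alphabet shown above and cropped to a handful of characters. A self-contained sketch; the function's return line is not visible in this hunk, so the crop behaviour below is an assumption:

    # Standalone sketch of base62uuid as used above; the [:crop] at the end is assumed, not shown in the diff.
    import uuid

    def base62uuid(crop=8):
        alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        id = uuid.uuid4().int
        ret = ''
        while id:
            ret = alp[id % 62] + ret               # most-significant base-62 digit ends up first
            id //= 62
        return ret[:crop]

    print('c' + base62uuid(7))                     # e.g. a generated column name such as 'c3kTz9Qa'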

header.k
@@ -0,0 +1,2 @@
+maxs:{[L]{max(x, y)}\L}
+mins:{[L]{min(x, y)}\L}
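
maxs and mins in header.k are running (prefix) scans over a list, built with k's scan (`\`): element i of the result is the max/min of the first i+1 inputs, which is what mins(price) needs for the ASSUMING ASC query at the end of this diff. A rough Python equivalent, with itertools.accumulate standing in for the scan:

    # Running max / running min, equivalent to the k definitions above.
    from itertools import accumulate

    def maxs(xs): return list(accumulate(xs, max))
    def mins(xs): return list(accumulate(xs, min))

    price = [5, 3, 7, 2, 9]
    print(mins(price))                                        # [5, 3, 3, 2, 2]
    print(max(p - m for p, m in zip(price, mins(price))))     # max(price - mins(price)) -> 7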

@@ -1,7 +1,7 @@
-from multiprocessing.sharedctypes import Value
 import re
 
 import aquery_parser as parser
 import engine
+import subprocess
 
 test_parser = True
@@ -27,7 +27,11 @@ while test_parser:
             engine.generate(stmts_stmts, cxt)
             print(cxt.k9code)
             with open('out.k', 'wb') as outfile:
-                outfile.write(cxt.k9code.encode('utf-8'))
+                outfile.write((cxt.k9code+'\n\\\\').encode('utf-8'))
+            subprocess.call(['bash.exe', '-c', "./k out.k"])
+            continue
+        elif q == 'k':
+            subprocess.call(['bash.exe', '-c', "./k"])
             continue
         elif q == 'print':
             print(stmts)

@@ -18,8 +18,10 @@ INSERT INTO stocks VALUES(15,2)
 INSERT INTO stocks VALUES(16,5)
 
 SELECT max(price-min(timestamp)) FROM stocks
+
+SELECT price, timestamp FROM stocks where price - timestamp > 1 and not (price*timestamp<100)
 
-/*SELECT max(price-mins(price))
+SELECT max(price-mins(price))
 FROM stocks
-ASSUMING ASC timestamp*/
+ASSUMING ASC timestamp
