Added filter and basic aggregations.

Fixed bugs with variable length, etc.
dev
Bill Sun 3 years ago
parent 137d670da2
commit 3b2dfb295e

3
.gitignore vendored

@ -15,3 +15,6 @@ vendor/
.eggs
.vscode
out.k
k
*.so
*.pdf

@ -1,33 +1,35 @@
from typing import List
from engine.utils import base62uuid
# replace column info with this later.
class ColRef:
    """A column reference: generated K name, type name, the original column
    object, a duplicate counter and the owning table.

    Fields are also addressable positionally (index 0..4) via __getitem__ /
    __setitem__ so older tuple-style call sites keep working.

    NOTE(review): the paste contained both the old 4-argument and the new
    5-argument __init__; only the new one is kept. The positional backing
    store was a tuple, which made __setitem__ raise TypeError — a list keeps
    read behavior identical and makes item assignment actually work.
    """
    def __init__(self, k9name, type, cobj, cnt, table):
        self.k9name = k9name
        self.type = type
        self.cobj = cobj
        self.cnt = cnt
        self.table = table
        # positional view of the fields; populated once at construction and
        # NOT kept in sync with later attribute writes
        self.__arr__ = [k9name, type, cobj, cnt, table]
    def __getitem__(self, key):
        return self.__arr__[key]
    def __setitem__(self, key, value):
        # updates the positional view only, not the named attributes
        self.__arr__[key] = value
class TableInfo:
def __init__(self, table_name, cols, cxt:'Context'):
# Build the static metadata for a table: its name, alias set and both
# column lookup structures, then register every column via add_col.
# statics
self.table_name = table_name
self.alias = set([table_name])
self.columns_byname = dict() # column_name, type
self.columns = []
self.cxt = cxt
# views built over this table (see View.__init__)
self.views = set()
for c in cols:
# NOTE(review): the next eleven lines are the superseded pre-commit
# loop body left in the diff paste (tuple-based col_object with
# table_name-prefixed K names and dup-count bookkeeping); the commit
# replaces the whole body with the single self.add_col(c) call below.
k9name = self.table_name + c['name']
if k9name in cxt.k9cols_byname: # duplicate names?
root = cxt.k9cols_byname[k9name]
k9name = k9name + root[3]
root[3] += 1
# column: (k9name, type, original col_object, dup_count)
col_object = (k9name, (list(c['type'].keys()))[0], c, 1)
cxt.k9cols_byname[k9name] = col_object
self.columns_byname[c['name']] = col_object
self.columns.append(col_object)
self.add_col(c)
# runtime
self.n_rows = 0 # number of rows (original comment said "cols" — likely a typo)
@ -35,16 +37,59 @@ class TableInfo:
cxt.tables_byname[self.table_name] = self # construct reverse map
def add_col(self, c):
    """Register column `c` on this table under a fresh unique K name.

    `c` may be a raw column dict (with 'name' and 'type' keys) or an
    existing ColRef, in which case its original column object is reused.

    The generated 7-char base62 name is unique by construction, so the old
    table_name + column_name collision bookkeeping (removed dead code in
    the original) is no longer needed.
    """
    if type(c) is ColRef:
        # re-adding an existing column: work from its original column dict
        c = c.cobj
    k9name = 'c' + base62uuid(7)
    # type name is the first key of the column's 'type' mapping
    col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
    self.cxt.k9cols_byname[k9name] = col_object
    self.columns_byname[c['name']] = col_object
    self.columns.append(col_object)
def construct(self):
# emit an empty-vector initializer (`cXXXXXXX:()`) for every column
for c in self.columns:
self.cxt.emit(f'{c.k9name}:()')
@property
def n_cols(self):
# number of columns currently attached to this table
return len(self.columns)
def get_k9colname(self, col_name):
    """Map a user-facing column name to its generated K variable name.

    NOTE(review): the pasted span interleaved the old tuple-indexing
    version, an old one-line parse_tablenames, and this new attribute
    access (which was unreachable after the first return); only the new
    form is kept, consistent with columns_byname now holding ColRef
    objects rather than tuples.
    """
    return self.columns_byname[col_name].k9name
def add_alias(self, alias):
# TODO: Exception when alias already defined.
# TODO: Scoping of alias should be constrainted in the query.
# register the alias so qualified refs (alias.col) resolve to this table
self.cxt.tables_byname[alias] = self
self.alias.add(alias)
def parse_tablenames(self, colExpr):
    """Resolve a (possibly qualified) column expression to its K name.

    Accepts either `col` (resolved against this table) or `table.col` /
    `alias.col` (resolved against the named table in the context).

    Raises:
        ValueError: if the qualifier is not a known table name or alias.
        KeyError: if the column name itself is unknown.
    """
    parsedColExpr = colExpr.split('.')
    if len(parsedColExpr) <= 1:
        return self.get_k9colname(colExpr)
    # dict.get returns None for an unknown qualifier; the original used
    # plain indexing, which raises KeyError and made the explicit
    # ValueError branch below unreachable
    datasource = self.cxt.tables_byname.get(parsedColExpr[0])
    if datasource is None:
        raise ValueError(f'Table name/alias not defined: {parsedColExpr[0]}')
    return datasource.get_k9colname(parsedColExpr[1])
class View:
    """A named K view variable, optionally backed by a TableInfo.

    When a backing table is supplied, the view registers itself in that
    table's `views` set so the table knows which views depend on it.
    """
    def __init__(self, context, table = None, tmp = True):
        self.context = context
        self.table: TableInfo = table
        self.name = 'v' + base62uuid(7)
        if type(table) is TableInfo:
            table.views.add(self)
    def construct(self):
        # emit an empty-vector initializer for the view variable
        self.context.emit(f'{self.name}:()')
class Context:
def __init__(self):
@ -53,8 +98,14 @@ class Context:
self.k9cols_byname = dict()
self.udf_map = dict()
# read header
self.k9code = ''
with open('header.k', 'r') as outfile:
self.k9code = outfile.read()
# the datasource becomes available after the `from' clause is parsed
# and is deactivated when the `from' goes out of scope
self.datasource = None
def add_table(self, table_name, cols):
tbl = TableInfo(table_name, cols, self)
@ -63,7 +114,7 @@ class Context:
def gen_tmptable(self):
    """Return a fresh temporary-table name, e.g. 't1aB2c3D'.

    NOTE(review): the paste kept the superseded `return f'tmp{...}'` line
    above the new one, making the new return unreachable; only the
    post-commit form is kept.
    """
    from engine.utils import base62uuid
    return f't{base62uuid(7)}'
def emit(self, codelet):
# append one line of generated K code to the program buffer
self.k9code += codelet + '\n'
@ -76,6 +127,7 @@ class ast_node:
types = dict()
def __init__(self, parent:"ast_node", node, context:Context = None):
self.context = parent.context if context is None else context
self.parent = parent
self.init(node)
self.produce(node)
self.spawn(node)

@ -8,29 +8,38 @@ class expr(ast_node):
'min': 'min',
'avg':'avg',
'sum':'sum',
'mins': 'mins',
'maxs': 'maxs'
}
binary_ops = {
'sub':'-',
'add':'+',
'mul':'*',
'div':'%',
'mod':'mod',
'and':'&',
'or':'|',
'gt':'>',
'lt':'<',
}
unary_ops = {
'neg' : '-',
'not' : '~'
}
def __init__(self, parent, node):
# expressions always inherit the context from their parent node
ast_node.__init__(self, parent, node, None)
def init(self, _):
    """Set up per-expression state before produce() walks the node.

    NOTE(review): reconstructed from a diff paste that contained both the
    old parent-based datasource lookup (with its projection import) and
    the new context-based assignment, plus a stray remnant of the removed
    old __init__; the post-commit form is kept.
    """
    parent = self.parent
    # vector-ness propagates down from an enclosing expr, else starts scalar
    self.isvector = parent.isvector if type(parent) is expr else False
    # the datasource now comes from the context, which holds the table of
    # the `from` clause currently in scope
    self.datasource = self.context.datasource
    self.udf_map = parent.context.udf_map
    self.k9expr = ''
    # merged dispatch table; builtins override same-named UDFs (later keys win)
    self.func_maps = {**self.udf_map, **self.builtin_func_maps}
def produce(self, node):
if type(node) is dict:
@ -39,7 +48,6 @@ class expr(ast_node):
self.k9expr += f"{self.func_maps[key]}("
# if type(val) in [dict, str]:
self.k9expr += expr(self, val).k9expr
self.k9expr += ')'
elif key in self.binary_ops:
l = expr(self, val[0]).k9expr
@ -51,7 +59,13 @@ class expr(ast_node):
else:
print(f'Undefined expr: {key}{val}')
elif type(node) is str:
p = self.parent
while type(p) is expr and not p.isvector:
p.isvector = True
p = p.parent
self.k9expr = self.datasource.parse_tablenames(node)
elif type(node) is bool:
self.k9expr = '1' if node else '0'
else:
self.k9expr = f'{node}'
def __str__(self):

@ -1,6 +1,7 @@
from engine.ast import TableInfo, ast_node, Context, include
from engine.join import join
from engine.expr import expr
from engine.scan import filter
from engine.utils import base62uuid
class projection(ast_node):
@ -15,7 +16,7 @@ class projection(ast_node):
def produce(self, node):
    """Normalize the select list to a list of projection expressions.

    NOTE(review): the paste kept the superseded line comparing
    `type(projection)` (the class object!) to `list` — always False, so
    single projections were never wrapped; the corrected `type(p)` test
    is kept.
    """
    p = node['select']
    self.projections = p if type(p) is list else [p]
    print(node)  # debug trace, kept to preserve behavior
def spawn(self, node):
@ -47,27 +48,37 @@ class projection(ast_node):
if self.datasource is None:
raise ValueError('spawn error: from clause')
if self.datasource is not None:
self.datasource_changed = True
self.prev_datasource = self.context.datasource
self.context.datasource = self.datasource
if 'where' in node:
# apply filter
pass
self.datasource = filter(self, node['where'], True).output
self.context.datasource = self.datasource
def consume(self, _):
    """Emit K code evaluating each projection into a display variable and
    optionally echo it, then restore the previous datasource.

    NOTE(review): reconstructed from a diff paste containing both pre- and
    post-commit lines; the post-commit form (enumerate with a conditional
    ';' separator and the flip/enlist display logic) is kept.
    """
    disp_varname = 'd'+base62uuid(7)
    self.emit_no_ln(f'{disp_varname}:(')
    for i, proj in enumerate(self.projections):
        if type(proj) is dict:
            if 'value' in proj:
                e = proj['value']
                if type(e) is str:
                    # bare column reference
                    self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])}")
                elif type(e) is dict:
                    # compound expression: compile through expr
                    self.emit_no_ln(f"{expr(self, e).k9expr}")
                # ';' separates list items; none after the last one
                self.emit_no_ln(';' if i < len(self.projections)-1 else '')
    self.emit(')')
    if self.disp:
        # single-column results get an extra enlist so they display as a
        # table when flipped — presumably K flip semantics; confirm
        if len(self.projections) > 1:
            self.emit(f'+{disp_varname}')
        else:
            self.emit(f'+,(,{disp_varname})')
    if self.datasource_changed:
        # restore the datasource active before this projection's `from`
        self.context.datasource = self.prev_datasource
import sys

@ -0,0 +1,107 @@
from xmlrpc.client import Boolean
from engine.ast import ColRef, TableInfo, View, ast_node
from engine.utils import base62uuid
from engine.expr import expr
class scan(ast_node):
# NOTE(review): stub — only names the node type; no overrides in this chunk
name = 'scan'
class filter(ast_node):
"""Compile a WHERE-clause AST node into K filtering code.

By the end of consume(), self.value is one of:
  * bool — the condition folded to a constant,
  * View — a row mask/index view over the datasource,
  * str  — a scalar K expression,
and __materialize__() copies the surviving rows into a fresh output
table when materialize=True.
"""
name = 'filter'
def __init__(self, parent: "ast_node", node, materialize = False, context = None):
# materialize must be set before ast_node.__init__ runs produce/consume
self.materialize = materialize
super().__init__(parent, node, context)
def init(self, _):
# the active datasource is whatever `from` clause is currently in scope
self.datasource = self.context.datasource
self.view = View(self.context, self.datasource)
# result slot filled in by consume(); None means "nothing decided yet"
self.value = None
def spawn(self, node):
# TODO: deal with subqueries
return super().spawn(node)
def __materialize__(self):
# Copy the rows that survive the filter into self.output, a new table.
if self.materialize:
cols = [] if self.datasource is None else self.datasource.columns
self.output = TableInfo('tn'+base62uuid(6), cols, self.context)
self.output.construct()
if type(self.value) is View: # cond filtered on tables.
# &mask — presumably K `where`, turning the mask into indices; confirm
self.emit(f'{self.value.name}:&{self.value.name}')
for o, c in zip(self.output.columns,self.value.table.columns):
self.emit(f'{o.k9name}:{c.k9name}[{self.value.name}]')
elif self.value is not None: # cond is scalar
tmpVar = 't'+base62uuid(7)
self.emit(f'{tmpVar}:{self.value}')
# $[cond;then;else] — copy the whole column or an empty vector
for o, c in zip(self.output.columns, self.datasource.columns):
self.emit(f'{o.k9name}:$[{tmpVar};{c.k9name};()]')
def consume(self, node):
# TODO: optimizations after converting expr to cnf
if type(node) is bool and node and self.materialize:
# constant-true filter with materialization: pass the source through
self.output = self.context.datasource if node else None
self.value = '1' if node else '0'
else:
# NOTE(review): a constant-true node with materialize=False, and a
# constant-false node, both fall through here and leave self.value
# as None — confirm that is intended.
if type(node) is dict:
def short_circuit(op, idx, inv = True):
# Evaluate operand `idx` of connective `op`; if it alone decides
# the result (False for `and`, True for `or`), record it,
# materialize, and return None to tell the caller to stop.
v = filter(self, node[op][idx]).value
inv_filter = lambda x: not x if inv else x
if type(v) is bool and inv_filter(v):
self.value = inv_filter(v)
self.__materialize__()
return None
return v
def binary(l, r, _ty = '&'):
# Combine two operand results: a bool operand is the identity of
# its connective, View operands are merged in place, strings are
# composed into a new scalar expression.
if type(l) is bool:
self.value = r
elif type(r) is bool:
self.value = l
elif type(l) is View:
if type(r) is View:
self.emit(f"{l.name}: {l.name} {_ty} {r.name if type(r) is View else f'({r})'}")
self.value = l
elif type(l) is str:
if type(r) is str:
self.value = f'({l}){_ty}({r})'
else:
# r is a View: fold the scalar expression into it in place
self.emit(f'{r.name}:{r.name} {_ty} ({l})')
self.value = r
if 'and' in node:
l = short_circuit('and', 0)
if l is not None:
r = short_circuit('and', 1)
if r is not None:
binary(l, r)
elif 'or' in node:
l = short_circuit('or', 0, False)
if l is not None:
r = short_circuit('or', 1, False)
if r is not None:
binary(l, r, '|')
elif 'not' in node:
v = filter(self, node['not']).value
if type(v) is bool:
self.value = not v
self.__materialize__()
elif type(v) is View:
if len(v.table.columns) > 0:
# (#col)#1 presumably builds a length-of-table vector of 1s;
# subtracting the kept mask complements it — confirm K take
# semantics
all_rows = View(self.context, v.table)
self.emit(f'{all_rows.name}:(#{v.table.columns[0].k9name})#1')
self.emit(f'{v.name}:{all_rows.name}-{v.name}')
self.value = v
else:
# v is a scalar K expression string; negate it textually
self.value = '~(' + v + ')'
# TODO: arithmetic ops connecting logical ops.
else:
# leaf: an ordinary expression; vector results become Views
e = expr(self, node)
if e.isvector:
v = View(self.context, self.datasource)
v.construct()
self.emit(f'{v.name}:{e.k9expr}')
self.value = v
else:
self.value = e.k9expr
self.__materialize__()
print(node)

@ -0,0 +1,2 @@
maxs:{[L]{max(x, y)}\L}
mins:{[L]{min(x, y)}\L}

@ -1,7 +1,7 @@
from multiprocessing.sharedctypes import Value
import re
import aquery_parser as parser
import engine
import subprocess
test_parser = True
@ -27,7 +27,11 @@ while test_parser:
engine.generate(stmts_stmts, cxt)
print(cxt.k9code)
with open('out.k', 'wb') as outfile:
outfile.write(cxt.k9code.encode('utf-8'))
outfile.write((cxt.k9code+'\n\\\\').encode('utf-8'))
subprocess.call(['bash.exe', '-c',"./k out.k"])
continue
elif q == 'k':
subprocess.call(['bash.exe', '-c',"./k"])
continue
elif q == 'print':
print(stmts)

@ -19,7 +19,9 @@ INSERT INTO stocks VALUES(16,5)
SELECT max(price-min(timestamp)) FROM stocks
SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100)
/*SELECT max(price-mins(price))
SELECT max(price-mins(price))
FROM stocks
ASSUMING ASC timestamp*/
ASSUMING ASC timestamp

Loading…
Cancel
Save