Order by, on-demand assumptions, bugfixes

Nested-loop join (concept)
dev
BillSun 3 years ago
parent b9a8ad3ac7
commit 8b182cf0f2

@ -412,7 +412,7 @@ def parser(literal_string, ident, sqlserver=False):
+ RB, + RB,
) )
assumption = Group((ASC|DESC) ("ord") + var_name("attrib")) assumption = Group((ASC|DESC) ("sort") + var_name("value"))
assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions")) assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions"))
table_source << Group( table_source << Group(

@ -6,7 +6,7 @@ from engine.utils import base62uuid
# replace column info with this later. # replace column info with this later.
class ColRef: class ColRef:
def __init__(self, k9name, _ty, cobj, cnt, table, name, id, order = None, compound = False): def __init__(self, k9name, _ty, cobj, cnt, table, name, id, compound = False):
self.k9name = k9name self.k9name = k9name
self.type = _ty self.type = _ty
self.cobj = cobj self.cobj = cobj
@ -14,12 +14,15 @@ class ColRef:
self.table = table self.table = table
self.name = name self.name = name
self.id = id self.id = id
self.order = order # True -> asc, False -> dsc; None -> unordered self.order_pending = None # order_pending
self.compound = compound # compound field (list as a field) self.compound = compound # compound field (list as a field)
self.views = [] self.views = []
self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id) self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)
def __getitem__(self, key): def __getitem__(self, key):
if type(key) is str:
return getattr(self, key)
else:
return self.__arr__[key] return self.__arr__[key]
def __setitem__(self, key, value): def __setitem__(self, key, value):
@ -40,21 +43,24 @@ class TableInfo:
self.views = set() self.views = set()
self.rec = None self.rec = None
self.groupinfo = None self.groupinfo = None
for c in cols: self.add_cols(cols)
self.add_col(c)
# runtime # runtime
self.n_rows = 0 # number of cols self.n_rows = 0 # number of cols
self.order = [] # assumptions self.order = [] # assumptions
cxt.tables_byname[self.table_name] = self # construct reverse map cxt.tables_byname[self.table_name] = self # construct reverse map
def add_cols(self, cols, new = True):
def add_col(self, c): for c in cols:
if type(c) is ColRef: self.add_col(c, new)
c = c.cobj def add_col(self, c, new = True):
_ty = c['type']
if new:
k9name = 'c' + base62uuid(7) k9name = 'c' + base62uuid(7)
col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns)) _ty = _ty if type(c) is ColRef else list(_ty.keys())[0]
col_object = ColRef(k9name, _ty, c, 1, self,c['name'], len(self.columns))
else:
col_object = c
k9name = c.k9name
self.cxt.k9cols_byname[k9name] = col_object self.cxt.k9cols_byname[k9name] = col_object
self.columns_byname[c['name']] = col_object self.columns_byname[c['name']] = col_object
self.columns.append(col_object) self.columns.append(col_object)
@ -66,11 +72,41 @@ class TableInfo:
def n_cols(self): def n_cols(self):
return len(self.columns) return len(self.columns)
def get_col(self, col_name): def materialize_orderbys(self):
view_stack = ''
stack_name = ''
for o in self.order:
o.materialize()
if len(view_stack) == 0:
view_stack = o.view.name
stack_name = view_stack
else:
view_stack = view_stack+'['+ o.view.name +']'
# TODO: Optimize by doing everything in a stmt
if len(view_stack) > 0:
if len(self.order) > 1:
self.cxt.emit(f'{stack_name}:{view_stack}')
for c in self.columns:
c.order_pending = stack_name
self.order[0].node.view = stack_name
self.order.clear()
def get_col_d(self, col_name):
col = self.columns_byname[col_name] col = self.columns_byname[col_name]
if type(self.rec) is list: if type(self.rec) is list:
self.rec.append(col) self.rec.append(col)
return col return col
def get_k9colname_d(self, col_name):
    """Return the ``k9name`` of the column found by ``get_col_d``.

    The lookup is delegated entirely to ``get_col_d(col_name)``; this
    method only extracts the ``k9name`` attribute from its result.
    """
    column = self.get_col_d(col_name)
    return column.k9name
def get_col(self, col_name):
    """Look up a column after applying any pending ordering.

    First calls ``materialize_orderbys()``, then fetches the column through
    ``get_col_d(col_name)``. If the column's ``order_pending`` is a string,
    a statement of the form ``<k9name>:<k9name>[<order_pending>]`` is sent
    to ``cxt.emit_no_flush`` and ``order_pending`` is reset to ``None`` so
    the statement is emitted only once per pending marker.

    Returns the column object obtained from ``get_col_d``.
    """
    self.materialize_orderbys()
    column = self.get_col_d(col_name)
    pending = column.order_pending
    # Exact type check (not isinstance), as in the original condition.
    if type(pending) is not str:
        return column
    k9name = column.k9name
    self.cxt.emit_no_flush(f'{k9name}:{k9name}[{pending}]')
    column.order_pending = None
    return column
def get_k9colname(self, col_name): def get_k9colname(self, col_name):
return self.get_col(col_name).k9name return self.get_col(col_name).k9name
@ -80,7 +116,9 @@ class TableInfo:
self.cxt.tables_byname[alias] = self self.cxt.tables_byname[alias] = self
self.alias.add(alias) self.alias.add(alias)
def parse_tablenames(self, colExpr): def parse_tablenames(self, colExpr, materialize = True):
self.get_col = self.get_col if materialize else self.get_col_d
parsedColExpr = colExpr.split('.') parsedColExpr = colExpr.split('.')
ret = None ret = None
if len(parsedColExpr) <= 1: if len(parsedColExpr) <= 1:
@ -117,12 +155,13 @@ class Context:
self.udf_map = dict() self.udf_map = dict()
# read header # read header
self.k9code = '' self.k9code = ''
self.k9codelet = ''
with open('header.k', 'r') as outfile: with open('header.k', 'r') as outfile:
self.k9code = outfile.read() self.k9code = outfile.read()
# datasource will be availible after `from' clause is parsed # datasource will be availible after `from' clause is parsed
# and will be deactivated when the `from' is out of scope # and will be deactivated when the `from' is out of scope
self.datasource = None self.datasource = None
self.ds_stack = []
def add_table(self, table_name, cols): def add_table(self, table_name, cols):
tbl = TableInfo(table_name, cols, self) tbl = TableInfo(table_name, cols, self)
@ -134,17 +173,46 @@ class Context:
return f't{base62uuid(7)}' return f't{base62uuid(7)}'
def emit(self, codelet): def emit(self, codelet):
self.k9code += self.k9codelet + codelet + '\n'
self.k9codelet = ''
def emit_no_flush(self, codelet):
self.k9code += codelet + '\n' self.k9code += codelet + '\n'
def emit_flush(self):
    """Append the buffered ``k9codelet`` and a newline to ``k9code``, then clear the buffer."""
    buffered = self.k9codelet
    self.k9code = self.k9code + (buffered + '\n')
    self.k9codelet = ''
def emit_nonewline(self, codelet): def emit_nonewline(self, codelet):
self.k9code += codelet self.k9codelet += codelet
def datsource_top(self):
    """Return the last entry of ``ds_stack`` without removing it.

    Returns ``None`` when ``ds_stack`` is empty.
    """
    stack = self.ds_stack
    return stack[-1] if len(stack) > 0 else None
def datasource_pop(self):
    """Remove the top entry of ``ds_stack`` and return the new top.

    Returns:
        The entry that is last in ``ds_stack`` after the removal, or
        ``None`` when the stack was already empty or the removal emptied it.
    """
    if self.ds_stack:
        self.ds_stack.pop()
    # Re-check after popping: indexing [-1] unconditionally raised
    # IndexError whenever the only remaining entry had just been removed.
    return self.ds_stack[-1] if self.ds_stack else None
def datasource_push(self, ds):
    """Push ``ds`` onto ``ds_stack`` when it is exactly a ``TableInfo``.

    Returns ``ds`` after appending it. For any other type (subclasses
    included, since the check is on the exact type) nothing is pushed and
    ``None`` is returned.
    """
    if type(ds) is not TableInfo:
        return None
    self.ds_stack.append(ds)
    return ds
def __str__(self): def __str__(self):
return self.k9code return self.k9code
def __repr__(self) -> str:
return self.__str__()
class ast_node: class ast_node:
types = dict() types = dict()
def __init__(self, parent:"ast_node", node, context:Context = None): def __init__(self, parent:"ast_node", node, context:Context = None):
self.context = parent.context if context is None else context self.context = parent.context if context is None else context
self.parent = parent self.parent = parent
self.datasource = None
self.init(node) self.init(node)
self.produce(node) self.produce(node)
self.spawn(node) self.spawn(node)

@ -14,6 +14,7 @@ class expr(ast_node):
'avgs': ['avgs', 'avgsw'], 'avgs': ['avgs', 'avgsw'],
'sums': ['sums', 'sumsw'], 'sums': ['sums', 'sumsw'],
} }
binary_ops = { binary_ops = {
'sub':'-', 'sub':'-',
'add':'+', 'add':'+',
@ -24,18 +25,23 @@ class expr(ast_node):
'gt':'>', 'gt':'>',
'lt':'<', 'lt':'<',
} }
compound_ops = { compound_ops = {
'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'], 'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'],
'le' : [2, lambda x: f'~({x[0]}>{x[1]})'], 'le' : [2, lambda x: f'~({x[0]}>{x[1]})'],
'count' : [1, lambda x: f'#({x[0]})']
} }
unary_ops = { unary_ops = {
'neg' : '-', 'neg' : '-',
'not' : '~' 'not' : '~'
} }
coumpound_generating_ops = ['mod', 'mins', 'maxs', 'sums'] + \ coumpound_generating_ops = ['mod', 'mins', 'maxs', 'sums'] + \
list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() ) list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() )
def __init__(self, parent, node): def __init__(self, parent, node, materialize_cols = True):
self.materialize_cols = materialize_cols
ast_node.__init__(self, parent, node, None) ast_node.__init__(self, parent, node, None)
def init(self, _): def init(self, _):
@ -95,7 +101,7 @@ class expr(ast_node):
while type(p) is expr and not p.isvector: while type(p) is expr and not p.isvector:
p.isvector = True p.isvector = True
p = p.parent p = p.parent
self.k9expr = self.datasource.parse_tablenames(node) self.k9expr = self.datasource.parse_tablenames(node, self.materialize_cols)
elif type(node) is bool: elif type(node) is bool:
self.k9expr = '1' if node else '0' self.k9expr = '1' if node else '0'
else: else:

@ -46,10 +46,11 @@ class groupby(ast_node):
self.parent.inv = False self.parent.inv = False
else: else:
k9fn = "{[ids;grps;ll;dim;x] " + \ k9fn = "{[ids;grps;ll;dim;x] " + \
"start:$[x=ll;ll;grps[x+1][dim-1]];" + \ "start:grps[x][dim];" + \
"end: grps[x][dim-1];" + \ "end:$[x=0;ll;grps[x-1][dim]];" + \
"range:(end-start)#(((start-ll))#ids);" + \ "range:(end-start)#((start-ll)#ids);" + \
"start:ids[start];" + \
ret + '}' ret + '}'
self.emit(f'{self.groupby_function}:{k9fn}') self.emit(f'{self.groupby_function}:{k9fn}')
self.emit(f'{out}:+({self.groupby_function}' + \ self.emit(f'{out}:+({self.groupby_function}' + \
f'[{grp}[1];{grp}[0];(#{grp}[0])-1;#({grp}[0][0])]\'!((#({grp}[0]))-1))') f'[{grp}[1];{grp}[0];(#{grp}[0])+1;(#({grp}[0][0]))-1]\'!(#({grp}[0])))')

@ -1,38 +1,59 @@
from engine.ast import ColRef, TableInfo, ast_node from engine.ast import ColRef, TableInfo, View, ast_node, Context
from engine.utils import base62uuid from engine.utils import base62uuid, seps
from engine.expr import expr from engine.expr import expr
import k
class order_item:
    """A single sort key that is turned into its k9 expression on demand.

    ``name`` initially holds the value passed to the constructor; the first
    call to ``materialize`` replaces it with the ``k9expr`` of an ``expr``
    built from it.
    """

    def __init__(self, name, node, order = True):
        self.name = name
        self.node = node
        # A falsy ``order`` makes materialize() prefix the key with '-'.
        self.order = order
        self.materialized = False

    def materialize(self):
        """Return the key as ``(<name>)``, prefixed with ``-`` when ``order`` is falsy.

        On the first call the stored ``name`` is replaced by the ``k9expr``
        of ``expr(self.node, self.name, False)``; later calls reuse it.
        """
        if not self.materialized:
            # Third argument False is expr's ``materialize_cols`` flag.
            built = expr(self.node, self.name, False)
            self.name = built.k9expr
            self.materialized = True
        sign = '' if self.order else '-'
        return sign + f'({self.name})'

    def __str__(self):
        # Converting to str triggers materialization.
        return self.materialize()

    def __repr__(self):
        return self.__str__()
class orders:
    """Collects the sort keys of one ordering and emits its view once, on demand."""

    def __init__(self, node, datasource):
        self.node = node
        self.datasource = datasource
        self.order_items = []
        self.view = None        # assigned by materialize()
        self.n_attrs = -1       # key count; stays -1 until materialize() runs
        self.materialized = False

    def append(self, o):
        """Add one sort key to ``order_items``."""
        self.order_items.append(o)

    def materialize(self):
        """Create the view and emit its defining statement; no-op after the first call.

        Builds a ``View`` from ``node.context`` and ``datasource``, joins the
        string forms of all keys with ``;`` and emits one statement through
        ``node.emit``. A single key gets a leading ``,`` inside the
        parentheses.
        """
        if self.materialized:
            return
        self.view = View(self.node.context, self.datasource, False)
        # Formatting each item materializes it (see its __str__), so this
        # must happen after the view is created, as in the original order.
        keys = ';'.join([f'{item}' for item in self.order_items])
        self.n_attrs = len(self.order_items)
        prefix = ',' if self.n_attrs == 1 else ''
        self.node.emit(f"{self.view.name}: > +`j (({prefix}{keys}))")
        self.materialized = True
class orderby(ast_node): class orderby(ast_node):
name = '_orderby' name = '_orderby'
def init(self, _): def init(self, _):
self.group = 'g' + base62uuid(7)
self.datasource = self.parent.datasource self.datasource = self.parent.datasource
self.datasource.rec = [] self.order = orders(self, self.datasource)
self.view = ''
def produce(self, node): def produce(self, node):
if type(node) is not list: if type(node) is not list:
node = [node] node = [node]
g_contents = '(' for n in node:
first_col = '' order = not ('sort' in n and n['sort'] == 'desc')
for i, g in enumerate(node): self.order.append(order_item(n['value'], self, order))
v = g['value']
e = expr(self, v).k9expr
# if v is compound expr, create tmp cols
if type(v) is not str:
tmpcol = 't' + base62uuid(7)
self.emit(f'{tmpcol}:{e}')
e = tmpcol
if i == 0:
first_col = e
g_contents += e + (';'if i < len(node)-1 else '')
self.emit(f'{self.group}:'+g_contents+')')
self.n_grps = len(node)
if self.n_grps <= 1:
self.emit(f'{self.group}:={self.group}')
else:
self.emit(f'{self.group}:groupby[+({self.group},(,!(#({first_col}))))]')
def consume(self, _): def consume(self, _):
self.referenced = self.datasource.rec self.datasource.order.append(self.order)
self.datasource.rec = None
return super().consume(_)

@ -1,7 +1,9 @@
from attr import has
from engine.ast import ColRef, TableInfo, ast_node, Context, include from engine.ast import ColRef, TableInfo, ast_node, Context, include
from engine.groupby import groupby from engine.groupby import groupby
from engine.join import join from engine.join import join
from engine.expr import expr from engine.expr import expr
from engine.orderby import orderby
from engine.scan import filter from engine.scan import filter
from engine.utils import base62uuid, enlist, base62alp from engine.utils import base62uuid, enlist, base62alp
from engine.ddl import outfile from engine.ddl import outfile
@ -44,10 +46,7 @@ class projection(ast_node):
self.datasource = self.context.tables_byname[value] self.datasource = self.context.tables_byname[value]
if 'assumptions' in from_clause: if 'assumptions' in from_clause:
for assumption in enlist(from_clause['assumptions']): for assumption in enlist(from_clause['assumptions']):
ord = assumption['ord'] == 'asc' orderby(self, assumption)
attrib = assumption['attrib']
ord = '^' if ord else '|^'
# TODO: generate view of table by order
elif type(from_clause) is str: elif type(from_clause) is str:
self.datasource = self.context.tables_byname[from_clause] self.datasource = self.context.tables_byname[from_clause]
@ -92,8 +91,8 @@ class projection(ast_node):
if 'value' in proj: if 'value' in proj:
e = proj['value'] e = proj['value']
if type(e) is str: if type(e) is str:
cname = self.datasource.parse_tablenames(proj['value']) cname = e # TODO: deal w/ alias
k9expr += (f"{cname}") k9expr += (f"{self.datasource.parse_tablenames(proj['value'])}")
elif type(e) is dict: elif type(e) is dict:
p_expr = expr(self, e) p_expr = expr(self, e)
cname = p_expr.k9expr cname = p_expr.k9expr
@ -104,27 +103,41 @@ class projection(ast_node):
compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced
cols.append(ColRef(f'(+{disp_varname})[{i}]', 'generic', self.out_table, 0, None, cname, i, compound=compound)) cols.append(ColRef(f'{disp_varname}[{i}]', 'generic', self.out_table, 0, None, cname, i, compound=compound))
self.out_table.add_cols(cols, False)
k9expr += ')' k9expr += ')'
if has_groupby: if has_groupby:
self.group_node.finalize(k9expr, disp_varname) self.group_node.finalize(k9expr, disp_varname)
else: else:
self.emit(f'{disp_varname}:{k9expr}') self.emit(f'{disp_varname}:{k9expr}')
self.datasource.group_node = None self.datasource.group_node = None
if flatten:
self.emit_no_ln(f'{disp_varname}:' if flatten else '')
if flatten or self.disp: has_orderby = 'orderby' in node
if has_orderby:
self.datasource = self.out_table
self.context.datasource = self.out_table # discard current ds
orderby_node = orderby(self, node['orderby'])
self.context.datasource.materialize_orderbys()
self.emit_no_ln(f"{f'{disp_varname}:+' if flatten else ''}(")
if self.disp or has_orderby:
if len(self.projections) > 1: if len(self.projections) > 1:
self.emit(f"{'+' if self.inv else ''}{disp_varname}") self.emit_no_ln(f"{'+' if self.inv else ''}{disp_varname}")
else: else:
self.emit(f'$[(#{disp_varname})>1;+,({disp_varname});+,(,{disp_varname})]') self.emit_no_ln(f'$[(#{disp_varname})>1;+,({disp_varname});+,(,{disp_varname})]')
if flatten: if flatten:
self.emit(f'{disp_varname}') self.emit_no_ln(f'{disp_varname}')
if has_orderby:
self.emit(f')[{orderby_node.view}]')
else:
self.context.emit_flush()
if flatten: if flatten:
self.out_table.columns = cols if len(self.projections) > 1 and not self.inv:
self.emit(f"{disp_varname}:+{disp_varname}")
outfile(self, node['outfile']) outfile(self, node['outfile'])
if self.datasource_changed: if self.datasource_changed:
self.context.datasource = self.prev_datasource self.context.datasource = self.prev_datasource

@ -1,6 +1,6 @@
Month,sales Month,sales
1,100 1,100
2,120
3,140
4,140 4,140
5,130 5,130
3,140
2,120

1 Month sales
2 1 100
2 120
3 140
3 4 140
4 5 130
5 3 140
6 2 120

@ -6,9 +6,9 @@ import subprocess
import sys import sys
if sys.platform != 'win32': if sys.platform != 'win32':
import readline import readline
basecmd = ['bash', '-c', 'k'] basecmd = ['bash', '-c', 'rlwrap k']
else: else:
basecmd = ['bash.exe', '-c', './k'] basecmd = ['bash.exe', '-c', 'rlwrap ./k']
test_parser = True test_parser = True

@ -7,3 +7,4 @@ FIELDS TERMINATED BY ","
SELECT sum(c), b, d SELECT sum(c), b, d
FROM test FROM test
group by a,b,d group by a,b,d
order by d DESC, b ASC

Loading…
Cancel
Save