Order by, on-demand assumptions, bugfixes

Nested-loop join (concept)
dev
BillSun 3 years ago
parent b9a8ad3ac7
commit 8b182cf0f2

@ -412,7 +412,7 @@ def parser(literal_string, ident, sqlserver=False):
+ RB,
)
assumption = Group((ASC|DESC) ("ord") + var_name("attrib"))
assumption = Group((ASC|DESC) ("sort") + var_name("value"))
assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions"))
table_source << Group(

@ -6,7 +6,7 @@ from engine.utils import base62uuid
# replace column info with this later.
class ColRef:
def __init__(self, k9name, _ty, cobj, cnt, table, name, id, order = None, compound = False):
def __init__(self, k9name, _ty, cobj, cnt, table, name, id, compound = False):
self.k9name = k9name
self.type = _ty
self.cobj = cobj
@ -14,12 +14,15 @@ class ColRef:
self.table = table
self.name = name
self.id = id
self.order = order # True -> asc, False -> dsc; None -> unordered
self.order_pending = None # order_pending
self.compound = compound # compound field (list as a field)
self.views = []
self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)
def __getitem__(self, key):
    """String keys read an attribute by name; any other key indexes the packed tuple."""
    if type(key) is str:
        return getattr(self, key)
    return self.__arr__[key]
def __setitem__(self, key, value):
@ -40,21 +43,24 @@ class TableInfo:
self.views = set()
self.rec = None
self.groupinfo = None
for c in cols:
self.add_col(c)
self.add_cols(cols)
# runtime
self.n_rows = 0 # number of rows
self.order = [] # assumptions
cxt.tables_byname[self.table_name] = self # construct reverse map
def add_col(self, c):
if type(c) is ColRef:
c = c.cobj
def add_cols(self, cols, new = True):
    """Register every column in *cols* on this table by delegating to add_col."""
    for col in cols:
        self.add_col(col, new)
def add_col(self, c, new = True):
    """Attach one column to this table.

    c   : a column-description dict (when new) or an existing ColRef (when not new).
    new : when True, mint a fresh k9 name and build a ColRef; when False, reuse c as-is.
    """
    _ty = c['type']
    if new:
        k9name = 'c' + base62uuid(7)
        # A ColRef's 'type' lookup already yields the type name; a plain column
        # dict stores a mapping and needs its first key extracted.
        # (Removed a dead duplicate ColRef construction here that was immediately
        # overwritten and would have raised AttributeError for ColRef inputs.)
        _ty = _ty if type(c) is ColRef else list(_ty.keys())[0]
        col_object = ColRef(k9name, _ty, c, 1, self, c['name'], len(self.columns))
    else:
        col_object = c
        k9name = c.k9name
    # Index the column both globally (by k9 name) and locally (by SQL name).
    self.cxt.k9cols_byname[k9name] = col_object
    self.columns_byname[c['name']] = col_object
    self.columns.append(col_object)
@ -66,11 +72,41 @@ class TableInfo:
def n_cols(self):
return len(self.columns)
def get_col(self, col_name):
def materialize_orderbys(self):
    # Apply every pending ORDER BY queued on this table: materialize each
    # `orders` object, compose their index views into one chained lookup,
    # and mark every column for lazy reordering via order_pending.
    view_stack = ''   # nested k9 indexing expression combining all order views
    stack_name = ''   # name of the first view; reused to hold the combined index
    for o in self.order:
        o.materialize()
        if len(view_stack) == 0:
            view_stack = o.view.name
            stack_name = view_stack
        else:
            # chain subsequent views: v1[v2][v3]...
            view_stack = view_stack + '[' + o.view.name + ']'
    # TODO: Optimize by doing everything in a stmt
    if len(view_stack) > 0:
        if len(self.order) > 1:
            # collapse the chained views back into the first view's name
            self.cxt.emit(f'{stack_name}:{view_stack}')
        for c in self.columns:
            # columns are not reordered here; get_col() applies the index on demand
            c.order_pending = stack_name
        self.order[0].node.view = stack_name
        self.order.clear()
def get_col_d(self, col_name):
    """Direct column lookup by SQL name, without materializing pending orderings.

    When self.rec is a list (recording mode), the accessed column is appended to it.
    """
    column = self.columns_byname[col_name]
    if type(self.rec) is list:
        self.rec.append(column)
    return column
def get_k9colname_d(self, col_name):
    """k9-side name of a column, via the direct (non-materializing) lookup."""
    return self.get_col_d(col_name).k9name
def get_col(self, col_name):
    # Ordered access: first flush any pending ORDER BYs on the table, then,
    # if this column still carries a pending ordering index, emit the in-place
    # reorder and clear the flag so it is applied only once.
    self.materialize_orderbys()
    col = self.get_col_d(col_name)
    if type(col.order_pending) is str:
        self.cxt.emit_no_flush(f'{col.k9name}:{col.k9name}[{col.order_pending}]')
        col.order_pending = None
    return col
def get_k9colname(self, col_name):
    """k9-side name of a column, materializing pending orderbys first."""
    return self.get_col(col_name).k9name
@ -80,7 +116,9 @@ class TableInfo:
self.cxt.tables_byname[alias] = self
self.alias.add(alias)
def parse_tablenames(self, colExpr):
def parse_tablenames(self, colExpr, materialize = True):
self.get_col = self.get_col if materialize else self.get_col_d
parsedColExpr = colExpr.split('.')
ret = None
if len(parsedColExpr) <= 1:
@ -117,12 +155,13 @@ class Context:
self.udf_map = dict()
# read header
self.k9code = ''
self.k9codelet = ''
with open('header.k', 'r') as outfile:
self.k9code = outfile.read()
# datasource will be available after `from' clause is parsed
# and will be deactivated when the `from' is out of scope
self.datasource = None
self.ds_stack = []
def add_table(self, table_name, cols):
tbl = TableInfo(table_name, cols, self)
@ -134,17 +173,46 @@ class Context:
return f't{base62uuid(7)}'
def emit(self, codelet):
    """Flush the pending codelet buffer together with *codelet* as one finished line."""
    buffered = self.k9codelet
    self.k9codelet = ''
    self.k9code += buffered + codelet + '\n'
def emit_no_flush(self, codelet):
    """Emit *codelet* as its own finished line, leaving the pending buffer untouched."""
    self.k9code = self.k9code + codelet + '\n'
def emit_flush(self):
    """Terminate whatever is buffered with a newline and clear the buffer."""
    buffered, self.k9codelet = self.k9codelet, ''
    self.k9code += buffered + '\n'
def emit_nonewline(self, codelet):
    """Append *codelet* to the pending buffer only; it reaches k9code when
    emit() or emit_flush() later terminates the line.

    Fix: previously the text was also appended directly to k9code, so it was
    emitted a second time when the buffer was flushed.
    """
    self.k9codelet += codelet
def datsource_top(self):
    """Peek at the innermost active datasource; None when the stack is empty.

    NOTE(review): name is misspelled ('datsource') — kept for caller compatibility.
    """
    return self.ds_stack[-1] if self.ds_stack else None
def datasource_pop(self):
    """Pop the innermost datasource and return the new top of the stack.

    Returns None when the stack is empty, or becomes empty after the pop
    (previously popping the last element raised IndexError on ds_stack[-1]).
    """
    if not self.ds_stack:
        return None
    self.ds_stack.pop()
    return self.ds_stack[-1] if self.ds_stack else None
def datasource_push(self, ds):
    """Push *ds* onto the datasource stack and return it; non-TableInfo values are rejected (None)."""
    if type(ds) is not TableInfo:
        return None
    self.ds_stack.append(ds)
    return ds
def __str__(self):
    """Render the full generated k9 program text."""
    return self.k9code
def __repr__(self) -> str:
    """Same as __str__: the generated k9 program text."""
    return self.__str__()
class ast_node:
types = dict()
def __init__(self, parent:"ast_node", node, context:Context = None):
self.context = parent.context if context is None else context
self.parent = parent
self.datasource = None
self.init(node)
self.produce(node)
self.spawn(node)

@ -14,6 +14,7 @@ class expr(ast_node):
'avgs': ['avgs', 'avgsw'],
'sums': ['sums', 'sumsw'],
}
binary_ops = {
'sub':'-',
'add':'+',
@ -24,18 +25,23 @@ class expr(ast_node):
'gt':'>',
'lt':'<',
}
compound_ops = {
'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'],
'le' : [2, lambda x: f'~({x[0]}>{x[1]})'],
'count' : [1, lambda x: f'#({x[0]})']
}
unary_ops = {
'neg' : '-',
'not' : '~'
}
coumpound_generating_ops = ['mod', 'mins', 'maxs', 'sums'] + \
list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() )
def __init__(self, parent, node):
def __init__(self, parent, node, materialize_cols = True):
self.materialize_cols = materialize_cols
ast_node.__init__(self, parent, node, None)
def init(self, _):
@ -95,7 +101,7 @@ class expr(ast_node):
while type(p) is expr and not p.isvector:
p.isvector = True
p = p.parent
self.k9expr = self.datasource.parse_tablenames(node)
self.k9expr = self.datasource.parse_tablenames(node, self.materialize_cols)
elif type(node) is bool:
self.k9expr = '1' if node else '0'
else:

@ -46,10 +46,11 @@ class groupby(ast_node):
self.parent.inv = False
else:
k9fn = "{[ids;grps;ll;dim;x] " + \
"start:$[x=ll;ll;grps[x+1][dim-1]];" + \
"end: grps[x][dim-1];" + \
"range:(end-start)#(((start-ll))#ids);" + \
"start:grps[x][dim];" + \
"end:$[x=0;ll;grps[x-1][dim]];" + \
"range:(end-start)#((start-ll)#ids);" + \
"start:ids[start];" + \
ret + '}'
self.emit(f'{self.groupby_function}:{k9fn}')
self.emit(f'{out}:+({self.groupby_function}' + \
f'[{grp}[1];{grp}[0];(#{grp}[0])-1;#({grp}[0][0])]\'!((#({grp}[0]))-1))')
f'[{grp}[1];{grp}[0];(#{grp}[0])+1;(#({grp}[0][0]))-1]\'!(#({grp}[0])))')

@ -1,38 +1,59 @@
from engine.ast import ColRef, TableInfo, ast_node
from engine.utils import base62uuid
from engine.ast import ColRef, TableInfo, View, ast_node, Context
from engine.utils import base62uuid, seps
from engine.expr import expr
import k
class order_item:
    """One ORDER BY key: an expression plus its direction (True = ascending)."""

    def __init__(self, name, node, order = True):
        self.name = name          # SQL expression (lowered to k9 on materialize)
        self.node = node          # owning ast_node, used as expr parent
        self.order = order        # True -> asc, False -> desc
        self.materialized = False # guard so expr lowering runs only once

    def materialize(self):
        """Lower the key to a k9 expression (at most once) and return it,
        wrapped in parentheses and '-'-prefixed when descending."""
        if not self.materialized:
            self.name = expr(self.node, self.name, False).k9expr
            self.materialized = True
        prefix = '' if self.order else '-'
        return prefix + f'({self.name})'

    def __str__(self):
        return self.materialize()

    def __repr__(self):
        return self.__str__()
class orders:
    """Collects the order_items of one table and materializes them into a
    single k9 sort-index view."""

    def __init__(self, node, datasource):
        self.order_items = []     # order_item instances, in ORDER BY order
        self.materialized = False # guard so the view is emitted only once
        self.view = None          # View holding the sort index after materialize
        self.node = node          # owning ast_node (provides context / emit)
        self.datasource = datasource
        self.n_attrs = -1         # number of keys; -1 until materialized

    def materialize(self):
        """Emit (once) the k9 statement building the grade/ordering view over all keys."""
        if self.materialized:
            return
        self.view = View(self.node.context, self.datasource, False)
        keys = ';'.join(str(o) for o in self.order_items)
        self.n_attrs = len(self.order_items)
        # a single key needs a leading ',' so the k9 literal stays a list
        self.node.emit(f"{self.view.name}: > +`j (({',' if self.n_attrs == 1 else ''}{keys}))")
        self.materialized = True

    def append(self, o):
        self.order_items.append(o)
class orderby(ast_node):
name = '_orderby'
def init(self, _):
    # Fresh unique group id for this orderby node.
    self.group = 'g' + base62uuid(7)
    self.datasource = self.parent.datasource
    # Start recording which columns the sort keys reference (consumed later).
    self.datasource.rec = []
    # Container accumulating one order_item per ORDER BY key.
    self.order = orders(self, self.datasource)
    self.view = ''
def produce(self, node):
    # Normalize: a single ORDER BY key arrives as a dict, multiple as a list.
    if type(node) is not list:
        node = [node]
    # NOTE(review): the block below (g_contents / groupby-style emits) mirrors
    # the groupby node and appears superseded by the order_item loop at the
    # end — confirm against version history before relying on it.
    g_contents = '('
    first_col = ''
    for i, g in enumerate(node):
        v = g['value']
        e = expr(self, v).k9expr
        # if v is compound expr, create tmp cols
        if type(v) is not str:
            tmpcol = 't' + base62uuid(7)
            self.emit(f'{tmpcol}:{e}')
            e = tmpcol
        if i == 0:
            first_col = e
        g_contents += e + (';'if i < len(node)-1 else '')
    self.emit(f'{self.group}:'+g_contents+')')
    self.n_grps = len(node)
    if self.n_grps <= 1:
        self.emit(f'{self.group}:={self.group}')
    else:
        self.emit(f'{self.group}:groupby[+({self.group},(,!(#({first_col}))))]')
    # One order_item per key; ascending unless an explicit 'desc' sort is given.
    for n in node:
        order = not ('sort' in n and n['sort'] == 'desc')
        self.order.append(order_item(n['value'], self, order))
def consume(self, _):
    # Capture the columns touched while building the sort keys, stop recording.
    self.referenced = self.datasource.rec
    self.datasource.rec = None
    return super().consume(_)
    # NOTE(review): unreachable — placed after the return. Presumably it was
    # meant to run before it so the table learns about this pending ordering;
    # confirm against version history.
    self.datasource.order.append(self.order)

@ -1,7 +1,9 @@
from attr import has
from engine.ast import ColRef, TableInfo, ast_node, Context, include
from engine.groupby import groupby
from engine.join import join
from engine.expr import expr
from engine.orderby import orderby
from engine.scan import filter
from engine.utils import base62uuid, enlist, base62alp
from engine.ddl import outfile
@ -44,10 +46,7 @@ class projection(ast_node):
self.datasource = self.context.tables_byname[value]
if 'assumptions' in from_clause:
for assumption in enlist(from_clause['assumptions']):
ord = assumption['ord'] == 'asc'
attrib = assumption['attrib']
ord = '^' if ord else '|^'
# TODO: generate view of table by order
orderby(self, assumption)
elif type(from_clause) is str:
self.datasource = self.context.tables_byname[from_clause]
@ -92,8 +91,8 @@ class projection(ast_node):
if 'value' in proj:
e = proj['value']
if type(e) is str:
cname = self.datasource.parse_tablenames(proj['value'])
k9expr += (f"{cname}")
cname = e # TODO: deal w/ alias
k9expr += (f"{self.datasource.parse_tablenames(proj['value'])}")
elif type(e) is dict:
p_expr = expr(self, e)
cname = p_expr.k9expr
@ -104,27 +103,41 @@ class projection(ast_node):
compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced
cols.append(ColRef(f'(+{disp_varname})[{i}]', 'generic', self.out_table, 0, None, cname, i, compound=compound))
cols.append(ColRef(f'{disp_varname}[{i}]', 'generic', self.out_table, 0, None, cname, i, compound=compound))
self.out_table.add_cols(cols, False)
k9expr += ')'
if has_groupby:
self.group_node.finalize(k9expr, disp_varname)
else:
self.emit(f'{disp_varname}:{k9expr}')
self.datasource.group_node = None
if flatten:
self.emit_no_ln(f'{disp_varname}:' if flatten else '')
if flatten or self.disp:
has_orderby = 'orderby' in node
if has_orderby:
self.datasource = self.out_table
self.context.datasource = self.out_table # discard current ds
orderby_node = orderby(self, node['orderby'])
self.context.datasource.materialize_orderbys()
self.emit_no_ln(f"{f'{disp_varname}:+' if flatten else ''}(")
if self.disp or has_orderby:
if len(self.projections) > 1:
self.emit(f"{'+' if self.inv else ''}{disp_varname}")
self.emit_no_ln(f"{'+' if self.inv else ''}{disp_varname}")
else:
self.emit(f'$[(#{disp_varname})>1;+,({disp_varname});+,(,{disp_varname})]')
self.emit_no_ln(f'$[(#{disp_varname})>1;+,({disp_varname});+,(,{disp_varname})]')
if flatten:
self.emit(f'{disp_varname}')
self.emit_no_ln(f'{disp_varname}')
if has_orderby:
self.emit(f')[{orderby_node.view}]')
else:
self.context.emit_flush()
if flatten:
self.out_table.columns = cols
if len(self.projections) > 1 and not self.inv:
self.emit(f"{disp_varname}:+{disp_varname}")
outfile(self, node['outfile'])
if self.datasource_changed:
self.context.datasource = self.prev_datasource

@ -1,6 +1,6 @@
Month,sales
1,100
2,120
3,140
4,140
5,130
3,140
2,120

1 Month sales
2 1 100
2 120
3 140
3 4 140
4 5 130
5 3 140
6 2 120

@ -6,9 +6,9 @@ import subprocess
import sys
if sys.platform != 'win32':
    # readline enables line editing in the interactive prompt (POSIX only).
    import readline
    # rlwrap adds history/editing to the k interpreter subprocess.
    # Fix: each branch previously assigned basecmd twice; the first
    # (non-rlwrap) assignment was dead and has been removed.
    basecmd = ['bash', '-c', 'rlwrap k']
else:
    basecmd = ['bash.exe', '-c', 'rlwrap ./k']
test_parser = True

@ -7,3 +7,4 @@ FIELDS TERMINATED BY ","
SELECT sum(c), b, d
FROM test
group by a,b,d
order by d DESC, b ASC

Loading…
Cancel
Save