diff --git a/.gitignore b/.gitignore index 948a9a8..dc09fca 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,8 @@ k *.pdf test*.c* *.csv -*.out \ No newline at end of file +*.out +*.asm +!mmw.so +*.k +!header.k \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fef8c50 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +all: + g++ mmw.cpp --std=c++1z -shared -fPIC -Ofast -march=native -g0 -s -o mmw.so +avx512: + g++ mmw.cpp --std=c++1z -shared -fPIC -Ofast -mavx512f -g0 -s -o mmw.so +debug: + g++ mmw.cpp --std=c++1z -shared -fPIC -O0 -march=native -g3 -o mmw.so +clean: + rm -rf mmw.so diff --git a/engine/ast.py b/engine/ast.py index 86adaaf..210e3e8 100644 --- a/engine/ast.py +++ b/engine/ast.py @@ -4,7 +4,7 @@ from engine.utils import base62uuid # replace column info with this later. class ColRef: - def __init__(self, k9name, _ty, cobj, cnt, table, name, id): + def __init__(self, k9name, _ty, cobj, cnt, table, name, id, order = None, compound = False): self.k9name = k9name self.type = _ty self.cobj = cobj @@ -12,6 +12,9 @@ class ColRef: self.table = table self.name = name self.id = id + self.order = order # True -> asc, False -> dsc; None -> unordered + self.compound = compound # compound field (list as a field) + self.views = [] self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id) def __getitem__(self, key): @@ -31,6 +34,7 @@ class TableInfo: self.cxt = cxt self.views = set() self.rec = None + self.groupinfo = None for c in cols: self.add_col(c) @@ -44,13 +48,6 @@ class TableInfo: if type(c) is ColRef: c = c.cobj k9name = 'c' + base62uuid(7) - # k9name = self.table_name + c['name'] - # if k9name in self.cxt.k9cols_byname: # duplicate names? 
- # root = self.cxt.k9cols_byname[k9name] - # k9name = k9name + root.cnt - # root.cnt += 1 - - # column: (k9name, type, original col_object, dup_count) col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns)) self.cxt.k9cols_byname[k9name] = col_object diff --git a/engine/ddl.py b/engine/ddl.py index 84fc205..91dfb8e 100644 --- a/engine/ddl.py +++ b/engine/ddl.py @@ -37,15 +37,18 @@ class load(ast_node): name="load" def produce(self, node): node = node[self.name] - tablename = 'l'+base62uuid(7) - keys = 'k'+base62uuid(7) - self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"") - self.emit(f"{keys}:!{tablename}") table:TableInfo = self.context.tables_byname[node['table']] - + n_keys = len(table.columns) + keys = '' + for _ in range(n_keys): + keys+='`tk'+base62uuid(6) + tablename = 'l'+base62uuid(7) + + self.emit(f"{tablename}:[{keys}!+(`csv ? 1:\"{node['file']['literal']}\")][{keys}]") + for i, c in enumerate(table.columns): c:ColRef - self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]') + self.emit(f'{c.k9name}:{tablename}[{i}]') class outfile(ast_node): name="_outfile" diff --git a/engine/expr.py b/engine/expr.py index 2d62c89..7c41776 100644 --- a/engine/expr.py +++ b/engine/expr.py @@ -8,17 +8,17 @@ class expr(ast_node): 'min': 'min', 'avg': 'avg', 'sum': 'sum', + 'mod':'mod', 'mins': ['mins', 'minsw'], 'maxs': ['maxs', 'maxsw'], 'avgs': ['avgs', 'avgsw'], 'sums': ['sums', 'sumsw'], } binary_ops = { - 'sub':'-', + 'sub':'-', 'add':'+', 'mul':'*', 'div':'%', - 'mod':'mod', 'and':'&', 'or':'|', 'gt':'>', diff --git a/engine/groupby.py b/engine/groupby.py index 090c75d..5b54c12 100644 --- a/engine/groupby.py +++ b/engine/groupby.py @@ -12,7 +12,7 @@ class groupby(ast_node): if type(node) is not list: node = [node] g_contents = '(' - + first_col = '' for i, g in enumerate(node): v = g['value'] e = expr(self, v).k9expr @@ -21,7 +21,8 @@ class groupby(ast_node): tmpcol = 't' + base62uuid(7) self.emit(f'{tmpcol}:{e}') e = 
tmpcol - + if i == 0: + first_col = e g_contents += e + (';'if i < len(node)-1 else '') self.emit(f'{self.group}:'+g_contents+')') @@ -29,8 +30,8 @@ class groupby(ast_node): if len(node) <= 1: self.emit(f'{self.group}:={self.group}') else: - self.emit(f'{self.group}:groupby[{self.group}[0];+{self.group}]') - + self.emit(f'{self.group}:groupby[+({self.group},(,!(#({first_col}))))]') + def consume(self, _): self.referenced = self.datasource.rec self.datasource.rec = None diff --git a/engine/projection.py b/engine/projection.py index 0df274e..2a3d24f 100644 --- a/engine/projection.py +++ b/engine/projection.py @@ -5,6 +5,8 @@ from engine.expr import expr from engine.scan import filter from engine.utils import base62uuid, enlist, base62alp from engine.ddl import outfile +import copy + class projection(ast_node): name='select' def __init__(self, parent:ast_node, node, context:Context = None, outname = None, disp = True): @@ -62,6 +64,8 @@ class projection(ast_node): if 'groupby' in node: self.group_node = groupby(self, node['groupby']) + self.datasource = copy.copy(self.datasource) # shallow copy + self.datasource.groupinfo = self.group_node else: self.group_node = None diff --git a/header.k b/header.k index 287f913..f0df0c2 100644 --- a/header.k +++ b/header.k @@ -1,5 +1,7 @@ import`csv +md:{y-x*_y%x} + maxs:{[L]{max(x, y)}\L} mins:{[L]{min(x, y)}\L} sums:{[L]{(x + y)}\L} @@ -7,22 +9,56 @@ sums:{[L]{(x + y)}\L} avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]} avgs:{[L] avgsimpl[sums[L];0]} -maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} -maxsw:{[w;L]maxswimp[L; w; 1]} +/ maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} +/ maxsw:{[w;L]maxswimp[L; w; 1]} + +/ minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} +/ minsw:{[w;L]minswimp[L;w;1]} + +/ avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; 
i+1]; curr]} +/ avgsw:{[w;L] avgswimp[L;w;0;0]} + +/ sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]} +/ sumsw:{[w;L] sumswimp[L;w;0;0]} + + +groupby0:{[L] + {[x;y] + x:$[(@x)=`i;(,(L[0]))!,(,0);x]; + k:(,(L[y]));gvk:x[k][0]; + found:$[(gvk[0]+gvk[1])>0;1;L[y] in !x]; + cg:(,L[y])!$[found;,gvk[0],y;,(,y)]; + (x,cg)}/!(#L)} + +groupBy:{[x]groupBySingle:{[a;x] + findAll:{[c;xx] + f:{[i;c]$[(c[0])[i]~c[1];i+1;0]}; + @[!#xx;!#xx;f;(#xx)#,(xx;c)]}; + z:findAll[a;x]; + b:(findAll[0;z]_(!(1+#z)))-1;(a;b)}; + x:+x;y:?x; + @[y;!#y;groupBySingle;(#y)#,x]} -minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]} -minsw:{[w;L]minswimp[L;w;1]} +groupby:{[L] + L:^+L; + dimy:(#(L[0]))-1; + ((({[L;dim;x;y] + x:$[x~0;(,(dim#(L[0])),0);x]; + curr:dim#(L[y]); + $[(dim#*x)~curr;x;((,curr,y),x)]}[L;dimy])/!(#L));(+L)[dimy]) } -avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]} -avgsw:{[w;L] avgswimp[L;w;0;0]} +lststr:{[L](+({[x;y] ($x,$y)}/L))[0]} +delist:{[L] $[(@L)in(`LL`LC`LG`L);delist[(,/L)];L]} +cntlist:{[L;i] $[(@L)in(`LL`LC`LG`L);cntlist[(,/L);i+1];i+1]} -sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]} -sumsw:{[w;L] sumswimp[L;w;0;0]} +sumswkrl:{[L;w;x;y] ((x-L[y-w])+L[y])} +sumsw:{[L;w] $[(#L)=0;L;(sumswkrl[L;w])\@[!#L;0;L[0]]]} +avgswkrl:{[L;w;x;y] (x-(L[y-w]-L[y])%w)} +avgsw:{[L;w] $[(#L)=0;L;(avgswkrl[L;w])\@[!#L;0;L[0]]]} -groupbyi:{[L;GV;i] - k:(,(L[i]));gvk:GV[k][0]; - found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV]; - cg:(,L[i])!$[found;,gvk[0],i;,(,i)]; - $[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]} -groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]} -groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]} +/ minsw:{[w;L] ({[L;w;x] min(L[$[x>w;(!w) + ((x-w)+1);!(x+1)]])}[L;w])'!#L} +import`mmw +minsw:{[w;L] ret:L; mmw[ret;((`g ($@ret)[0]), (#ret), w, 65536)];ret} +maxsw:{[w;L] ret:L; mmw[ret;((`g ($@ret)[0]), (#ret), w, 
65537)];ret} +minswip:{[w;L] mmw[L;((`g ($@L)[0]), (#L), w, 65536)];} +maxswip:{[w;L] mmw[L;((`g ($@L)[0]), (#L), w, 65537)];} \ No newline at end of file diff --git a/mmw.cpp b/mmw.cpp new file mode 100644 index 0000000..33f15ae --- /dev/null +++ b/mmw.cpp @@ -0,0 +1,48 @@ + +#include +#include +#include +#include + +using std::size_t; +using std::uint32_t; + +template +void running(void *array, uint32_t len, uint32_t w){ + using std::deque; + T* arr = static_cast (array); + deque> cache; + for(int i = 0; i < len; ++i){ + if(!cache.empty() && cache.front().second == i-w) cache.pop_front(); + if constexpr(minmax) + while(!cache.empty() && cache.back().first>arr[i]) cache.pop_back(); + else + while(!cache.empty() && cache.back().first +inline void mm(void *array, uint32_t len, uint32_t w, bool mm){ + mm? running(array, len, w) : running(array, len, w); +} +extern "C" { + #include + + int mmw(void *array, unsigned long long misc[]){ + char _ty = misc[0]; + uint32_t len = misc[1]; + uint32_t w = misc[2]; + bool minmax = misc[3]-0x10000; + switch(_ty){ + case 'F': mm(array, len, w, minmax); break; + case 'C': case 'G': mm(array, len, w, minmax); break; + case 'H': mm(array, len, w, minmax); break; + case 'D': case 'I': mm(array, len, w, minmax); break; + case 'T': case 'J': mm(array, len, w, minmax); break; + case 'L': if(len == 0) break; + default: printf("nyi %c\n", _ty); + } + return 0; + } +} diff --git a/mmw.so b/mmw.so new file mode 100644 index 0000000..dd2900d Binary files /dev/null and b/mmw.so differ diff --git a/prompt.py b/prompt.py index bdf0abf..3ed66c1 100644 --- a/prompt.py +++ b/prompt.py @@ -7,9 +7,6 @@ import sys if sys.platform != 'win32': import readline -# else: -# import pyreadline3 - test_parser = True # code to test parser