diff --git a/.gitignore b/.gitignore
index 948a9a8..dc09fca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,8 @@ k
 *.pdf
 test*.c*
 *.csv
-*.out
\ No newline at end of file
+*.out
+*.asm
+!mmw.so
+*.k
+!header.k
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fef8c50
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+# Builds mmw.so from mmw.cpp; every build target writes the same output file.
+.PHONY: all avx512 debug clean
+all:
+	g++ mmw.cpp --std=c++1z -shared -fPIC -Ofast -march=native -g0 -s -o mmw.so
+avx512:
+	g++ mmw.cpp --std=c++1z -shared -fPIC -Ofast -mavx512f -g0 -s -o mmw.so
+debug:
+	g++ mmw.cpp --std=c++1z -shared -fPIC -O0 -march=native -g3 -o mmw.so
+clean:
+	rm -f mmw.so
diff --git a/engine/ast.py b/engine/ast.py
index 86adaaf..210e3e8 100644
--- a/engine/ast.py
+++ b/engine/ast.py
@@ -4,7 +4,7 @@ from engine.utils import base62uuid
 
 # replace column info with this later.
 class ColRef:
-    def __init__(self, k9name, _ty, cobj, cnt, table, name, id):
+    def __init__(self, k9name, _ty, cobj, cnt, table, name, id, order = None, compound = False):
         self.k9name = k9name
         self.type = _ty
         self.cobj = cobj
@@ -12,6 +12,9 @@ class ColRef:
         self.table = table
         self.name = name
         self.id = id
+        self.order = order # True -> asc, False -> dsc; None -> unordered
+        self.compound = compound # compound field (list as a field) 
+        self.views = []
         self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)
         
     def __getitem__(self, key):
@@ -31,6 +34,7 @@ class TableInfo:
         self.cxt = cxt
         self.views = set()
         self.rec = None 
+        self.groupinfo = None
         for c in cols:
             self.add_col(c)
 
@@ -44,13 +48,6 @@ class TableInfo:
         if type(c) is ColRef:
             c = c.cobj
         k9name = 'c' + base62uuid(7)
-        # k9name = self.table_name + c['name']
-        # if k9name in self.cxt.k9cols_byname: # duplicate names?
-        #     root = self.cxt.k9cols_byname[k9name] 
-        #     k9name = k9name + root.cnt
-        #     root.cnt += 1
-
-        # column: (k9name, type, original col_object, dup_count)
         col_object =  ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns))
 
         self.cxt.k9cols_byname[k9name] = col_object
diff --git a/engine/ddl.py b/engine/ddl.py
index 84fc205..91dfb8e 100644
--- a/engine/ddl.py
+++ b/engine/ddl.py
@@ -37,15 +37,18 @@ class load(ast_node):
     name="load"
     def produce(self, node):
+        """Emit k code that parses the csv file and binds each table column to its data."""
         node = node[self.name]
-        tablename = 'l'+base62uuid(7)
-        keys = 'k'+base62uuid(7)
-        self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"")
-        self.emit(f"{keys}:!{tablename}")
         table:TableInfo = self.context.tables_byname[node['table']]
-        
+        # One throwaway k symbol (`tkXXXXXX) per table column; the emitted statement
+        # keys the parsed csv with them and indexes by the same symbols, leaving a
+        # list addressed by column position (NOTE(review): k semantics inferred).
+        keys = ''.join('`tk' + base62uuid(6) for _ in table.columns)
+        tablename = 'l'+base62uuid(7)
+
+        self.emit(f"{tablename}:[{keys}!+(`csv ? 1:\"{node['file']['literal']}\")][{keys}]")
         for i, c in enumerate(table.columns):
             c:ColRef
-            self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]')
+            self.emit(f'{c.k9name}:{tablename}[{i}]')
             
 class outfile(ast_node):
     name="_outfile"
diff --git a/engine/expr.py b/engine/expr.py
index 2d62c89..7c41776 100644
--- a/engine/expr.py
+++ b/engine/expr.py
@@ -8,17 +8,17 @@ class expr(ast_node):
         'min': 'min', 
         'avg': 'avg',
         'sum': 'sum',
+        'mod':'mod',
         'mins': ['mins', 'minsw'],
         'maxs': ['maxs', 'maxsw'],
         'avgs': ['avgs', 'avgsw'],
         'sums': ['sums', 'sumsw'],
     }
     binary_ops = {
-        'sub':'-', 
+        'sub':'-',  
         'add':'+', 
         'mul':'*', 
         'div':'%',
-        'mod':'mod',
         'and':'&',
         'or':'|',
         'gt':'>',
diff --git a/engine/groupby.py b/engine/groupby.py
index 090c75d..5b54c12 100644
--- a/engine/groupby.py
+++ b/engine/groupby.py
@@ -12,7 +12,7 @@ class groupby(ast_node):
         if type(node) is not list:
             node = [node]
         g_contents = '('
-        
+        first_col = ''
         for i, g in enumerate(node):
             v = g['value']
             e = expr(self, v).k9expr
@@ -21,7 +21,8 @@ class groupby(ast_node):
                 tmpcol = 't' + base62uuid(7)
                 self.emit(f'{tmpcol}:{e}')
                 e = tmpcol
-
+            if i == 0:
+                first_col = e
             g_contents += e + (';'if i < len(node)-1 else '')
             
         self.emit(f'{self.group}:'+g_contents+')')
@@ -29,8 +30,8 @@ class groupby(ast_node):
         if len(node) <= 1:
             self.emit(f'{self.group}:={self.group}')
         else:
-            self.emit(f'{self.group}:groupby[{self.group}[0];+{self.group}]')
-    
+            self.emit(f'{self.group}:groupby[+({self.group},(,!(#({first_col}))))]')
+        
     def consume(self, _):
         self.referenced = self.datasource.rec
         self.datasource.rec = None
diff --git a/engine/projection.py b/engine/projection.py
index 0df274e..2a3d24f 100644
--- a/engine/projection.py
+++ b/engine/projection.py
@@ -5,6 +5,8 @@ from engine.expr import expr
 from engine.scan import filter
 from engine.utils import base62uuid, enlist, base62alp
 from engine.ddl import outfile
+import copy
+
 class projection(ast_node):
     name='select'
     def __init__(self, parent:ast_node, node, context:Context = None, outname = None, disp = True):
@@ -62,6 +64,8 @@ class projection(ast_node):
 
         if 'groupby' in node:
             self.group_node = groupby(self, node['groupby'])
+            self.datasource = copy.copy(self.datasource) # shallow copy via the copy module
+            self.datasource.groupinfo = self.group_node # set on the copy only
         else:
             self.group_node = None
             
diff --git a/header.k b/header.k
index 287f913..f0df0c2 100644
--- a/header.k
+++ b/header.k
@@ -1,5 +1,7 @@
 import`csv
 
+md:{y-x*_y%x}
+
 maxs:{[L]{max(x, y)}\L}
 mins:{[L]{min(x, y)}\L}
 sums:{[L]{(x + y)}\L}
@@ -7,22 +9,56 @@ sums:{[L]{(x + y)}\L}
 avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]}
 avgs:{[L] avgsimpl[sums[L];0]}
 
-maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
-maxsw:{[w;L]maxswimp[L; w; 1]}
+/ maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
+/ maxsw:{[w;L]maxswimp[L; w; 1]}
+
+/ minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
+/ minsw:{[w;L]minswimp[L;w;1]}
+
+/ avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]}
+/ avgsw:{[w;L] avgswimp[L;w;0;0]}
+
+/ sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]}
+/ sumsw:{[w;L] sumswimp[L;w;0;0]}
+
+
+groupby0:{[L] 
+            {[x;y]
+                x:$[(@x)=`i;(,(L[0]))!,(,0);x];
+                k:(,(L[y]));gvk:x[k][0];
+                found:$[(gvk[0]+gvk[1])>0;1;L[y] in !x];
+                cg:(,L[y])!$[found;,gvk[0],y;,(,y)];
+                (x,cg)}/!(#L)}
+
+groupBy:{[x]groupBySingle:{[a;x]
+        findAll:{[c;xx]
+            f:{[i;c]$[(c[0])[i]~c[1];i+1;0]};
+            @[!#xx;!#xx;f;(#xx)#,(xx;c)]};
+        z:findAll[a;x];
+        b:(findAll[0;z]_(!(1+#z)))-1;(a;b)};
+    x:+x;y:?x;
+    @[y;!#y;groupBySingle;(#y)#,x]}
 
-minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
-minsw:{[w;L]minswimp[L;w;1]}
+groupby:{[L]
+        L:^+L;
+        dimy:(#(L[0]))-1;
+        ((({[L;dim;x;y] 
+            x:$[x~0;(,(dim#(L[0])),0);x];
+            curr:dim#(L[y]);
+            $[(dim#*x)~curr;x;((,curr,y),x)]}[L;dimy])/!(#L));(+L)[dimy]) }
 
-avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]}
-avgsw:{[w;L] avgswimp[L;w;0;0]}
+lststr:{[L](+({[x;y] ($x,$y)}/L))[0]}
+delist:{[L] $[(@L)in(`LL`LC`LG`L);delist[(,/L)];L]}
+cntlist:{[L;i] $[(@L)in(`LL`LC`LG`L);cntlist[(,/L);i+1];i+1]}
 
-sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]}
-sumsw:{[w;L] sumswimp[L;w;0;0]}
+sumswkrl:{[L;w;x;y] ((x-L[y-w])+L[y])}
+sumsw:{[w;L] $[(#L)=0;L;(sumswkrl[L;w])\@[!#L;0;L[0]]]}
+avgswkrl:{[L;w;x;y] (x-(L[y-w]-L[y])%w)}
+avgsw:{[w;L] $[(#L)=0;L;(avgswkrl[L;w])\@[!#L;0;L[0]]]}
 
-groupbyi:{[L;GV;i] 
-            k:(,(L[i]));gvk:GV[k][0];
-            found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV];
-            cg:(,L[i])!$[found;,gvk[0],i;,(,i)]; 
-            $[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]}
-groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]}
-groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]}
+/ minsw:{[w;L] ({[L;w;x] min(L[$[x>w;(!w) + ((x-w)+1);!(x+1)]])}[L;w])'!#L}
+import`mmw
+minsw:{[w;L] ret:L; mmw[ret;((`g ($@ret)[0]), (#ret), w, 65536)];ret}
+maxsw:{[w;L] ret:L; mmw[ret;((`g ($@ret)[0]), (#ret), w, 65537)];ret}
+minswip:{[w;L] mmw[L;((`g ($@L)[0]), (#L), w, 65536)];}
+maxswip:{[w;L] mmw[L;((`g ($@L)[0]), (#L), w, 65537)];}
\ No newline at end of file
diff --git a/mmw.cpp b/mmw.cpp
new file mode 100644
index 0000000..33f15ae
--- /dev/null
+++ b/mmw.cpp
@@ -0,0 +1,68 @@
+// Sliding-window minimum / maximum over a raw vector, exported to the k host
+// as `mmw` (header.k loads it with import`mmw).
+#include <cstring>
+#include <cstdlib>
+#include <cstdint>
+#include <cstdio>
+#include <deque>
+#include <utility>
+
+using std::size_t;
+using std::uint32_t;
+
+// Overwrites arr[i] with the minimum (is_max == false) or maximum
+// (is_max == true) of the inputs at indices (i-w, i]. With w == 0 nothing
+// ever expires, so each output is the extreme of the whole prefix.
+template<class T, bool is_max>
+void running(void *array, uint32_t len, uint32_t w){
+	T* arr = static_cast<T*>(array);
+	// (value, index) candidates, ordered so that the front is the current extreme.
+	std::deque<std::pair<T, uint32_t>> cache;
+	for(uint32_t i = 0; i < len; ++i){
+		// Index i-w has just left the window; if still cached it is at the front.
+		if(i >= w && !cache.empty() && cache.front().second == i-w) cache.pop_front();
+		// Candidates beaten by arr[i] can never be the answer again.
+		if constexpr(is_max)
+			while(!cache.empty() && cache.back().first<arr[i]) cache.pop_back();
+		else
+			while(!cache.empty() && cache.back().first>arr[i]) cache.pop_back();
+		cache.push_back({arr[i], i});
+		arr[i] = cache.front().first;
+	}
+}
+
+// Maps the runtime min/max selector onto the matching template instance.
+template<class T>
+inline void mm(void *array, uint32_t len, uint32_t w, bool is_max){
+	if(is_max) running<T, true>(array, len, w);
+	else running<T, false>(array, len, w);
+}
+
+extern "C" {
+	// Entry point: rewrites `array` in place and always returns 0.
+	//   misc[0]  type letter selecting the element type (see the switch)
+	//   misc[1]  number of elements
+	//   misc[2]  window size
+	//   misc[3]  0x10000 selects minimum, anything else (0x10001) maximum
+	int mmw(void *array, unsigned long long misc[]){
+		const char _ty = static_cast<char>(misc[0]);
+		const uint32_t len = static_cast<uint32_t>(misc[1]);
+		const uint32_t w = static_cast<uint32_t>(misc[2]);
+		// header.k sends 65536 from minsw/minswip and 65537 from maxsw/maxswip.
+		const bool is_max = misc[3] != 0x10000;
+		switch(_ty){
+			case 'F': mm<double>(array, len, w, is_max); break;
+			case 'C': case 'G': mm<unsigned char>(array, len, w, is_max); break;
+			// NOTE(review): H/I/D are compared as unsigned while T/J are signed;
+			// confirm these vectors can never hold negative values.
+			case 'H': mm<unsigned short>(array, len, w, is_max); break;
+			case 'D': case 'I': mm<unsigned int>(array, len, w, is_max); break;
+			case 'T': case 'J': mm<long long>(array, len, w, is_max); break;
+			case 'L':
+				if(len == 0) break; // zero-length 'L' input: nothing to do
+				[[fallthrough]];
+			default: std::printf("nyi %c\n", _ty);
+		}
+		return 0;
+	}
+}
diff --git a/mmw.so b/mmw.so
new file mode 100644
index 0000000..dd2900d
Binary files /dev/null and b/mmw.so differ
diff --git a/prompt.py b/prompt.py
index bdf0abf..3ed66c1 100644
--- a/prompt.py
+++ b/prompt.py
@@ -7,9 +7,6 @@ import sys
 if sys.platform != 'win32':
     import readline
     
-# else:
-#     import pyreadline3
-
 test_parser = True
 
 # code to test parser