From 3b2dfb295e03ebd06f8b3ccedaab4db7e6f77447 Mon Sep 17 00:00:00 2001
From: Bill Sun <sunyinqi0508@gmail.com>
Date: Fri, 28 Jan 2022 10:02:43 -0500
Subject: [PATCH] added filter, basic aggregations. Fixed bugs with var length,
 etc.

---
 .gitignore           |   5 +-
 engine/ast.py        | 100 ++++++++++++++++++++++++++++++----------
 engine/expr.py       |  26 ++++++++---
 engine/groupby.py    |   0
 engine/projection.py |  31 +++++++++----
 engine/scan.py       | 107 +++++++++++++++++++++++++++++++++++++++++++
 engine/utils.py      |   2 +-
 header.k             |   2 +
 run.py => prompt.py  |   8 +++-
 stock.a              |   6 ++-
 10 files changed, 241 insertions(+), 46 deletions(-)
 create mode 100644 engine/groupby.py
 create mode 100644 engine/scan.py
 create mode 100644 header.k
 rename run.py => prompt.py (82%)

diff --git a/.gitignore b/.gitignore
index 48d6ec7..d544b90 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,7 @@ vendor/
 .DS_Store
 .eggs
 .vscode
-out.k
\ No newline at end of file
+out.k
+k
+*.so
+*.pdf
diff --git a/engine/ast.py b/engine/ast.py
index aa23aef..fb48b03 100644
--- a/engine/ast.py
+++ b/engine/ast.py
@@ -1,60 +1,111 @@
 from typing import List
 
+from engine.utils import base62uuid
+
 # replace column info with this later.
 class ColRef:
-    def __init__(self, k9name, type, cobj, cnt):
+    def __init__(self, k9name, type, cobj, cnt, table):
         self.k9name = k9name
         self.type = type
         self.cobj = cobj
         self.cnt = cnt
+        self.table = table  # table this column reference was created for
+        self.__arr__ = [k9name, type, cobj, cnt, table]  # list, not tuple: __setitem__ assigns into it
+
+    def __getitem__(self, key):  # positional access, in constructor-argument order
+        return self.__arr__[key]
+
+    def __setitem__(self, key, value):  # NOTE: updates __arr__ only, not the named attribute
+        self.__arr__[key] = value
 
 class TableInfo:
+    
     def __init__(self, table_name, cols, cxt:'Context'):
         # statics
         self.table_name = table_name
+        self.alias = set([table_name])
         self.columns_byname = dict() # column_name, type
         self.columns = []
-
+        self.cxt = cxt
+        self.views = set()
         for c in cols:
-            k9name = self.table_name + c['name']
-            if k9name in cxt.k9cols_byname: # duplicate names?
-                root = cxt.k9cols_byname[k9name] 
-                k9name = k9name + root[3]
-                root[3] += 1
-
-            # column: (k9name, type, original col_object, dup_count)
-            col_object =  (k9name, (list(c['type'].keys()))[0], c, 1)
-
-            cxt.k9cols_byname[k9name] = col_object
-            self.columns_byname[c['name']] = col_object
-            self.columns.append(col_object)
+            self.add_col(c)
 
         # runtime
         self.n_rows = 0 # number of cols
         self.order = [] # assumptions
 
         cxt.tables_byname[self.table_name] = self # construct reverse map
-
+        
+    def add_col(self, c):
+        if type(c) is ColRef:
+            c = c.cobj
+        k9name = 'c' + base62uuid(7)
+        # k9name = self.table_name + c['name']
+        # if k9name in self.cxt.k9cols_byname: # duplicate names?
+        #     root = self.cxt.k9cols_byname[k9name] 
+        #     k9name = k9name + root.cnt
+        #     root.cnt += 1
+
+        # column: ColRef(k9name, type, original col_object, dup_count, owning table)
+        col_object =  ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
+
+        self.cxt.k9cols_byname[k9name] = col_object
+        self.columns_byname[c['name']] = col_object
+        self.columns.append(col_object)
+        
+    def construct(self):
+        for c in self.columns:
+            self.cxt.emit(f'{c.k9name}:()')
     @property
     def n_cols(self):
         return len(self.columns)
 
     def get_k9colname(self, col_name):
-        return self.columns_byname[col_name][0]
-
-    def parse_tablenames(self, str):
-        # TODO: deal with alias
-        return self.get_k9colname(str)
-
+        return self.columns_byname[col_name].k9name
+    def add_alias(self, alias):
+        # TODO: Exception when alias already defined.
+        # TODO: Scoping of alias should be constrained to the query.
+        self.cxt.tables_byname[alias] = self
+        self.alias.add(alias)
+        
+    def parse_tablenames(self, colExpr):
+        # Map `col` or `table.col` (table name or alias) to the k9 column name.
+        parsedColExpr = colExpr.split('.')
+        if len(parsedColExpr) <= 1:
+            return self.get_k9colname(colExpr)
+        # .get(): an unknown name yields None instead of a KeyError that skips the check
+        datasource = self.cxt.tables_byname.get(parsedColExpr[0])
+        if datasource is None:
+            raise ValueError(f'Table name/alias not defined: {parsedColExpr[0]}')
+        return datasource.get_k9colname(parsedColExpr[1])
+
+class View:
+    def __init__(self, context, table = None, tmp = True):
+        self.table: TableInfo = table
+        self.name = 'v'+base62uuid(7)
+        if type(table) is TableInfo:
+            table.views.add(self)
+        self.context = context
+         
+    def construct(self):
+        self.context.emit(f'{self.name}:()')
+            
 class Context:
     def __init__(self): 
         self.tables:List[TableInfo] = []
         self.tables_byname = dict()
         self.k9cols_byname = dict()
-
+        
         self.udf_map = dict()
-
+        # read header
         self.k9code = ''
+        with open('header.k', 'r') as outfile:
+            self.k9code = outfile.read()         
+        # datasource will be available after the `from` clause is parsed
+        # and will be deactivated when the `from` clause goes out of scope
+        self.datasource = None
+
 
     def add_table(self, table_name, cols):
         tbl = TableInfo(table_name, cols, self)
@@ -63,7 +114,7 @@ class Context:
 
     def gen_tmptable(self):
         from engine.utils import base62uuid
-        return f'tmp{base62uuid()}'
+        return f't{base62uuid(7)}'
 
     def emit(self, codelet):
         self.k9code += codelet + '\n'
@@ -76,6 +127,7 @@ class ast_node:
     types = dict()
     def __init__(self, parent:"ast_node", node, context:Context = None):
         self.context = parent.context if context is None else context
+        self.parent = parent
         self.init(node)
         self.produce(node)
         self.spawn(node)
diff --git a/engine/expr.py b/engine/expr.py
index adf0819..9aa61c6 100644
--- a/engine/expr.py
+++ b/engine/expr.py
@@ -8,29 +8,38 @@ class expr(ast_node):
         'min': 'min', 
         'avg':'avg',
         'sum':'sum',
-
+        'mins': 'mins',
+        'maxs': 'maxs'
     }
     binary_ops = {
         'sub':'-', 
         'add':'+', 
         'mul':'*', 
         'div':'%',
-
+        'mod':'mod',
+        'and':'&',
+        'or':'|',
+        'gt':'>',
+        'lt':'<',
     }
     unary_ops = {
         'neg' : '-',
-        
+        'not' : '~'
     }
     def __init__(self, parent, node):
+        ast_node.__init__(self, parent, node, None)
+
+    def init(self, _):
         from engine.projection import projection
+        parent = self.parent
+        self.isvector = parent.isvector if type(parent) is expr else False
         if type(parent) in [projection, expr]:
             self.datasource = parent.datasource
         else:
-            self.datasource = None
+            self.datasource = self.context.datasource
         self.udf_map = parent.context.udf_map
         self.k9expr = ''
         self.func_maps = {**self.udf_map, **self.builtin_func_maps}
-        ast_node.__init__(self, parent, node, None)
 
     def produce(self, node):
         if type(node) is dict:
@@ -39,7 +48,6 @@ class expr(ast_node):
                     self.k9expr += f"{self.func_maps[key]}(" 
                     # if type(val) in [dict, str]:
                     self.k9expr += expr(self, val).k9expr
-
                     self.k9expr += ')'
                 elif key in self.binary_ops:
                     l = expr(self, val[0]).k9expr
@@ -51,7 +59,13 @@ class expr(ast_node):
                 else:
                     print(f'Undefined expr: {key}{val}')
         elif type(node) is str:
+            p = self.parent
+            while type(p) is expr and not p.isvector:
+                p.isvector = True
+                p = p.parent
             self.k9expr = self.datasource.parse_tablenames(node)
+        elif type(node) is bool:
+            self.k9expr = '1' if node else '0'
         else:
             self.k9expr = f'{node}'
     def __str__(self):
diff --git a/engine/groupby.py b/engine/groupby.py
new file mode 100644
index 0000000..e69de29
diff --git a/engine/projection.py b/engine/projection.py
index 40d02b8..8bc7593 100644
--- a/engine/projection.py
+++ b/engine/projection.py
@@ -1,6 +1,7 @@
 from engine.ast import TableInfo, ast_node, Context, include
 from engine.join import join
 from engine.expr import expr
+from engine.scan import filter
 from engine.utils import base62uuid
 
 class projection(ast_node):
@@ -15,7 +16,7 @@ class projection(ast_node):
 
     def produce(self, node):
         p = node['select']
-        self.projections = p if type(projection) == list else [p]
+        self.projections = p if type(p) is list else [p]
         print(node)
 
     def spawn(self, node):
@@ -47,27 +48,37 @@ class projection(ast_node):
             
             if self.datasource is None:
                 raise ValueError('spawn error: from clause')
+           
+        if self.datasource is not None:
+            self.datasource_changed = True
+            self.prev_datasource = self.context.datasource
+            self.context.datasource = self.datasource            
         if 'where' in node:
-            # apply filter
-            pass
+            self.datasource = filter(self, node['where'], True).output
+            self.context.datasource = self.datasource            
 
 
-    def consume(self, node):
-        disp_varname = 'disptmp' + base62uuid()
+    def consume(self, _):
+        disp_varname = 'd'+base62uuid(7)
         self.emit_no_ln(f'{disp_varname}:(')
-        for proj in self.projections:
+        for i, proj in enumerate(self.projections):
             if type(proj) is dict:
                 if 'value' in proj:
                     e = proj['value']
-                    
                     if type(e) is str:
-                        self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])};")
+                        self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])}")
                     elif type(e) is dict:
-                        self.emit_no_ln(f"{expr(self, e).k9expr};")
+                        self.emit_no_ln(f"{expr(self, e).k9expr}")
+                    self.emit_no_ln(';'if i < len(self.projections)-1 else '')
 
         self.emit(')')
         if self.disp:
-            self.emit(disp_varname)
+            if len(self.projections) > 1:
+                self.emit(f'+{disp_varname}')
+            else:
+                self.emit(f'+,(,{disp_varname})')
+        if self.datasource_changed:
+            self.context.datasource = self.prev_datasource
 
 
 import sys
diff --git a/engine/scan.py b/engine/scan.py
new file mode 100644
index 0000000..c1e8edd
--- /dev/null
+++ b/engine/scan.py
@@ -0,0 +1,107 @@
+from xmlrpc.client import Boolean
+from engine.ast import ColRef, TableInfo, View, ast_node
+from engine.utils import base62uuid
+from engine.expr import expr
+
+class scan(ast_node):
+    name = 'scan'
+    
+class filter(ast_node):
+    name = 'filter'
+    def __init__(self, parent: "ast_node", node, materialize = False, context = None):
+        self.materialize = materialize
+        super().__init__(parent, node, context)
+    def init(self, _):
+        self.datasource = self.context.datasource
+        self.view = View(self.context, self.datasource)
+        self.value = None
+        
+    def spawn(self, node):
+        # TODO: deal with subqueries
+        return super().spawn(node)
+    def __materialize__(self):
+        if self.materialize:
+            cols = [] if self.datasource is None else self.datasource.columns
+            self.output = TableInfo('tn'+base62uuid(6), cols, self.context)
+            self.output.construct()
+            if type(self.value) is View: # cond filtered on tables.
+                self.emit(f'{self.value.name}:&{self.value.name}')
+                for o, c in zip(self.output.columns,self.value.table.columns):
+                    self.emit(f'{o.k9name}:{c.k9name}[{self.value.name}]')
+            elif self.value is not None: # cond is scalar
+                tmpVar = 't'+base62uuid(7)
+                self.emit(f'{tmpVar}:{self.value}')
+                for o, c in zip(self.output.columns, self.datasource.columns):
+                    self.emit(f'{o.k9name}:$[{tmpVar};{c.k9name};()]')
+                
+    def consume(self, node):
+        # TODO: optimizations after converting expr to cnf
+        if type(node) is bool and node and self.materialize:
+            self.output = self.context.datasource if node else None
+            self.value = '1' if node else '0'
+        else:
+            if type(node) is dict:
+                def short_circuit(op, idx, inv = True):
+                    v = filter(self, node[op][idx]).value
+                    inv_filter = lambda x: not x if inv else x
+                    if type(v) is bool and inv_filter(v):
+                        self.value = inv_filter(v)
+                        self.__materialize__()
+                        return None
+                    return v
+                def binary(l, r, _ty = '&'):  # combine l and r with k9 operator _ty into self.value
+                    if type(l) is bool:
+                        self.value = r
+                    elif type(r) is bool:
+                        self.value = l
+                    elif type(l) is View:
+                        rhs = r.name if type(r) is View else f'({r})'  # scalar exprs are parenthesized
+                        self.emit(f"{l.name}: {l.name} {_ty} {rhs}")
+                        self.value = l
+                    elif type(l) is str:
+                        if type(r) is str:
+                            self.value = f'({l}){_ty}({r})'
+                        else:
+                            self.emit(f'{r.name}:{r.name} {_ty} ({l})')
+                            self.value = r
+                if 'and' in node:
+                    l = short_circuit('and', 0)
+                    if l is not None:
+                        r = short_circuit('and', 1)
+                        if r is not None:                
+                            binary(l, r)
+                    
+                elif 'or' in node:
+                    l = short_circuit('or', 0, False)
+                    if l is not None:
+                        r = short_circuit('or', 1, False)
+                        if r is not None:                
+                            binary(l, r, '|')
+                    
+                elif 'not' in node:
+                    v = filter(self, node['not']).value
+                    if type(v) is bool:
+                        self.value = not v
+                        self.__materialize__()
+                    elif type(v) is View:
+                        if len(v.table.columns) > 0:
+                            all_rows = View(self.context, v.table)
+                            self.emit(f'{all_rows.name}:(#{v.table.columns[0].k9name})#1')
+                            self.emit(f'{v.name}:{all_rows.name}-{v.name}')
+                            self.value = v
+                    else:
+                        self.value = '~(' + v + ')'
+                    # TODO: arithmetic ops connecting logical ops.
+                else:
+                    e = expr(self, node)
+                    if e.isvector:
+                        v = View(self.context, self.datasource)
+                        v.construct()
+                        self.emit(f'{v.name}:{e.k9expr}')
+                        self.value = v
+                    else:
+                        self.value = e.k9expr
+            self.__materialize__()        
+
+        print(node)
+    
\ No newline at end of file
diff --git a/engine/utils.py b/engine/utils.py
index 60f3389..9c58764 100644
--- a/engine/utils.py
+++ b/engine/utils.py
@@ -4,7 +4,7 @@ def base62uuid(crop=8):
     alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
     id = uuid.uuid4().int
     ret = ''
-
+    
     while id:
         ret = alp[id % 62] + ret
         id //= 62
diff --git a/header.k b/header.k
new file mode 100644
index 0000000..0d1cc4c
--- /dev/null
+++ b/header.k
@@ -0,0 +1,2 @@
+maxs:{[L]{max(x, y)}\L}
+mins:{[L]{min(x, y)}\L}
diff --git a/run.py b/prompt.py
similarity index 82%
rename from run.py
rename to prompt.py
index 09db800..1395edc 100644
--- a/run.py
+++ b/prompt.py
@@ -1,7 +1,7 @@
-from multiprocessing.sharedctypes import Value
 import re
 import aquery_parser as parser
 import engine
+import subprocess
 
 test_parser = True
 
@@ -27,7 +27,11 @@ while test_parser:
                 engine.generate(stmts_stmts, cxt)
             print(cxt.k9code)
             with open('out.k', 'wb') as outfile:
-                outfile.write(cxt.k9code.encode('utf-8'))
+                outfile.write((cxt.k9code+'\n\\\\').encode('utf-8'))
+            subprocess.call(['bash.exe', '-c',"./k out.k"])
+            continue
+        elif q == 'k':
+            subprocess.call(['bash.exe', '-c',"./k"])
             continue
         elif q == 'print':
             print(stmts)
diff --git a/stock.a b/stock.a
index 992b952..a5578ba 100644
--- a/stock.a
+++ b/stock.a
@@ -18,8 +18,10 @@ INSERT INTO stocks VALUES(15,2)
 INSERT INTO stocks VALUES(16,5)
 
 SELECT max(price-min(timestamp)) FROM stocks 
+ 
+SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100)
 
 
-/*SELECT max(price-mins(price))
+SELECT max(price-mins(price))
 FROM stocks
-     ASSUMING ASC timestamp*/
+     ASSUMING ASC timestamp