Bill Sun 3 years ago
parent 3b2dfb295e
commit 277dad6b3e

.gitignore vendored

@@ -18,3 +18,6 @@ out.k
k
*.so
*.pdf
test*.c*
*.csv
*.out

@@ -72,6 +72,7 @@ REFERENCES = keyword("references").suppress()
RECURSIVE = keyword("recursive").suppress()
VALUES = keyword("values").suppress()
WINDOW = keyword("window")
INTO = keyword("into").suppress()
PRIMARY_KEY = Group(PRIMARY + KEY).set_parser_name("primary_key")
FOREIGN_KEY = Group(FOREIGN + KEY).set_parser_name("foreign_key")
@@ -226,6 +227,7 @@ RESERVED = MatchFirst([
WINDOW,
WITH,
WITHIN,
INTO,
])
L_INLINE = Literal("<k>").suppress()
R_INLINE = Literal("</k>").suppress()
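
Side note: the L_INLINE/R_INLINE literals let a script embed raw k code between <k> and </k> tags, as the stock.a queries further down do. A minimal usage sketch in Python, assuming the aquery_parser.parse entry point seen in prompt.py and that inline blocks surface under a 'k9' key (an inference from engine/ddl.py, whose k9 node emits node['k9'] verbatim):

# Hedged sketch: parse a script that mixes SQL with an inline k block.
# The AST shape of the inline block is an assumption, not verified here.
import aquery_parser as parser

stmts = parser.parse('<k> "q1" </k> SELECT price FROM stocks;')
print(stmts)  # expected: a {'k9': '"q1"'} node followed by the select statement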

@@ -29,7 +29,7 @@ def common_parser():
ansi_ident | mysql_backtick_ident | simple_ident, separator=".", combine=True,
)).set_parser_name("identifier")
return parser(ansi_string, combined_ident)
return parser(ansi_string | mysql_doublequote_string, combined_ident)
def mysql_parser():
@@ -436,6 +436,19 @@ def parser(literal_string, ident, sqlserver=False):
& Optional(assign("limit", expr))
)
outfile = Optional(
(
INTO
+ keyword("outfile").suppress()
+ literal_string ("loc")
+ Optional (
keyword("fields")
+ keyword("terminated")
+ keyword("by")
+ literal_string ("term")
)
)("outfile")
)
ordered_sql = (
(
(unordered_sql | (LB + query + RB))
@@ -448,6 +461,7 @@ def parser(literal_string, ident, sqlserver=False):
)("union")
+ Optional(ORDER_BY + delimited_list(Group(sort_column))("orderby"))
+ limit
+ outfile
).set_parser_name("ordered sql") / to_union_call
with_expr = delimited_list(Group(
@@ -605,9 +619,27 @@ def parser(literal_string, ident, sqlserver=False):
+ Optional(assign("where", expr))
) / to_json_call
load = (
keyword("load")("op")
+ keyword("data").suppress()
+ keyword("infile")("loc")
+ literal_string ("file")
+ INTO
+ keyword("table").suppress()
+ var_name ("table")
+ Optional(
keyword("fields").suppress()
+ keyword("terminated").suppress()
+ keyword("by").suppress()
+ literal_string ("term")
)
) ("load")
sql_stmts = delimited_list( (
query
| (insert | update | delete)
| (insert | update | delete | load)
| (create_table | create_view | create_cache | create_index)
| (drop_table | drop_view | drop_index)
)("stmts"), ";")
@@ -617,6 +649,10 @@ def parser(literal_string, ident, sqlserver=False):
| udf
) ("stmts")
stmts = ZeroOrMore(sql_stmts|other_stmt)
stmts = ZeroOrMore(
sql_stmts
|other_stmt
| keyword(";").suppress() # empty stmt
)
return stmts.finalize()
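
For reference, a hedged sketch of the parse shapes the new LOAD DATA and INTO OUTFILE rules above should produce, inferred from the named results ('op', 'file', 'table', 'term', 'loc') and from how engine/ddl.py consumes them; the exact nesting may differ:

# Assumed parse results (hypothetical, not verified against a test run):
load_stmt = {'load': {
    'op': 'load',
    'file': {'literal': 'test.csv'},
    'table': 'test',
    'term': {'literal': ','},   # present only with FIELDS TERMINATED BY
}}
select_into_outfile = {
    'select': [{'value': 'a'}],
    'from': 'test',
    'outfile': {'loc': {'literal': 'out.csv'}, 'term': {'literal': ','}},
}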

@@ -522,6 +522,7 @@ def to_union_call(tokens):
output["limit"] = tokens["limit"]
output["offset"] = tokens["offset"]
output["fetch"] = tokens["fetch"]
output["outfile"] = tokens["outfile"]
return output

@@ -4,13 +4,15 @@ from engine.utils import base62uuid
# replace column info with this later.
class ColRef:
def __init__(self, k9name, type, cobj, cnt, table):
def __init__(self, k9name, _ty, cobj, cnt, table, name, id):
self.k9name = k9name
self.type = type
self.type = _ty
self.cobj = cobj
self.cnt = cnt
self.table = table
self.__arr__ = (k9name, type, cobj, cnt, table)
self.name = name
self.id = id
self.__arr__ = (k9name, _ty, cobj, cnt, table, name, id)
def __getitem__(self, key):
return self.__arr__[key]
@@ -28,6 +30,7 @@ class TableInfo:
self.columns = []
self.cxt = cxt
self.views = set()
self.rec = None
for c in cols:
self.add_col(c)
@@ -48,7 +51,7 @@ class TableInfo:
# root.cnt += 1
# column: (k9name, type, original col_object, dup_count, table, name, id)
col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self)
col_object = ColRef(k9name, (list(c['type'].keys()))[0], c, 1, self,c['name'], len(self.columns))
self.cxt.k9cols_byname[k9name] = col_object
self.columns_byname[c['name']] = col_object
@@ -62,7 +65,11 @@ class TableInfo:
return len(self.columns)
def get_k9colname(self, col_name):
return self.columns_byname[col_name].k9name
col = self.columns_byname[col_name]
if type(self.rec) is list:
self.rec.append(col)
return col.k9name
def add_alias(self, alias):
# TODO: Exception when alias already defined.
# TODO: Scoping of alias should be constrained in the query.
@@ -158,5 +165,5 @@ class ast_node:
def include(objs):
import inspect
for _, cls in inspect.getmembers(objs):
if inspect.isclass(cls) and issubclass(cls, ast_node):
if inspect.isclass(cls) and issubclass(cls, ast_node) and not cls.name.startswith('_'):
ast_node.types[cls.name] = cls
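
The new rec field implements a small record-then-read protocol used by the groupby and projection nodes below: a consumer sets rec to a list, every column resolved through get_k9colname is appended to it, and the consumer reads the referenced columns back before resetting it. A standalone sketch of the pattern, with hypothetical names:

# Minimal sketch of the recording pattern behind TableInfo.get_k9colname.
class Source:
    def __init__(self, cols):
        self.cols = cols          # SQL name -> k9 name (stands in for ColRef)
        self.rec = None           # when a list, lookups are recorded
    def get_k9colname(self, name):
        col = self.cols[name]
        if type(self.rec) is list:
            self.rec.append(col)
        return col

src = Source({'price': 'cQw1', 'timestamp': 'cQw2'})
src.rec = []                      # start recording (as groupby.init does)
src.get_k9colname('price')
referenced = src.rec              # ['cQw1']
src.rec = None                    # stop recording (as groupby.consume does)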

@@ -1,7 +1,7 @@
# code-gen for data decl languages
from engine.ast import TableInfo, ast_node, include
from engine.ast import ColRef, TableInfo, ast_node, include
from engine.utils import base62uuid
class create_table(ast_node):
name = 'create_table'
def produce(self, node):
@@ -27,6 +27,34 @@ class insert(ast_node):
else:
# subquery, dispatch to select astnode
pass
class k9(ast_node):
name='k9'
def produce(self, node):
self.emit(node[self.name])
class load(ast_node):
name="load"
def produce(self, node):
node = node[self.name]
tablename = 'l'+base62uuid(7)
keys = 'k'+base62uuid(7)
self.emit(f"{tablename}:`csv ? 1:\"{node['file']['literal']}\"")
self.emit(f"{keys}:!{tablename}")
table:TableInfo = self.context.tables_byname[node['table']]
for i, c in enumerate(table.columns):
c:ColRef
self.emit(f'{c.k9name}:{tablename}[({keys})[{i}]]')
class outfile(ast_node):
name="_outfile"
def produce(self, node):
out_table:TableInfo = self.parent.out_table
self.emit_no_ln(f"\"{node['loc']['literal']}\"1:`csv@[[]")
for i, c in enumerate(out_table.columns):
self.emit_no_ln(f"{c.name}:{c.k9name}{';' if i < len(out_table.columns) - 1 else ''}")
self.emit(']')
import sys
include(sys.modules[__name__])
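
A sketch of the k9 text the load node above should emit for LOAD DATA INFILE "test.csv" INTO TABLE test on a four-column table; the base62 identifiers and per-column k9 names are made up for illustration:

# Mirrors the emit calls in load.produce (ids hypothetical).
tablename, keys = 'lAb3XyZ', 'kQw9RtU'
print(f'{tablename}:`csv ? 1:"test.csv"')          # read the csv file
print(f'{keys}:!{tablename}')                      # keys (column names) of the dict
for i, k9name in enumerate(['cA1', 'cA2', 'cA3', 'cA4']):
    print(f'{k9name}:{tablename}[({keys})[{i}]]')  # bind each table column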

@@ -6,10 +6,12 @@ class expr(ast_node):
builtin_func_maps = {
'max': 'max',
'min': 'min',
'avg':'avg',
'sum':'sum',
'mins': 'mins',
'maxs': 'maxs'
'avg': 'avg',
'sum': 'sum',
'mins': ['mins', 'minsw'],
'maxs': ['maxs', 'maxsw'],
'avgs': ['avgs', 'avgsw'],
'sums': ['sums', 'sumsw'],
}
binary_ops = {
'sub':'-',
@@ -22,6 +24,10 @@ class expr(ast_node):
'gt':'>',
'lt':'<',
}
compound_ops = {
'ge' : [2, lambda x: f'~({x[0]}<{x[1]})'],
'le' : [2, lambda x: f'~({x[0]}>{x[1]})'],
}
unary_ops = {
'neg' : '-',
'not' : '~'
@@ -45,19 +51,32 @@ class expr(ast_node):
if type(node) is dict:
for key, val in node.items():
if key in self.func_maps:
self.k9expr += f"{self.func_maps[key]}("
# if type(val) in [dict, str]:
self.k9expr += expr(self, val).k9expr
self.k9expr += ')'
if type(val) is list and len(val) > 1:
k9func = self.func_maps[key]
k9func = k9func[len(val) - 1] if type(k9func) is list else k9func
self.k9expr += f"{k9func}["
for i, p in enumerate(val):
self.k9expr += expr(self, p).k9expr + (';'if i<len(val)-1 else '')
else:
self.k9expr += f"{self.func_maps[key]}["
self.k9expr += expr(self, val).k9expr
self.k9expr += ']'
elif key in self.binary_ops:
l = expr(self, val[0]).k9expr
r = expr(self, val[1]).k9expr
self.k9expr += f'({l}{self.binary_ops[key]}{r})'
elif key in self.compound_ops:
x = []
if type(val) is list:
for v in val:
x.append(expr(self, v).k9expr)
self.k9expr = self.compound_ops[key][1](x)
elif key in self.unary_ops:
self.k9expr += f'({expr(self, val).k9expr}{self.unary_ops[key]})'
else:
print(f'Undefined expr: {key}{val}')
elif type(node) is str:
p = self.parent
while type(p) is expr and not p.isvector:
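
The new compound_ops table rewrites comparisons that the target k dialect apparently lacks as primitives: a >= b lowers to ~(a<b) and a <= b to ~(a>b). The rewrite rule in isolation:

# Demonstration of the compound-op lowering in engine/expr.py.
compound_ops = {
    'ge': [2, lambda x: f'~({x[0]}<{x[1]})'],
    'le': [2, lambda x: f'~({x[0]}>{x[1]})'],
}
arity, rewrite = compound_ops['ge']
assert arity == 2
print(rewrite(['price', 'timestamp']))   # -> ~(price<timestamp)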

@@ -0,0 +1,37 @@
from engine.ast import ast_node
from engine.utils import base62uuid
from engine.expr import expr
class groupby(ast_node):
name = '_groupby'
def init(self, _):
self.group = 'g' + base62uuid(7)
self.datasource = self.parent.datasource
self.datasource.rec = []
def produce(self, node):
if type(node) is not list:
node = [node]
g_contents = '('
for i, g in enumerate(node):
v = g['value']
e = expr(self, v).k9expr
# if v is compound expr, create tmp cols
if type(v) is not str:
tmpcol = 't' + base62uuid(7)
self.emit(f'{tmpcol}:{e}')
e = tmpcol
g_contents += e + (';'if i < len(node)-1 else '')
self.emit(f'{self.group}:'+g_contents+')')
if len(node) <= 1:
self.emit(f'{self.group}:={self.group}')
else:
self.emit(f'{self.group}:groupby[{self.group}[0];+{self.group}]')
def consume(self, _):
self.referenced = self.datasource.rec
self.datasource.rec = None
return super().consume(_)
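
A hedged sketch of what groupby.produce emits for GROUP BY a, b, d, with hypothetical variable names: a single key is grouped with the native = primitive, while multiple keys go through the groupby function defined in the k library below.

# Sketch of the emitted k9 text (names made up; keys already resolved).
group = 'gXy12Abc'
keys = ['a', 'b', 'd']
print(f"{group}:({';'.join(keys)})")                # pack the key columns
if len(keys) <= 1:
    print(f'{group}:={group}')                      # single key: native group
else:
    print(f'{group}:groupby[{group}[0];+{group}]')  # multi-key: library groupby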

@@ -1,9 +1,10 @@
from engine.ast import TableInfo, ast_node, Context, include
from engine.ast import ColRef, TableInfo, ast_node, Context, include
from engine.groupby import groupby
from engine.join import join
from engine.expr import expr
from engine.scan import filter
from engine.utils import base62uuid
from engine.utils import base62uuid, enlist, base62alp
from engine.ddl import outfile
class projection(ast_node):
name='select'
def __init__(self, parent:ast_node, node, context:Context = None, outname = None, disp = True):
@@ -35,12 +36,14 @@ class projection(ast_node):
projection(self, from_clause, disp = False)
else:
# TODO: from func over table
print(f"from func over table{node}")
print(f'from func over table{node}')
elif type(value) is str:
self.datasource = self.context.tables_byname[value]
if 'assumptions' in from_clause:
ord = from_clause['assumptions']['ord'] == 'asc'
ord = '^' if ord else '|^'
for assumption in enlist(from_clause['assumptions']):
ord = assumption['ord'] == 'asc'
attrib = assumption['attrib']
ord = '^' if ord else '|^'
# TODO: generate view of table by order
elif type(from_clause) is str:
@@ -57,26 +60,70 @@ class projection(ast_node):
self.datasource = filter(self, node['where'], True).output
self.context.datasource = self.datasource
def consume(self, _):
if 'groupby' in node:
self.group_node = groupby(self, node['groupby'])
else:
self.group_node = None
def consume(self, node):
disp_varname = 'd'+base62uuid(7)
pcolrefs = []
if type(self.group_node) is groupby:
grp_table = self.group_node.group
grp_refs = self.group_node.referenced
for i, proj in enumerate(self.projections):
self.datasource.rec = []
cname = ''
if type(proj) is dict:
if 'value' in proj:
e = proj['value']
if type(e) is str:
cname = self.datasource.parse_tablenames(proj['value'])
elif type(e) is dict:
cname = expr(self, e).k9expr
cname = ''.join([a if a in base62alp else '' for a in cname])
pcolrefs.append(self.datasource.rec)
self.datasource.rec = None
keys = 'k'+base62uuid(7)
self.emit(f'{keys}:!{grp_table}')
fn = 'fn' + base62uuid(6)
# self.emit
self.emit_no_ln(f'{disp_varname}:(')
flatten = False
cols = []
self.out_table = TableInfo('out_'+base62uuid(4), [], self.context)
if 'outfile' in node:
flatten = True
for i, proj in enumerate(self.projections):
cname = ''
if type(proj) is dict:
if 'value' in proj:
e = proj['value']
if type(e) is str:
self.emit_no_ln(f"{self.datasource.parse_tablenames(proj['value'])}")
cname = self.datasource.parse_tablenames(proj['value'])
self.emit_no_ln(f"{cname}")
elif type(e) is dict:
self.emit_no_ln(f"{expr(self, e).k9expr}")
cname = expr(self, e).k9expr
self.emit_no_ln(f"{cname}")
cname = ''.join([a if a in base62alp else '' for a in cname])
self.emit_no_ln(';'if i < len(self.projections)-1 else '')
cols.append(ColRef(f'(+{disp_varname})[{i}]', 'generic', self.out_table, 0, None, cname, i))
self.emit(')')
if self.disp:
if flatten:
self.emit_no_ln(f'{disp_varname}:' if flatten else '')
if flatten or self.disp:
if len(self.projections) > 1:
self.emit(f'+{disp_varname}')
self.emit(f"+{disp_varname}")
else:
self.emit(f'+,(,{disp_varname})')
if flatten:
self.emit(f'{disp_varname}')
if flatten:
self.out_table.columns = cols
outfile(self, node['outfile'])
if self.datasource_changed:
self.context.datasource = self.prev_datasource
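
One detail worth noting in projection.consume above: projected expressions become output column names by dropping every character outside the base62 alphabet, so a k9 expression such as avg[price-timestamp] still yields a legal identifier. A tiny sketch of that filter:

# Same filter as in projection.consume: keep only [0-9a-zA-Z].
base62alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
def colname_of(k9expr):
    return ''.join(a for a in k9expr if a in base62alp)
print(colname_of('avg[price-timestamp]'))   # -> avgpricetimestamp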

@@ -1,12 +1,19 @@
import uuid
base62alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
def base62uuid(crop=8):
alp = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
id = uuid.uuid4().int
ret = ''
while id:
ret = alp[id % 62] + ret
ret = base62alp[id % 62] + ret
id //= 62
return ret[:crop] if len(ret) else '0'
return ret[:crop] if len(ret) else '0'
def enlist(l):
return l if type(l) is list else [l]
def seps(s, i, l):
return s if i < len(l) - 1 else ''
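
A quick usage sketch for the two new helpers (definitions repeated so the snippet runs standalone):

def enlist(l):
    return l if type(l) is list else [l]
def seps(s, i, l):
    return s if i < len(l) - 1 else ''

assert enlist('a') == ['a'] and enlist(['a', 'b']) == ['a', 'b']
cols = ['a', 'b', 'c']
print(''.join(c + seps(';', i, cols) for i, c in enumerate(cols)))   # a;b;c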

@@ -1,2 +1,28 @@
import`csv
maxs:{[L]{max(x, y)}\L}
mins:{[L]{min(x, y)}\L}
sums:{[L]{(x + y)}\L}
avgsimpl:{[L;i] curr:L[i]%(i+1); $[i<(#L)-1;curr, avgsimpl[L;i+1];curr]}
avgs:{[L] avgsimpl[sums[L];0]}
maxswimp:{[L;w;i] curr:max(L@(((i-w)+!w)|0)); $[i<#L;curr, maxswimp[L; w; i + 1];curr]}
maxsw:{[w;L]maxswimp[L; w; 1]}
minswimp:{[L;w;i] curr:min(L@(((i-w)+!w)|0)); $[i<#L;curr, minswimp[L; w; i + 1];curr]}
minsw:{[w;L]minswimp[L;w;1]}
avgswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];curr:s%((i+1)&w);$[i<(#L)-1; curr, avgswimp[L; w; s; i+1]; curr]}
avgsw:{[w;L] avgswimp[L;w;0;0]}
sumswimp:{[L;w;s;i] s:(s+L[i])-L[i-w];$[i<(#L)-1; s, sumswimp[L; w; s; i+1]; s]}
sumsw:{[w;L] sumswimp[L;w;0;0]}
groupbyi:{[L;GV;i]
k:(,(L[i]));gvk:GV[k][0];
found:$[(gvk[0]+gvk[1])>0;1;L[i] in !GV];
cg:(,L[i])!$[found;,gvk[0],i;,(,i)];
$[i<(#L)-1; groupbyi[L;(GV,cg);i+1]; (GV,cg)]}
groupbys:{[L;ll] GV1:(,(L[0]))!,(,0);$[ll>1;groupbyi[L;GV1;1];GV1]}
groupby:{[l;L] $[(#l)=0;,();groupbys[L;#l]]}
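
A hedged Python reading of the sliding-window aggregates above, to pin down their semantics: for each position i in 1..n, aggregate the w trailing elements ending at i-1, with indices clipped at zero (the k versions clip via |0, which for max/min is equivalent to the shorter slice):

# Python equivalents of maxsw/minsw from the k library above.
def maxsw(w, L):
    return [max(L[max(0, i - w):i]) for i in range(1, len(L) + 1)]

def minsw(w, L):
    return [min(L[max(0, i - w):i]) for i in range(1, len(L) + 1)]

print(maxsw(2, [1, 3, 2, 5, 4]))   # [1, 3, 3, 5, 5]
print(minsw(2, [1, 3, 2, 5, 4]))   # [1, 1, 2, 2, 4]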

@@ -1,6 +1,6 @@
Month,sales
1,100
2,120
4,140
3,140
4,140
5,130


@@ -3,6 +3,13 @@ import aquery_parser as parser
import engine
import subprocess
import sys
if sys.platform != 'win32':
import readline
# else:
# import pyreadline3
test_parser = True
# code to test parser
@@ -37,8 +44,8 @@ while test_parser:
print(stmts)
continue
trimed = ws.sub(' ', q.lower()).split(' ')
if trimed[0] == 'file':
fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \
if trimed[0].startswith('f'):
fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
else trimed[1]
with open(fn, 'r') as file:
@@ -47,6 +54,6 @@ while test_parser:
continue
stmts = parser.parse(q)
print(stmts)
except ValueError as e:
except (ValueError) as e:
print(type(e), e)

@@ -0,0 +1,9 @@
CREATE TABLE test(a INT, b INT, c INT, d INT)
LOAD DATA INFILE "test.csv"
INTO TABLE test
FIELDS TERMINATED BY ","
SELECT sum(c), b, d
FROM test
group by a,b,d

@@ -2,3 +2,4 @@ mo-future
mo-dots==8.20.21357
mo-parsing
mo-imports
readline; sys_platform != 'win32'

@@ -17,11 +17,13 @@ INSERT INTO stocks VALUES(14,5)
INSERT INTO stocks VALUES(15,2)
INSERT INTO stocks VALUES(16,5)
SELECT max(price-min(timestamp)) FROM stocks
SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100)
<k> "q1" </k>
SELECT max(price-min(timestamp)) FROM stocks
<k> "q2" </k>
SELECT price, timestamp FROM stocks where price -timestamp > 1 and not (price*timestamp<100);
<k> "q3"</k>
SELECT max(price-mins(price))
FROM stocks
ASSUMING ASC timestamp
