From ba21da23a340fbd3d97367243b9e06c7fde6165a Mon Sep 17 00:00:00 2001
From: Bill <sunyinqi0508@gmail.com>
Date: Fri, 28 Oct 2022 16:38:02 +0800
Subject: [PATCH] bug fixes, more documentations

---
 README.md          | 97 +++++++++++++++++++++++++++++++++++++++++++++-
 aquery_config.py   |  2 +-
 build.py           |  4 +-
 engine/types.py    |  7 +++-
 reconstruct/ast.py |  6 ++-
 server/table.h     |  1 -
 tests/q4.a         | 15 ++++++-
 7 files changed, 124 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index fbc1076..fc06b4f 100644
--- a/README.md
+++ b/README.md
@@ -125,18 +125,113 @@ See files in ./tests/ for more examples.
 - See `test.aquery` as an example
 
 # User Manual
+AQuery++ has similar syntax to standard SQL with extensions for time-series analysis and user extensibility.
+## Basic Grammar
+```
+program : [query | create | insert | load | udf ]*
+
+/********* Queries *********/
+query : [WITH ID ['('columns')'] AS '(' single-query ')'] single-query
+
+single-query : SELECT projections FROM datasource order-clause where-clause groupby-clause
+
+projections: [val as ID | val] (, [val as ID | val])*
+
+datasource : ID [ID | AS ID] |
+  ID, datasource |
+  ID [INNER] JOIN datasource [USING columns | ON conditions] |
+  ID NATURAL JOIN datasource
+
+order-clause: ASSUMING ([ASC|DESC] ID)+
+
+where-clause: WHERE conditions;
+
+groupby-clause: GROUP BY expr (, expr )* [HAVING conditions]
+
+conditions: <a boolean expression>
+
+/********* Creating data *********/
+create: CREATE TABLE ID [AS query | '(' schema ')']
+schema: ID type (, ID type)*
+
+insert: INSERT INTO ID [query | VALUES '(' literals ')']
+literals: literal (, literal)*;
+
+/********* Loading/Saving data *********/
+load: LOAD DATA INFILE string INTO TABLE ID FIELDS TERMINATED BY string
+
+save: query INTO OUTFILE string FIELDS TERMINATED BY string
+
+/********* User defined functions *********/
+udf: FUNCTION ID '(' arg_list ')' '{' fun_body '}'
+arg_list: ID (, ID)*
+fun_body: [stmts] expr
+/********* See more udf grammar later. **********/
+
+stmts: stmt+ 
+stmt: assignment; | if-stmt | for-stmt | ;
+assignment: l_value := expr
+l_value: ID | ID '[' ID ']'
+
+if-stmt: if '(' expr ')' if-body [else (stmt|block) ]
+if-body: stmt | block (elif '(' expr ')' if-body)*
+
+for-stmt: for '(' assignment (, assignment)* ';' expr ';' assignment ')' for-body
+for-body: stmt|block
+
+block:  '{' [stmts] '}'
+
+/********* Expressions *********/
+expr: expr binop expr | fun_call | unaryop expr | ID | literal
+fun: ID | sqrt | avg[s] | count | deltas | distinct 
+  | first | last | max[s] | min[s] | next
+  | prev | sum[s] | ratios | <... To be added> 
+fun_call: fun '(' expr (, expr)* ')'
+binop: +|-|=|*|/|+=|-=|*=|/=|!=|<|>|>=|<=| and | or
+unaryop: +|-| not
+literal:  numbers | strings | booleans
+
+```
 ## Data Types
 - String Types: `STRING` and `TEXT` are variable-length strings with unlimited length. `VARCHAR(n)` is for strings with upper-bound limits.
 - Integer Types: `INT` and `INTEGER` are 32-bit integers, `SMALLINT` is for 16-bit integers, `TINYINT` is for 8-bit integers and `BIGINT` is 64-bit integers. On Linux and macOS, `HGEINT` is 128-bit integers. 
 - Floating-Point Types: `REAL` denotes 32-bit floating point numbers while `DOUBLE` denotes 64-bit floating point numbers. 
 - Temporal Types: `DATE` only supports the format of `yyyy-mm-dd`, and `TIME` uses 24-hour format and has the form of `hh:mm:ss:ms` the milliseconds part can range from 0 to 999, `TIMESTAMP` has the format of `yyyy-mm-dd hh:mm:ss:ms`. When importing data from CSV files, please make sure the spreadsheet software (if they were used) doesn't change the format of the date and timestamp by double-checking the file with a plain-text editor.
-- Boolean Type: `BOOLEAN` is a boolean type with values `TRUE` and `FALSE`.
+- Boolean Type: `BOOLEAN` or `BOOL` is a boolean type with values `TRUE` and `FALSE`.
 
+## Create Table
+Tables can be created using the `CREATE TABLE` statement. For example:
+```
+CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
+INSERT INTO my_table VALUES(10, 20, "example")
+INSERT INTO my_table SELECT * FROM my_table
+```
+You can also create tables using a query. For example:
+```
+CREATE TABLE my_table_derived
+AS
+  SELECT c1, c2 * 2 as twice_c2 FROM my_table
+```
+## Drop Table:
+Tables can be dropped using the `DROP TABLE` statement. For example:
+```
+DROP TABLE my_table IF EXISTS
+```
 ## Load Data:
 - Use query like `LOAD DATA INFILE <filename> INTO <table_name> [OPTIONS <options>]`
 - File name is the relative path to the AQuery root directory (where prompy.py resides)
 - File name can also be absolute path.
 - See `data/q1.sql` for more information 
+
+## Built-in functions: 
+- `avg[s]`: average of a column. `avgs(col)` and `avgs(w, col)` are, respectively, the rolling average and the moving average with window `w` of the column `col`.
+- `sum[s]`, `max[s]`, `min[s]`: similar to `avg[s]`
+- `ratios(w = 1, col)`: moving ratio of a column, e.g. `ratios(w, col)[i] = col[i-w]/col[i]`. Window `w` has a default value of 1.  
+- `next(col)`, `prev(col)`: shift a column by one position in either direction, e.g. `next(col)[i] = col[i+1]`.
+- `first(col), last(col)`: first and last value of a column, i.e. `first(col)= col[0]`, `last(col) = col[n-1]`.
+- `sqrt(x)`, `trunc(x)`, and other built-in math functions: value-wise math operations, e.g. `sqrt(x)[i] = sqrt(x[i])`.
+- `pack(cols, ...)`: pack multiple columns into a single column. 
+
 # Architecture 
 ![Architecture](./docs/arch-hybrid.svg)
 
diff --git a/aquery_config.py b/aquery_config.py
index cdff3b7..2d5939b 100644
--- a/aquery_config.py
+++ b/aquery_config.py
@@ -2,7 +2,7 @@
 
 ## GLOBAL CONFIGURATION FLAGS
 
-version_string = '0.4.9a'
+version_string = '0.5.0a'
 add_path_to_ldpath = True
 rebuild_backend = False
 run_backend = True
diff --git a/build.py b/build.py
index 8cd4b91..d817dc8 100644
--- a/build.py
+++ b/build.py
@@ -16,6 +16,7 @@ class checksums:
     server : Optional[Union[bytes, bool]] = None
     sources : Optional[Union[Dict[str, bytes], bool]] = None
     env : str = ''
+    
     def calc(self, compiler_name, libaquery_a = 'libaquery.a' , 
                 pch_hpp_gch = 'server/pch.hpp.gch', 
                 server = 'server.so'
@@ -24,7 +25,8 @@ class checksums:
         self.env = (aquery_config.os_platform +
                     machine() + 
                     aquery_config.build_driver + 
-                    compiler_name
+                    compiler_name + 
+                    aquery_config.version_string
                 )
         for key in self.__dict__.keys():
             try:
diff --git a/engine/types.py b/engine/types.py
index 5a56e12..3e217a3 100644
--- a/engine/types.py
+++ b/engine/types.py
@@ -295,7 +295,7 @@ opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call
 # monetdb wont extend int division to fp type
 # opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior)
 opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior)
-opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior)
+opmul = OperatorBase('mul', 2, auto_extension, cname = '*', sqlname = '*', call = binary_op_behavior)
 opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior)
 opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior)
 opneg = OperatorBase('neg', 1, as_is, cname = '-', sqlname = '-', call = unary_op_behavior)
@@ -367,4 +367,7 @@ builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin
     **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, 
     **user_module_func}
 
-type_table = {**builtin_types, **type_table}
\ No newline at end of file
+type_table = {**builtin_types, **type_table}
+
+# Additional Aliases for type names
+type_table['boolean'] = BoolT
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index b8228c1..66342df 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -262,6 +262,9 @@ class projection(ast_node):
         if self.col_ext or self.group_node and self.group_node.use_sp_gb:
             self.has_postproc = True
         
+        if self.group_node and self.group_node.use_sp_gb :
+            self.group_node.dedicated_glist
+            ...
         o = self.assumptions
         if 'orderby' in node:
             o.extend(enlist(node['orderby']))
@@ -433,7 +436,7 @@ class orderby(ast_node):
                 o_str += ' ' + 'DESC'
             o_list.append(o_str)
         self.add(', '.join(o_list))
-            
+
 
 class scan(ast_node):
     class Position(Enum):
@@ -622,6 +625,7 @@ class groupby(ast_node):
         o_list = []
         self.refs = set()
         self.gb_cols = set()
+        # dedicated_glist -> cols populated for special group by
         self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = []
         self.use_sp_gb = False
         for g in node:
diff --git a/server/table.h b/server/table.h
index 782397e..da75cc1 100644
--- a/server/table.h
+++ b/server/table.h
@@ -859,7 +859,6 @@ VT<bool> operator >(const T2& lhs, const VT<T1>& rhs) {
 }
 
 
-
 template <class ...Types>
 void print(const TableInfo<Types...>& v, const char* delimiter = " ", const char* endline = "\n") {
 	v.print(delimiter, endline);
diff --git a/tests/q4.a b/tests/q4.a
index d38a246..8e9e626 100644
--- a/tests/q4.a
+++ b/tests/q4.a
@@ -23,4 +23,17 @@ CREATE TABLE ticks2(ID VARCHAR(20), max REAL, min REAL)
 INSERT INTO ticks2 SELECT ID AS ID, max(ratios(endofdayprice)) AS max, min(ratios(endofdayprice)) AS min from ticks  group by ID;
 
 SELECT ID, max, min
-FROM ticks2;
\ No newline at end of file
+FROM ticks2;
+
+CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
+INSERT INTO my_table VALUES(10, 20, "example")
+select * from my_table;
+INSERT INTO my_table SELECT * FROM my_table
+select * from my_table;
+SELECT c1, c2 + c2 as twice_c2 FROM my_table;
+
+CREATE TABLE my_table_derived
+AS
+  SELECT c1, c2 + c2 as twice_c2 FROM my_table;
+SELECT * FROM my_table_derived;
+