From ba21da23a340fbd3d97367243b9e06c7fde6165a Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 28 Oct 2022 16:38:02 +0800 Subject: [PATCH] bug fixes, more documentations --- README.md | 97 +++++++++++++++++++++++++++++++++++++++++++++- aquery_config.py | 2 +- build.py | 4 +- engine/types.py | 7 +++- reconstruct/ast.py | 6 ++- server/table.h | 1 - tests/q4.a | 15 ++++++- 7 files changed, 124 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fbc1076..fc06b4f 100644 --- a/README.md +++ b/README.md @@ -125,18 +125,113 @@ See files in ./tests/ for more examples. - See `test.aquery` as an example # User Manual +AQuery++ has similar syntax to standard SQL with extensions for time-series analysis and user extensibility. +## Basic Grammar +``` +program : [query | create | insert | load | udf ]* + +/********* Queries *********/ +query : [WITH ID ['('columns')'] AS '(' single-query ')'] single-query + +single-query : SELECT projections FROM datasource assumption where-clause groupby-clause + +projections: [val as ID | val] (, [val as ID | val])* + +datasource : ID [ID | AS ID] | + ID, datasource | + ID [INNER] JOIN datasource [USING columns | ON conditions] | + ID NATURAL JOIN datasource + +order-clause: ASSUMING ([ASC|DESC] ID)+ + +where-clause: WHERE conditions; + +groupby-clause: GROUP BY expr (, expr )* [HAVING conditions] + +conditions: + +/********* Creating data *********/ +create: CREATE TABLE ID [AS query | '(' schema ')'] +schema: ID type (, ID type)* + +insert: INSERT INTO ID [query | VALUES '(' literals ')'] +literals: literal (, literal)*; + +/********* Loading/Saving data *********/ +load: LOAD DATA INFILE string INTO TABLE ID FIELDS TERMINATED BY string + +save: query INTO OUTFILE string FIELDS TERMINATED BY string + +/********* User defined functions *********/ +udf: FUNCTION ID '(' arg-list ')' '{' fun-body '}' +arg_list: ID (, ID)* +fun_body: [stmts] expr +/********* See more udf grammar later. 
**********/ + +stmts: stmt+ +stmt: assignment; | if-stmt | for-stmt | ; +assignment: l_value := expr +l_value: ID | ID '[' ID ']' + +if-stmt: if '(' expr ')' if-body [else (stmt|block) ] +if-body: stmt | block (elif '(' expr ')' if-body)* + +for-stmt: for '(' assignment (, assignment)* ';' expr ';' assignment ')' for-body +for-body: stmt|block + +block: '{' [stmts] '}' + +/********* Expressions *********/ +expr: expr binop expr | fun_call | unaryop expr | ID | literal +fun: ID | sqrt | avg[s] | count | deltas | distinct + | first | last | max[s] | min[s] | next + | prev | sum[s] | ratios | <... To be added> +fun_call: fun '(' expr (, expr)* ')' +binop: +|-|=|*|+=|-=|*=|/=|!=|<|>|>=|<=| and | or +unaryop: +|-| not +literal: numbers | strings | booleans + +``` ## Data Types - String Types: `STRING` and `TEXT` are variable-length strings with unlimited length. `VARCHAR(n)` is for strings with upper-bound limits. - Integer Types: `INT` and `INTEGER` are 32-bit integers, `SMALLINT` is for 16-bit integers, `TINYINT` is for 8-bit integers and `BIGINT` is 64-bit integers. On Linux and macOS, `HGEINT` is 128-bit integers. - Floating-Point Types: `REAL` denotes 32-bit floating point numbers while `DOUBLE` denotes 64-bit floating point numbers. - Temporal Types: `DATE` only supports the format of `yyyy-mm-dd`, and `TIME` uses 24-hour format and has the form of `hh:mm:ss:ms` the milliseconds part can range from 0 to 999, `TIMESTAMP` has the format of `yyyy-mm-dd hh:mm:ss:ms`. When importing data from CSV files, please make sure the spreadsheet software (if they were used) doesn't change the format of the date and timestamp by double-checking the file with a plain-text editor. -- Boolean Type: `BOOLEAN` is a boolean type with values `TRUE` and `FALSE`. +- Boolean Type: `BOOLEAN` or `BOOL` is a boolean type with values `TRUE` and `FALSE`. +## Create Table +Tables can be created using `CREATE TABLE` statement. 
For example: +``` +CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING) +INSERT INTO my_table VALUES(10, 20, "example") +INSERT INTO my_table SELECT * FROM my_table +``` +You can also create tables using a query. For example: +``` +CREATE TABLE my_table_derived +AS + SELECT c1, c2 * 2 as twice_c2 FROM my_table +``` +## Drop Table: +Tables can be dropped using the `DROP TABLE` statement. For example: +``` +DROP TABLE my_table IF EXISTS +``` ## Load Data: - Use query like `LOAD DATA INFILE INTO [OPTIONS ]` - File name is the relative path to the AQuery root directory (where prompy.py resides) - File name can also be absolute path. - See `data/q1.sql` for more information + +## Built-in functions: +- `avg[s]`: average of a column. `avgs(col)` and `avgs(w, col)` are, respectively, the rolling average and the moving average with window `w` of the column `col`. +- `sum[s]`, `max[s]`, `min[s]`: similar to `avg[s]`. +- `ratios(w = 1, col)`: moving ratio of a column, e.g. `ratios(w, col)[i] = col[i-w]/col[i]`. Window `w` has a default value of 1. +- `next(col)`, `prev(col)`: shift a column by one position in either direction, e.g. `next(col)[i] = col[i+1]`. +- `first(col)`, `last(col)`: first and last value of a column, i.e. `first(col) = col[0]`, `last(col) = col[n-1]`. +- `sqrt(x)`, `trunc(x)`, and other built-in math functions: element-wise math operations, e.g. `sqrt(x)[i] = sqrt(x[i])`. +- `pack(cols, ...)`: pack multiple columns into a single column. 
+ # Architecture ![Architecture](./docs/arch-hybrid.svg) diff --git a/aquery_config.py b/aquery_config.py index cdff3b7..2d5939b 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -2,7 +2,7 @@ ## GLOBAL CONFIGURATION FLAGS -version_string = '0.4.9a' +version_string = '0.5.0a' add_path_to_ldpath = True rebuild_backend = False run_backend = True diff --git a/build.py b/build.py index 8cd4b91..d817dc8 100644 --- a/build.py +++ b/build.py @@ -16,6 +16,7 @@ class checksums: server : Optional[Union[bytes, bool]] = None sources : Optional[Union[Dict[str, bytes], bool]] = None env : str = '' + def calc(self, compiler_name, libaquery_a = 'libaquery.a' , pch_hpp_gch = 'server/pch.hpp.gch', server = 'server.so' @@ -24,7 +25,8 @@ class checksums: self.env = (aquery_config.os_platform + machine() + aquery_config.build_driver + - compiler_name + compiler_name + + aquery_config.version_string ) for key in self.__dict__.keys(): try: diff --git a/engine/types.py b/engine/types.py index 5a56e12..3e217a3 100644 --- a/engine/types.py +++ b/engine/types.py @@ -295,7 +295,7 @@ opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call # monetdb wont extend int division to fp type # opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior) -opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior) +opmul = OperatorBase('mul', 2, auto_extension, cname = '*', sqlname = '*', call = binary_op_behavior) opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior) opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior) opneg = OperatorBase('neg', 1, as_is, cname = '-', sqlname = '-', call = unary_op_behavior) @@ -367,4 +367,7 @@ builtin_operators : Dict[str, OperatorBase] = 
{**builtin_binary_arith, **builtin **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, **user_module_func} -type_table = {**builtin_types, **type_table} \ No newline at end of file +type_table = {**builtin_types, **type_table} + +# Additional Aliases for type names +type_table['boolean'] = BoolT diff --git a/reconstruct/ast.py b/reconstruct/ast.py index b8228c1..66342df 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -262,6 +262,9 @@ class projection(ast_node): if self.col_ext or self.group_node and self.group_node.use_sp_gb: self.has_postproc = True + if self.group_node and self.group_node.use_sp_gb : + self.group_node.dedicated_glist + ... o = self.assumptions if 'orderby' in node: o.extend(enlist(node['orderby'])) @@ -433,7 +436,7 @@ class orderby(ast_node): o_str += ' ' + 'DESC' o_list.append(o_str) self.add(', '.join(o_list)) - + class scan(ast_node): class Position(Enum): @@ -622,6 +625,7 @@ class groupby(ast_node): o_list = [] self.refs = set() self.gb_cols = set() + # dedicated_glist -> cols populated for special group by self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = [] self.use_sp_gb = False for g in node: diff --git a/server/table.h b/server/table.h index 782397e..da75cc1 100644 --- a/server/table.h +++ b/server/table.h @@ -859,7 +859,6 @@ VT operator >(const T2& lhs, const VT& rhs) { } - template void print(const TableInfo& v, const char* delimiter = " ", const char* endline = "\n") { v.print(delimiter, endline); diff --git a/tests/q4.a b/tests/q4.a index d38a246..8e9e626 100644 --- a/tests/q4.a +++ b/tests/q4.a @@ -23,4 +23,17 @@ CREATE TABLE ticks2(ID VARCHAR(20), max REAL, min REAL) INSERT INTO ticks2 SELECT ID AS ID, max(ratios(endofdayprice)) AS max, min(ratios(endofdayprice)) AS min from ticks group by ID; SELECT ID, max, min -FROM ticks2; \ No newline at end of file +FROM ticks2; + +CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING) +INSERT INTO my_table VALUES(10, 20, 
"example") +select * from my_table; +INSERT INTO my_table SELECT * FROM my_table +select * from my_table; +SELECT c1, c2 + c2 as twice_c2 FROM my_table; + +CREATE TABLE my_table_derived +AS + SELECT c1, c2 + c2 as twice_c2 FROM my_table; +SELECT * FROM my_table_derived; +