bug fixes and clarification

dev
Bill 2 years ago
parent 80d2cdb36e
commit dda68bd9e1

@ -114,6 +114,20 @@ See ./tests/ for more examples.
- A series of commands can be put in a script file and executed with the `script` command.
- See `test.aquery` as an example
# User Manual
## Data Types
- String Types: `STRING` and `TEXT` are variable-length strings with no length limit. `VARCHAR(n)` is for strings with an upper length bound of n characters.
- Integer Types: `INT` and `INTEGER` are 32-bit integers, `SMALLINT` is a 16-bit integer, `TINYINT` is an 8-bit integer, and `BIGINT` is a 64-bit integer. On Linux and macOS, `HGEINT` is a 128-bit integer.
- Floating-Point Types: `REAL` denotes 32-bit floating-point numbers, while `DOUBLE` denotes 64-bit floating-point numbers.
- Temporal Types: `DATE` only supports the `yyyy-mm-dd` format. `TIME` uses the 24-hour format `hh:mm:ss:ms`, where the milliseconds part ranges from 0 to 999. `TIMESTAMP` has the format `yyyy-mm-dd hh:mm:ss:ms`. When importing data from CSV files, make sure the spreadsheet software (if one was used) didn't change the date or timestamp format, by double-checking the file in a plain-text editor.
- Boolean Type: `BOOLEAN` is a boolean type with values `TRUE` and `FALSE`.
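
As a quick illustration of these types, a hypothetical table using one column per family might look like the sketch below (the table and column names are invented for this example):

```sql
-- hypothetical table exercising the supported types
create table sample(
    name varchar(10),   -- bounded-length string
    note text,          -- unlimited-length string
    qty int,            -- 32-bit integer
    total bigint,       -- 64-bit integer
    price real,         -- 32-bit floating point
    ratio double,       -- 64-bit floating point
    d date,             -- yyyy-mm-dd
    t time,             -- hh:mm:ss:ms, 24-hour
    ts timestamp,       -- yyyy-mm-dd hh:mm:ss:ms
    ok boolean          -- TRUE or FALSE
)
```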
## Load Data
- Use a query like `LOAD DATA INFILE <filename> INTO <table_name> [OPTIONS <options>]`
- The file name is a path relative to the AQuery root directory (where prompt.py resides); it can also be an absolute path.
- See `data/q1.sql` for more information and the example below
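
For instance, the stock-ticks test script creates a table and then loads a CSV with a comma delimiter (the path is relative to the AQuery root):

```sql
create table ticks(id varchar(20), timestamp int, tradeDate date, price int)
load data infile "data/stocksym_price.csv"
into table ticks fields terminated by ","
```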
# Architecture
![Architecture](./docs/arch-hybrid.svg)
@ -123,8 +137,8 @@ See ./tests/ for more examples.
- The Backend of the AQuery++ Compiler generates target code dependent on the Execution Engine: C++ code for the AQuery Execution Engine, SQL plus a C++ post-processor for the Hybrid Engine, or k9 for the K9 Engine.
## Execution Engines
- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
- AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins or user-defined functions (UDFs).
- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB; everything else is handled by a post-processing module that the AQuery++ Compiler generates in C++, then compiles and executes (see the example query below).
- K9 Execution Engine: (discontinued).
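
To make the Hybrid Engine's split concrete, take this query from the stock-ticks test: the table scan and the `where` filter are SQL-compliant and can run on embedded MonetDB, while the order-dependent running minimum `mins(price)` under `assuming asc timestamp` would fall to the generated C++ post-processor. (This division of labor is a plausible reading of the design above, not a traced execution plan.)

```sql
select max(price - mins(price))
from ticks assuming asc timestamp
where ID = "S" and tradeDate = '2022-10-01'
```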
# Roadmap

@ -390,6 +390,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
print('stdin unreadable, Exiting...')
exit(0)
q = og_q.lower().strip()
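# skip input that is blank once whitespace and semicolons are stripped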
if (not re.sub(r'[ \r\n\t;]', '', q)):
continue
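# 'if False' permanently disables this branch; the standalone AQuery-engine path is kept for reference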
if False and q == 'exec': # generate build and run (AQuery Engine)
state.cfg.backend_type = Backend_Type.BACKEND_AQuery.value
cxt = engine.exec(state.stmts, cxt, keep)
@ -483,7 +485,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
continue
elif q == 'format' or q == 'fmt':
subprocess.call(['clang-format', 'out.cpp'])
elif q == 'exit' or q == 'exit()':
elif q == 'exit' or q == 'exit()' or q == 'quit' or q == 'quit()' or q == '\\q':
rm(state)
exit()
elif q == 'r': # build and run
@ -553,7 +555,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.stmts = parser.parse(contents)
state.currstats.parse_time = state.currstats.stop()
continue
state.stmts = parser.parse(q)
state.stmts = parser.parse(og_q.strip())
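# og_q (original case) is parsed; q is lowercased only for matching prompt commands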
cxt.Info(state.stmts)
state.currstats.parse_time = state.currstats.stop()
except ParseException as e:

@ -144,14 +144,20 @@ class projection(ast_node):
alias = proj['name']
if not proj_expr.is_special:
if proj_expr.node == '*':
if str(proj_expr.node).strip().endswith('*'):
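# match bare '*' as well as qualified 'tbl.*' projections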
_datasource = self.datasource
if '.' in proj_expr.node:
tbl = proj_expr.node.split('.')[0]
if tbl in self.datasource.tables_dir:
_datasource = self.datasource.tables_dir[tbl]
_datasource = _datasource.all_cols(ordered = True, stripped = True)
name = [(c.get_name()
if self.datasource.single_table
else c.get_full_name()
) for c in self.datasource.rec]
this_type = [c.type for c in self.datasource.rec]
compound = [c.compound for c in self.datasource.rec]
proj_expr = [expr(self, c.name) for c in self.datasource.rec]
) for c in _datasource]
this_type = [c.type for c in _datasource]
compound = [c.compound for c in _datasource]
proj_expr = [expr(self, c.name) for c in _datasource]
else:
y = lambda x:x
count = lambda : 'count(*)'
@ -185,7 +191,7 @@ class projection(ast_node):
this_type = enlist(this_type)
elif type(proj) is str:
col = self.datasource.get_col(proj)
col = self.datasource.get_cols(proj)
this_type = col.type
disp_name = proj
print('Unknown behavior:', proj, 'is str')
@ -620,6 +626,15 @@ class join(ast_node):
joint_cols |= self.joint_cols.get(col, set())
return joint_cols
def strip_joint_cols(self, cols : Set[ColRef]):
    stripped = type(cols)(cols)
    # iterate over a snapshot: removing from a set while iterating it fails at runtime
    for c in list(stripped):
        if c not in stripped: # already removed as another column's joint duplicate
            continue
        jc = self.get_joint_cols([c])
        for j in jc:
            if j != c and j in stripped:
                stripped.remove(j)
    return stripped
def init(self, _):
self.joins : List[join] = []
self.tables : List[TableInfo] = []
@ -724,6 +739,8 @@ class join(ast_node):
print(f'Error: table {node} not found.')
def get_cols(self, colExpr: str) -> Optional[ColRef]:
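# '*' expands to every column of the joined tables, in order, with joint (equi-join) duplicates stripped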
if colExpr == '*':
return self.all_cols(ordered = True, stripped = True)
for t in self.tables:
if colExpr in t.columns_byname:
col = t.columns_byname[colExpr]
@ -751,13 +768,16 @@ class join(ast_node):
return len(self.tables) == 1
# @property
def all_cols(self):
ret = set()
def all_cols(self, ordered = False, stripped = True):
from ordered_set import OrderedSet
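# OrderedSet preserves insertion order so that SELECT * yields a deterministic column order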
ret = OrderedSet() if ordered else set()
for table in self.tables:
rec = table.rec
table.rec = self.rec
ret.update(table.all_cols())
ret.update(table.all_cols(ordered = ordered))
table.rec = rec
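# collapse columns equated by join conditions so each joint group appears once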
if stripped:
return self.strip_joint_cols(ret)
return ret
# TODO: join condition awareness

@ -257,7 +257,7 @@ class expr(ast_node):
if (node == '*' and
not (type(self.parent) is expr
and 'count' in self.parent.node)):
self.datasource.all_cols()
self.datasource.all_cols(ordered = True)
else:
self.raw_col = self.datasource.parse_col_names(node)
self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None

@ -108,10 +108,12 @@ class TableInfo:
else:
return datasource.parse_col_names(parsedColExpr[1])
def all_cols(self):
def all_cols(self, ordered = False):
from ordered_set import OrderedSet
_ret_set_t = OrderedSet if ordered else set
if type(self.rec) is set:
self.rec.update(self.columns)
return set(self.columns)
return _ret_set_t(self.columns)
@property
def single_table(self):

@ -2,6 +2,7 @@ mo-future==6.2.21303
mo-dots==9.173.22126
mo-parsing==8.183.22158
mo-imports==7.169.22121
ordered-set
dataclasses; python_version < '3.7'
vswhere; sys_platform == 'win32'
numpy

@ -2,7 +2,7 @@
# stats on
select "hello world"
select "Hello, World!"
xexec
echo Testing Insert, Filters and Nested Aggregation

@ -32,3 +32,5 @@ FIELDS TERMINATED BY ","
select sd(a) + sales from tt, sale1 where tt.a = sale1.Mont
select * from tt, sale1 where tt.a = sale1.Mont

@ -0,0 +1,12 @@
create table ticks(id varchar(20), timestamp int, tradeDate date, price int)
load data infile "data/stocksym_price.csv"
into table ticks fields terminated by ","
-- select max(price - mins(price))
-- from ticks assuming asc timestamp
-- where ID = "S" and tradeDate= '01-10-22';
select max(price - mins(price))
from ticks assuming asc timestamp
where ID = "S" and tradeDate= '2022-10-01';