bug fixes and clarification

4 years ago · dda68bd9e1
parent 80d2cdb36e
commit dda68bd9e1
9 changed files with 70 additions and 17 deletions
--- a/README.md
+++ b/README.md
@ -114,6 +114,20 @@ See ./tests/ for more examples.
 - A series of commands can be put in a script file and executed using the `script` command.
 - Can be executed using `script` command
 - See `test.aquery` as an example
 # User Manual
 ## Data Types
 - String Types: `STRING` and `TEXT` are variable-length strings with unlimited length. `VARCHAR(n)` is for strings with upper-bound limits.
 - Integer Types: `INT` and `INTEGER` are 32-bit integers, `SMALLINT` is for 16-bit integers, `TINYINT` is for 8-bit integers and `BIGINT` is for 64-bit integers. On Linux and macOS, `HGEINT` is for 128-bit integers. 
 - Floating-Point Types: `REAL` denotes 32-bit floating point numbers while `DOUBLE` denotes 64-bit floating point numbers. 
 - Temporal Types: `DATE` only supports the format `yyyy-mm-dd`. `TIME` uses the 24-hour format `hh:mm:ss:ms`, where the milliseconds part ranges from 0 to 999. `TIMESTAMP` has the format `yyyy-mm-dd hh:mm:ss:ms`. When importing data from CSV files, double-check the file with a plain-text editor to make sure that spreadsheet software (if any was used) has not changed the format of dates and timestamps.
 - Boolean Type: `BOOLEAN` is a boolean type with values `TRUE` and `FALSE`.
 ## Load Data:
 - Use a query like `LOAD DATA INFILE <filename> INTO <table_name> [OPTIONS <options>]`
 - The file name is a path relative to the AQuery root directory (where prompt.py resides)
 - The file name can also be an absolute path.
 - See `data/q1.sql` for more information 
 # Architecture 
 ![Architecture](./docs/arch-hybrid.svg)
@ -123,8 +137,8 @@ See ./tests/ for more examples.
 - Backend of AQuery++ Compiler generates target code dependent on the Execution Engine. It can either be the C++ code for AQuery Execution Engine or sql and C++ post-processor for Hybrid Engine or k9 for the k9 Engine.
 ## Execution Engines
 - AQuery++ supports different execution engines thanks to the decoupled compiler structure.
 - AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins and udf functions. 
 - Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB and everything else is executed by a post-processing module which is generated by the AQuery++ Compiler in C++ and then compiled and executed.
 - AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins and udf functions. 
 - K9 Execution Engine: (discontinued).
 # Roadmap
@ -160,4 +174,4 @@ See ./tests/ for more examples.
 - [x] Functionality: Basic helper functions in aquery 
 - [ ] Bug: Join-Aware Column management
 - [ ] Bug: Order By after Group By
- [ ] Functionality: Having clause
+- [ ] Functionality: Having clause
--- a/prompt.py
+++ b/prompt.py
@ -390,6 +390,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                print('stdin inreadable, Exiting...')
                exit(0)
            q = og_q.lower().strip()
            if (not re.sub(r'[ \r\n\t;]', '', q)):
                continue
            if False and q == 'exec': # generate build and run (AQuery Engine)
                state.cfg.backend_type = Backend_Type.BACKEND_AQuery.value
                cxt = engine.exec(state.stmts, cxt, keep)
@ -483,7 +485,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                continue
            elif q == 'format' or q == 'fmt':
                subprocess.call(['clang-format', 'out.cpp'])
-            elif q == 'exit' or q == 'exit()':
+            elif q == 'exit' or q == 'exit()' or q == 'quit' or q == 'quit()' or q == '\\q':
                rm(state)
                exit()
            elif q == 'r': # build and run
@ -553,7 +555,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                state.stmts = parser.parse(contents)
                state.currstats.parse_time = state.currstats.stop()
                continue
-            state.stmts = parser.parse(q)
+            state.stmts = parser.parse(og_q.strip())
            cxt.Info(state.stmts)
            state.currstats.parse_time = state.currstats.stop()
        except ParseException as e:
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@ -144,14 +144,20 @@ class projection(ast_node):
                    alias = proj['name']
                if not proj_expr.is_special:
-                    if proj_expr.node == '*':
+                    if str(proj_expr.node).strip().endswith('*'):
                        _datasource = self.datasource
                        if '.' in proj_expr.node:
                            tbl = proj_expr.node.split('.')[0]
                            if tbl in self.datasource.tables_dir:
                                _datasource = self.datasource.tables_dir[tbl]
                        _datasource = _datasource.all_cols(ordered = True, stripped = True)
                        name = [(c.get_name()
                                 if self.datasource.single_table
                                 else c.get_full_name()
-                                 ) for c in self.datasource.rec]
+                                 ) for c in _datasource]
-                        this_type = [c.type for c in self.datasource.rec]
+                        this_type = [c.type for c in _datasource]
-                        compound = [c.compound for c in self.datasource.rec]
+                        compound = [c.compound for c in _datasource]
-                        proj_expr = [expr(self, c.name) for c in self.datasource.rec]
+                        proj_expr = [expr(self, c.name) for c in _datasource]
                    else:
                        y = lambda x:x
                        count = lambda : 'count(*)'
@ -185,7 +191,7 @@ class projection(ast_node):
                this_type = enlist(this_type)
            elif type(proj) is str:
-                col = self.datasource.get_col(proj)
+                col = self.datasource.get_cols(proj)
                this_type = col.type
                disp_name = proj
                print('Unknown behavior:', proj, 'is str')    
@ -619,6 +625,15 @@ class join(ast_node):
        for col in cols:
            joint_cols |= self.joint_cols.get(col, set())
        return joint_cols
    def strip_joint_cols(self, cols : Set[ColRef]):
        """Return a copy of `cols` keeping one column per group of joint columns.

        For every column still present in the result, all *other* columns
        reported by `self.get_joint_cols([column])` are removed, so the first
        column encountered in each group survives.

        Args:
            cols: collection of columns; it is copied with `type(cols)(cols)`
                and never mutated.

        Returns:
            A new collection of the same type as `cols` without the
            redundant joint columns.
        """
        stripped = type(cols)(cols)
        # Iterate over a snapshot: removing from the collection that is being
        # iterated raises RuntimeError for sets and silently skips elements
        # for list-backed containers.
        for c in tuple(stripped):
            if c not in stripped:
                # Already dropped as the duplicate of an earlier column; using
                # it as a pivot would also drop the representative we kept.
                continue
            for j in self.get_joint_cols([c]):
                if j != c and j in stripped:
                    stripped.remove(j)
        return stripped
    def init(self, _):
        self.joins : List[join] = []
@ -724,6 +739,8 @@ class join(ast_node):
                print(f'Error: table {node} not found.')
    def get_cols(self, colExpr: str) -> Optional[ColRef]:
        if colExpr == '*':
            return self.all_cols(ordered = True, stripped = True)
        for t in self.tables:
            if colExpr in t.columns_byname:
                col = t.columns_byname[colExpr]
@ -751,13 +768,16 @@ class join(ast_node):
        return len(self.tables) == 1
 #    @property
-    def all_cols(self):
+    def all_cols(self, ordered = False, stripped = True):
-        ret = set()
+        from ordered_set import OrderedSet
        ret = OrderedSet() if ordered else set()
        for table in self.tables:
            rec = table.rec
            table.rec = self.rec
-            ret.update(table.all_cols())
+            ret.update(table.all_cols(ordered = ordered))
            table.rec = rec
        if stripped:
            return self.strip_joint_cols(ret)
        return ret
    # TODO: join condition awareness
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@ -257,7 +257,7 @@ class expr(ast_node):
                    if (node == '*' and 
                        not (type(self.parent) is expr 
                             and 'count' in self.parent.node)):
-                        self.datasource.all_cols()
+                        self.datasource.all_cols(ordered = True)
                    else:
                        self.raw_col = self.datasource.parse_col_names(node)
                        self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
--- a/reconstruct/storage.py
+++ b/reconstruct/storage.py
@ -108,10 +108,12 @@ class TableInfo:
            else:
                return datasource.parse_col_names(parsedColExpr[1])
-    def all_cols(self):
+    def all_cols(self, ordered = False):
        from ordered_set import OrderedSet
        _ret_set_t = OrderedSet if ordered else set
        if type(self.rec) is set:
            self.rec.update(self.columns)
-        return set(self.columns)
+        return _ret_set_t(self.columns)
    @property
    def single_table(self):
--- a/requirements.txt
+++ b/requirements.txt
@ -2,6 +2,7 @@ mo-future==6.2.21303
 mo-dots==9.173.22126
 mo-parsing==8.183.22158
 mo-imports==7.169.22121
 ordered-set
 dataclasses; python_version < '3.7'
 vswhere; sys_platform == 'win32'
 numpy
--- a/test.aquery
+++ b/test.aquery
@ -2,7 +2,7 @@
 # stats on 
-select "hello world"
+select "Hello, World!"
 xexec
 echo Testing Insert, Filters and Nested Aggregation
--- a/tests/joins.a
+++ b/tests/joins.a
@ -32,3 +32,5 @@ FIELDS TERMINATED BY ","
 select sd(a) + sales from tt, sale1 where tt.a = sale1.Mont
 select * from tt, sale1 where tt.a = sale1.Mont
--- a/tests/q4.a
+++ b/tests/q4.a
@ -0,0 +1,12 @@
 create table ticks(id varchar(20), timestamp int, tradeDate date, price int)
 load data infile "data/stocksym_price.csv"
 into table ticks fields terminated by ","
 -- select max(price - mins(price))
 -- from ticks assuming asc timestamp
 -- where ID = "S" and tradeDate= '01-10-22';
 select max(price - mins(price))
 from ticks assuming asc timestamp
 where ID = "S" and tradeDate= '2022-10-01';
`@ -32,3 +32,5 @@ FIELDS TERMINATED BY ","`

	`select sd(a) + sales from tt, sale1 where tt.a = sale1.Mont`	`select sd(a) + sales from tt, sale1 where tt.a = sale1.Mont`

		`select * from tt, sale1 where tt.a = sale1.Mont`