bug fixes and clarification

dev
Bill 2 years ago
parent 80d2cdb36e
commit dda68bd9e1

@ -114,6 +114,20 @@ See ./tests/ for more examples.
- A series of commands can be put in a script file and executed with the `script` command.
- See `test.aquery` as an example
# User Manual
## Data Types
- String Types: `STRING` and `TEXT` are variable-length strings with no length limit. `VARCHAR(n)` is for strings with an upper length bound of n characters.
- Integer Types: `INT` and `INTEGER` are 32-bit integers, `SMALLINT` is a 16-bit integer, `TINYINT` is an 8-bit integer, and `BIGINT` is a 64-bit integer. On Linux and macOS, `HGEINT` is a 128-bit integer.
- Floating-Point Types: `REAL` denotes 32-bit floating-point numbers, while `DOUBLE` denotes 64-bit floating-point numbers.
- Temporal Types: `DATE` only supports the `yyyy-mm-dd` format. `TIME` uses the 24-hour format `hh:mm:ss:ms`, where the milliseconds part ranges from 0 to 999. `TIMESTAMP` has the format `yyyy-mm-dd hh:mm:ss:ms`. When importing data from CSV files, make sure the spreadsheet software (if one was used) didn't change the date or timestamp format, by double-checking the file in a plain-text editor.
- Boolean Type: `BOOLEAN` is a boolean type with values `TRUE` and `FALSE`.
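
As a quick illustration of these types, a hypothetical table using one column per family might look like the sketch below (the table and column names are invented for this example):

```sql
-- hypothetical table exercising the supported types
create table sample(
    name varchar(10),   -- bounded-length string
    note text,          -- unlimited-length string
    qty int,            -- 32-bit integer
    total bigint,       -- 64-bit integer
    price real,         -- 32-bit floating point
    ratio double,       -- 64-bit floating point
    d date,             -- yyyy-mm-dd
    t time,             -- hh:mm:ss:ms, 24-hour
    ts timestamp,       -- yyyy-mm-dd hh:mm:ss:ms
    ok boolean          -- TRUE or FALSE
)
```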
## Load Data
- Use a query like `LOAD DATA INFILE <filename> INTO <table_name> [OPTIONS <options>]`
- The file name is a path relative to the AQuery root directory (where prompt.py resides); it can also be an absolute path.
- See `data/q1.sql` for more information and the example below
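
For instance, the stock-ticks test script creates a table and then loads a CSV with a comma delimiter (the path is relative to the AQuery root):

```sql
create table ticks(id varchar(20), timestamp int, tradeDate date, price int)
load data infile "data/stocksym_price.csv"
into table ticks fields terminated by ","
```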
# Architecture
![Architecture](./docs/arch-hybrid.svg)
@ -123,8 +137,8 @@ See ./tests/ for more examples.
- The Backend of the AQuery++ Compiler generates target code dependent on the Execution Engine: C++ code for the AQuery Execution Engine, SQL plus a C++ post-processor for the Hybrid Engine, or k9 for the K9 Engine.
## Execution Engines
- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
- AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins or user-defined functions (UDFs).
- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB; everything else is handled by a post-processing module that the AQuery++ Compiler generates in C++, then compiles and executes (see the example query below).
- K9 Execution Engine: (discontinued).
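
To make the Hybrid Engine's split concrete, take this query from the stock-ticks test: the table scan and the `where` filter are SQL-compliant and can run on embedded MonetDB, while the order-dependent running minimum `mins(price)` under `assuming asc timestamp` would fall to the generated C++ post-processor. (This division of labor is a plausible reading of the design above, not a traced execution plan.)

```sql
select max(price - mins(price))
from ticks assuming asc timestamp
where ID = "S" and tradeDate = '2022-10-01'
```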
# Roadmap

@ -390,6 +390,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
print('stdin unreadable, Exiting...')
exit(0)
q = og_q.lower().strip()
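# skip input that is blank once whitespace and semicolons are stripped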
if (not re.sub(r'[ \r\n\t;]', '', q)):
continue
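# 'if False' permanently disables this branch; the standalone AQuery-engine path is kept for reference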
if False and q == 'exec': # generate build and run (AQuery Engine)
state.cfg.backend_type = Backend_Type.BACKEND_AQuery.value
cxt = engine.exec(state.stmts, cxt, keep)
@ -483,7 +485,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
continue
elif q == 'format' or q == 'fmt':
subprocess.call(['clang-format', 'out.cpp'])
elif q == 'exit' or q == 'exit()':
elif q == 'exit' or q == 'exit()' or q == 'quit' or q == 'quit()' or q == '\\q':
rm(state)
exit()
elif q == 'r': # build and run
@ -553,7 +555,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.stmts = parser.parse(contents)
state.currstats.parse_time = state.currstats.stop()
continue
state.stmts = parser.parse(q)
state.stmts = parser.parse(og_q.strip())
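# og_q (original case) is parsed; q is lowercased only for matching prompt commands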
cxt.Info(state.stmts)
state.currstats.parse_time = state.currstats.stop()
except ParseException as e:

@ -144,14 +144,20 @@ class projection(ast_node):
alias = proj['name']
if not proj_expr.is_special:
if proj_expr.node == '*':
if str(proj_expr.node).strip().endswith('*'):
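# match bare '*' as well as qualified 'tbl.*' projections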
_datasource = self.datasource
if '.' in proj_expr.node:
tbl = proj_expr.node.split('.')[0]
if tbl in self.datasource.tables_dir:
_datasource = self.datasource.tables_dir[tbl]
_datasource = _datasource.all_cols(ordered = True, stripped = True)
name = [(c.get_name()
if self.datasource.single_table
else c.get_full_name()
) for c in self.datasource.rec]
this_type = [c.type for c in self.datasource.rec]
compound = [c.compound for c in self.datasource.rec]
proj_expr = [expr(self, c.name) for c in self.datasource.rec]
) for c in _datasource]
this_type = [c.type for c in _datasource]
compound = [c.compound for c in _datasource]
proj_expr = [expr(self, c.name) for c in _datasource]
else:
y = lambda x:x
count = lambda : 'count(*)'
@ -185,7 +191,7 @@ class projection(ast_node):
this_type = enlist(this_type)
elif type(proj) is str:
col = self.datasource.get_col(proj)
col = self.datasource.get_cols(proj)
this_type = col.type
disp_name = proj
print('Unknown behavior:', proj, 'is str')
@ -620,6 +626,15 @@ class join(ast_node):
joint_cols |= self.joint_cols.get(col, set())
return joint_cols
def strip_joint_cols(self, cols : Set[ColRef]):
    stripped = type(cols)(cols)
    # iterate over a snapshot: removing from a set while iterating it fails at runtime
    for c in list(stripped):
        if c not in stripped: # already removed as another column's joint duplicate
            continue
        jc = self.get_joint_cols([c])
        for j in jc:
            if j != c and j in stripped:
                stripped.remove(j)
    return stripped
def init(self, _):
self.joins : List[join] = []
self.tables : List[TableInfo] = []
@ -724,6 +739,8 @@ class join(ast_node):
print(f'Error: table {node} not found.')
def get_cols(self, colExpr: str) -> Optional[ColRef]:
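# '*' expands to every column of the joined tables, in order, with joint (equi-join) duplicates stripped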
if colExpr == '*':
return self.all_cols(ordered = True, stripped = True)
for t in self.tables:
if colExpr in t.columns_byname:
col = t.columns_byname[colExpr]
@ -751,13 +768,16 @@ class join(ast_node):
return len(self.tables) == 1
# @property
def all_cols(self):
ret = set()
def all_cols(self, ordered = False, stripped = True):
from ordered_set import OrderedSet
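# OrderedSet preserves insertion order so that SELECT * yields a deterministic column order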
ret = OrderedSet() if ordered else set()
for table in self.tables:
rec = table.rec
table.rec = self.rec
ret.update(table.all_cols())
ret.update(table.all_cols(ordered = ordered))
table.rec = rec
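# collapse columns equated by join conditions so each joint group appears once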
if stripped:
return self.strip_joint_cols(ret)
return ret
# TODO: join condition awareness

@ -257,7 +257,7 @@ class expr(ast_node):
if (node == '*' and
not (type(self.parent) is expr
and 'count' in self.parent.node)):
self.datasource.all_cols()
self.datasource.all_cols(ordered = True)
else:
self.raw_col = self.datasource.parse_col_names(node)
self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None

@ -108,10 +108,12 @@ class TableInfo:
else:
return datasource.parse_col_names(parsedColExpr[1])
def all_cols(self):
def all_cols(self, ordered = False):
from ordered_set import OrderedSet
_ret_set_t = OrderedSet if ordered else set
if type(self.rec) is set:
self.rec.update(self.columns)
return set(self.columns)
return _ret_set_t(self.columns)
@property
def single_table(self):

@ -2,6 +2,7 @@ mo-future==6.2.21303
mo-dots==9.173.22126
mo-parsing==8.183.22158
mo-imports==7.169.22121
ordered-set
dataclasses; python_version < '3.7'
vswhere; sys_platform == 'win32'
numpy

@ -2,7 +2,7 @@
# stats on
select "hello world"
select "Hello, World!"
xexec
echo Testing Insert, Filters and Nested Aggregation

@ -32,3 +32,5 @@ FIELDS TERMINATED BY ","
select sd(a) + sales from tt, sale1 where tt.a = sale1.Mont
select * from tt, sale1 where tt.a = sale1.Mont

@ -0,0 +1,12 @@
create table ticks(id varchar(20), timestamp int, tradeDate date, price int)
load data infile "data/stocksym_price.csv"
into table ticks fields terminated by ","
-- select max(price - mins(price))
-- from ticks assuming asc timestamp
-- where ID = "S" and tradeDate= '01-10-22';
select max(price - mins(price))
from ticks assuming asc timestamp
where ID = "S" and tradeDate= '2022-10-01';