From 98890884dac91e4f044b9f3c8fd35a1e7a729897 Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 30 Sep 2022 22:34:02 +0800 Subject: [PATCH] updated documentations. --- Makefile | 2 +- README.md | 30 ++++++++++------ cims.sh | 2 ++ prompt.py | 85 +++++++++++++++++++++++++++++++++++++++++---- reconstruct/ast.py | 18 +++++++--- reconstruct/expr.py | 4 ++- sdk/aquery.h | 9 +++-- sdk/aquery_mem.cpp | 4 +++ server/libaquery.h | 2 +- server/types.h | 20 +++++++++++ test.aquery | 4 +++ 11 files changed, 154 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 30e57bb..43295fa 100644 --- a/Makefile +++ b/Makefile @@ -119,4 +119,4 @@ docker: clean: rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true - +.PHONY: clean diff --git a/README.md b/README.md index 6f07aa7..189e9ec 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco ## Installation AQuery is tested on mainstream operating systems such as Windows, macOS and Linux + +### Docker (Recommended): + - See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine. + - In AQuery root directory, type `make docker` to build the docker image from scratch. + - For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`) + - Finally run the image in **interactive** mode (`docker run -it --rm aquery`) + - If there is a need to access the system shell, type `dbg` to activate python interpreter and type `os.system('sh')` to launch a shell. ### Windows There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux. @@ -68,19 +75,19 @@ There're multiple options to run AQuery on Windows. You can use the native toolc In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/). -### Docker: - - See installation instructions from [docker.com](https://www.docker.com). Run docker desktop to start docker engine. - - In AQuery root directory, type `make docker` to build the docker image from scratch. - - For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`) - - Finally run the image in **interactive** mode (`docker run -it --rm aquery`) + ## Usage `python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started. #### Commands: - ``: parse AQuery statement - `f `: parse all AQuery statements in file - `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. -- `dbg` start debugging session -- `print`: printout parsed AQuery statements +- `stats ` configure statistics. + - no options: show statistics for all queries so far. + - `on` : statistics will be shown for every future query. + - `off`: statistics will not be shown for every future query. +- `dbg` start python interactive interpreter at the current context. +- `print`: print parsed AQuery statements (AST in JSON form) - `save `: save current code snippet. will use random filename if not specified. - `exit`: quit the prompt - `r`: run the last generated code snippet @@ -116,14 +123,16 @@ See ./tests/ for more examples. - [x] Order by - [x] Assumption - [x] Flatten - - [x] UDFs (Hybrid Engine only) - - [x] User Module - - [ ] Triggers - [x] Join (Hybrid Engine only) - [ ] Subqueries - [x] Query Optimization - [x] Selection/Order by push-down - [x] Join Optimization (Only in Hybrid Engine) + - [ ] Threaded GC +- [ ] Extensibility + - [x] UDFs (Hybrid Engine only) + - [x] SDK and User Module + - [ ] Triggers ## Known Issues: @@ -133,6 +142,5 @@ See ./tests/ for more examples. - [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible. - [ ] Functionality: Basic helper functions in aquery -- [x] Improvement: More DDLs, e.g. drop table, update table, etc. - [ ] Bug: Join-Aware Column management - [ ] Bug: Order By after Group By diff --git a/cims.sh b/cims.sh index c701c8f..2d4865d 100644 --- a/cims.sh +++ b/cims.sh @@ -1,4 +1,6 @@ #!/usr/bash +echo "Don't execute this script if it's not on CIMS servers." +echo "run this script with source command. e.g. \`source ./cims.sh\` or \`. ./cims.sh\`" module load g++-11.2 PWD=`pwd` export LD_LIBRARY_PATH=$PWD/usr/lib64:$LD_LIBRARY_PATH:/lib:/lib64:/usr/lib:/usr/lib64 diff --git a/prompt.py b/prompt.py index 4bd01e8..73a52d8 100644 --- a/prompt.py +++ b/prompt.py @@ -118,8 +118,50 @@ class Backend_Type(enum.Enum): BACKEND_MonetDB = 1 BACKEND_MariaDB = 2 +@dataclass +class QueryStats: + last_time : int = time.time() + parse_time : int = 0 + codegen_time : int = 0 + compile_time : int = 0 + exec_time : int = 0 + need_print : bool = False + def clear(self): + self.parse_time = 0 + self.codegen_time = 0 + self.compile_time = 0 + self.exec_time = 0 + self.last_time = time.time() + + def stop(self): + ret = time.time() - self.last_time + self.last_time = time.time() + return ret + + def cumulate(self, other : Optional['QueryStats']): + if other: + other.parse_time += self.parse_time + other.codegen_time += self.codegen_time + other.compile_time += self.compile_time + other.exec_time += self.exec_time + + def print(self, cumulative = None, clear = True, need_print = True): + if self.need_print: + if cumulative: + self.exec_time = self.stop() + self.cumulate(cumulative) + if need_print: + print(f'Parse Time: {self.parse_time}, Codegen Time: {self.codegen_time}, Compile Time: {self.compile_time}, Execution Time: {self.exec_time}.') + print(f'Total Time: {self.parse_time + self.codegen_time + self.compile_time + self.exec_time}') + self.need_print = False + if clear: + self.clear() class Config: - __all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', 'n_buffers'] + __all_attrs__ = ['running', 'new_query', 'server_mode', + 'backend_type', 'has_dll', + 'postproc_time', 'sql_time', + 'n_buffers' + ] __init_attributes__ = False @staticmethod @@ -134,7 +176,7 @@ class Config: def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None: Config.__init_self__() self.int_size = 4 - self.n_attrib = 6 + self.n_attrib = len(Config.__all_attrs__) self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size) self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32) self.new_query = nq @@ -179,6 +221,9 @@ class PromptState(): init : Callable[['PromptState'], None] = lambda _:None stmts = [''] payloads = {} + need_print : bool = False + stats : Optional[QueryStats] = None + currstats : Optional[QueryStats] = None buildmgr : Optional[build_manager]= None ## CLASSES END @@ -274,7 +319,9 @@ def init_prompt() -> PromptState: state.buildmgr = build_manager() state.buildmgr.build_caches() state.cfg = Config(state.server_mode) - + state.stats = QueryStats() + state.currstats = QueryStats() + if state.server_mode == RunType.IPC: atexit.register(lambda: rm(state)) state.init = init_ipc @@ -327,15 +374,18 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): payload = None keep = True cxt = engine.initialize() - + # state.currstats = QueryStats() + # state.need_print = False while running(): try: if state.server_status(): state.init() while state.get_ready(): time.sleep(.00001) + state.currstats.print(state.stats, need_print=state.need_print) try: og_q : str = next() + state.currstats.stop() except EOFError: print('stdin inreadable, Exiting...') exit(0) @@ -376,20 +426,25 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): state.send(sz, payload) except TypeError as e: print(e) - + state.currstats.codegen_time = state.currstats.stop() + state.currstats.compile_time = 0 + state.currstats.exec_time = 0 qs = re.split(r'[ \t]', q) build_this = not(len(qs) > 1 and qs[1].startswith('n')) if cxt.has_dll: with open('out.cpp', 'wb') as outfile: outfile.write((cxt.finalize()).encode('utf-8')) + state.currstats.codegen_time += state.currstats.stop() + if build_this: state.buildmgr.build_dll() state.cfg.has_dll = 1 else: state.cfg.has_dll = 0 + state.currstats.compile_time = state.currstats.stop() if build_this: state.set_ready() - + state.currstats.need_print = True continue elif q == 'dbg': @@ -469,6 +524,22 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): with open(filename, 'wb') as outfile: outfile.write((cxt.finalize()).encode('utf-8')) continue + elif q.startswith('stats'): + qs = re.split(r'[ \t]', q) + if len(qs) > 1: + if qs[1].startswith('on'): + state.need_print = True + continue + elif qs[1].startswith('off'): + state.need_print = False + continue + elif qs[1].startswith('last'): + state.currstats.need_print = True + state.currstats.print() + continue + state.stats.need_print = True + state.stats.print(clear = False) + continue trimed = ws.sub(' ', q.lower()).split(' ') if trimed[0].startswith('f'): fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \ @@ -480,9 +551,11 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): with open('tests/' + fn, 'r') as file: contents = file.read() state.stmts = parser.parse(contents) + state.currstats.parse_time = state.currstats.stop() continue state.stmts = parser.parse(q) cxt.Info(state.stmts) + state.currstats.parse_time = state.currstats.stop() except ParseException as e: print(e) continue diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 44fb969..001eccc 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -96,6 +96,9 @@ class projection(ast_node): else: self.where = None + if type(self.datasource) is join: + self.datasource.process_join_conditions() + if 'groupby' in node: self.context.special_gb = groupby.check_special(self, node['groupby']) @@ -624,6 +627,7 @@ class join(ast_node): self.joins.append((alias(tbls.__str__()), tbls.have_sep)) self.tables += tbls.tables self.tables_dir = {**self.tables_dir, **tbls.tables_dir} + self.join_conditions += tbls.join_conditions elif type(tbls) is TableInfo: self.joins.append((alias(tbls.table_name), False)) @@ -661,12 +665,17 @@ class join(ast_node): if keys[0].lower().endswith('join'): self.have_sep = True j = join(self, node[keys[0]]) + self.join_conditions += j.join_conditions tablename = f' {keys[0]} {j}' if len(keys) > 1 : + _ex = expr(self, node[keys[1]]) if keys[1].lower() == 'on': - tablename += f' ON {expr(self, node[keys[1]])}' + self.join_conditions += _ex.join_conditions + tablename += f' ON {_ex}' elif keys[1].lower() == 'using': - tablename += f' USING {expr(self, node[keys[1]])}' + if _ex.is_ColExpr: + self.join_conditions += (_ex.raw_col, j.get_cols(_ex.raw_col.name)) + tablename += f' USING {_ex}' self.joins.append((tablename, self.have_sep)) self.tables += j.tables self.tables_dir = {**self.tables_dir, **j.tables_dir} @@ -711,7 +720,9 @@ class join(ast_node): # TODO: join condition awareness def process_join_conditions(self): - pass + # This is done after both from + # and where clause are processed + print(self.join_conditions) def consume(self, node): self.sql = '' @@ -720,7 +731,6 @@ class join(ast_node): self.sql += j[0] # using JOIN keyword else: self.sql += ', ' + j[0] # using comma - self.process_join_conditions() if node and self.sql and self.top_level: self.sql = ' FROM ' + self.sql diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 0faf9a5..f2128fb 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -219,7 +219,9 @@ class expr(ast_node): if (is_joincond and len(self.children) == 2 and all([c.is_ColExpr for c in self.children])) : - self.root.join_conditions.append((c.raw_col for c in self.children)) + self.root.join_conditions.append( + self.children[0].raw_col, self.children[1].raw_col + ) if type(node) is str: if self.is_udfexpr: diff --git a/sdk/aquery.h b/sdk/aquery.h index 4c9c779..15848f9 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -67,10 +67,15 @@ struct Context{ #define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__ typedef void (*deallocator_t) (void*); +extern void default_deallocator(void* ptr); -extern void* Aalloc(unsigned long long sz); +extern void* Aalloc(unsigned long long sz, + deallocator_t deallocator = default_deallocator +); extern void Afree(void * mem); -extern void register_memory(void* ptr, deallocator_t deallocator); +extern void register_memory(void* ptr, + deallocator_t deallocator = default_deallocator +); __AQEXPORT__(void) init_session(Context* cxt); diff --git a/sdk/aquery_mem.cpp b/sdk/aquery_mem.cpp index ebd9690..56e473c 100644 --- a/sdk/aquery_mem.cpp +++ b/sdk/aquery_mem.cpp @@ -26,6 +26,10 @@ void register_memory(void* ptr, deallocator_t deallocator){ memmap->operator[](ptr) = deallocator; } +void default_deallocator(void* ptr){ + free(ptr); +} + __AQEXPORT__(void) init_session(Context* cxt){ session = &cxt->current; } diff --git a/server/libaquery.h b/server/libaquery.h index 0475156..551d205 100644 --- a/server/libaquery.h +++ b/server/libaquery.h @@ -17,7 +17,7 @@ enum Backend_Type { }; struct Config{ int running, new_query, server_mode, - backend_type, has_dll, n_buffers; + backend_type, has_dll, exec_time, n_buffers; int buffer_sizes[]; }; diff --git a/server/types.h b/server/types.h index f1b041d..78be49a 100644 --- a/server/types.h +++ b/server/types.h @@ -297,6 +297,26 @@ template using decays = typename decayS::type>::type; template using decay_inner = typename decayS::type; +template +struct aqis_same_impl { + constexpr static bool value = + std::conditional_t< + std::is_signed_v == std::is_signed_v, + Cond( + std::is_floating_point_v == std::is_floating_point_v, + Cond( + sizeof(T1) == sizeof(T2), + std::true_type, + std::false_type + ), + std::false_type + ), + std::false_type + >::value; +}; + +template +constexpr bool aqis_same = aqis_same_impl::value; template class T> struct instance_of_impl : std::false_type {}; diff --git a/test.aquery b/test.aquery index 7756ff4..ebfbd02 100644 --- a/test.aquery +++ b/test.aquery @@ -1,5 +1,7 @@ #!aquery +# stats on + select "hello world" xexec @@ -39,4 +41,6 @@ echo Testing Sample Queries f network.a xexec +stats + exit