updated documentations.

3 years ago · 98890884da
parent 4974db3117
commit 98890884da
11 changed files with 154 additions and 26 deletions
--- a/2
+++ b/2
@ -119,4 +119,4 @@ docker:
 clean:
 	rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true
-
+.PHONY: clean
--- a/README.md
+++ b/README.md
@ -19,6 +19,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco
 ## Installation
 AQuery is tested on mainstream operating systems such as Windows, macOS and Linux.
 ### Docker (Recommended): 
   - See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
   - In AQuery root directory, type `make docker` to build the docker image from scratch. 
   - For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
   - Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
   - If you need access to the system shell, type `dbg` to start the Python interpreter, then type `os.system('sh')` to launch a shell.
 ### Windows
 There are multiple options for running AQuery on Windows: you can use the native toolchain from Microsoft Visual Studio, use gcc from Cygwin/MinGW, or run it under Windows Subsystem for Linux.
@ -68,19 +75,19 @@ There're multiple options to run AQuery on Windows. You can use the native toolc
   In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/).
-### Docker: 
+
   - See installation instructions from [docker.com](https://www.docker.com). Run docker desktop to start docker engine.
   - In AQuery root directory, type `make docker` to build the docker image from scratch. 
   - For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
   - Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
 ## Usage
 `python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
 #### Commands:
 - `<sql statement>`: parse AQuery statement
 - `f <filename>`: parse all AQuery statements in file
 - `exec`: execute the last parsed statement(s) with the Hybrid Execution Engine. The Hybrid Execution Engine splits the query into two parts: the standard SQL (MonetDB dialect) part is executed by an embedded version of MonetDB, and everything else is executed by a post-processing module that the AQuery++ compiler generates in C++, then compiles and executes.
- `dbg` start debugging session 
+- `stats <OPTIONAL: options>` configure statistics.
- `print`: printout parsed AQuery statements
+  - no options: show statistics for all queries so far.
  - `on` : statistics will be shown for every future query.
  - `off`: statistics will not be shown for every future query.
 - `dbg`: start an interactive Python interpreter in the current context.
 - `print`: print parsed AQuery statements (AST in JSON form)
 - `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified.
 - `exit`: quit the prompt
 - `r`: run the last generated code snippet
@ -116,14 +123,16 @@ See ./tests/ for more examples.
   -  [x] Order by
   -  [x] Assumption
   -  [x] Flatten
   -  [x] UDFs (Hybrid Engine only)
   -  [x] User Module
   -  [ ] Triggers 
   -  [x] Join (Hybrid Engine only)
   -  [ ] Subqueries 
 - [x] Query Optimization
  - [x] Selection/Order by push-down
  - [x] Join Optimization (Only in Hybrid Engine)
  - [ ] Threaded GC
 - [ ] Extensibility 
  - [x] UDFs (Hybrid Engine only)
  - [x] SDK and User Module 
  - [ ] Triggers 
 ## Known Issues:
@ -133,6 +142,5 @@ See ./tests/ for more examples.
 - [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
 - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
 - [ ] Functionality: Basic helper functions in aquery 
 - [x] Improvement: More DDLs, e.g. drop table, update table, etc.
 - [ ] Bug: Join-Aware Column management
 - [ ] Bug: Order By after Group By
--- a/cims.sh
+++ b/cims.sh
@ -1,4 +1,6 @@
 #!/usr/bin/env bash
 # Environment setup for CIMS servers: loads the g++ 11.2 module and extends
 # the dynamic-library search path with ./usr/lib64 plus the system lib dirs.
 # Must be sourced (not executed) so the exported variable reaches the
 # caller's shell.
 echo  "Don't execute this script if it's not on CIMS servers."
 echo "run this script with source command. e.g. \`source ./cims.sh\` or \`. ./cims.sh\`"
 module load g++-11.2
 PWD=$(pwd)
 # Only splice in the previous LD_LIBRARY_PATH when it is non-empty; an empty
 # element ("::") would make the dynamic loader search the current directory.
 export LD_LIBRARY_PATH="$PWD/usr/lib64:${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/lib:/lib64:/usr/lib:/usr/lib64"
--- a/prompt.py
+++ b/prompt.py
@ -118,8 +118,50 @@ class Backend_Type(enum.Enum):
 	BACKEND_MonetDB = 1
 	BACKEND_MariaDB = 2
@dataclass
class QueryStats:
    """Wall-clock timing breakdown (in seconds) for query processing.

    An instance is used either as a per-query stopwatch (``stop()`` measures
    the time since the previous checkpoint) or as a running total that other
    instances are folded into via ``cumulate()``.
    """
    # Checkpoint timestamp from time.time(). None means "take the current
    # time when the instance is constructed" (resolved in __post_init__), so
    # each instance gets its own start time instead of one frozen at import.
    last_time : Optional[float] = None
    parse_time : float = 0
    codegen_time : float = 0
    compile_time : float = 0
    exec_time : float = 0
    # When False, print() does nothing at all.
    need_print : bool = False

    def __post_init__(self):
        if self.last_time is None:
            self.last_time = time.time()

    def clear(self):
        """Zero all phase timers and restart the checkpoint at the current time."""
        self.parse_time = 0
        self.codegen_time = 0
        self.compile_time = 0
        self.exec_time  = 0
        self.last_time = time.time()

    def stop(self):
        """Return the seconds elapsed since the last checkpoint and start a new one."""
        # Read the clock once so no time is lost between measuring and resetting.
        now = time.time()
        elapsed = now - self.last_time
        self.last_time = now
        return elapsed

    def cumulate(self, other : Optional['QueryStats']):
        """Add this instance's phase timers into ``other`` (no-op if ``other`` is falsy)."""
        if other:
            other.parse_time += self.parse_time
            other.codegen_time += self.codegen_time
            other.compile_time += self.compile_time
            other.exec_time += self.exec_time

    def print(self, cumulative = None, clear = True, need_print = True):
        """Report and/or accumulate the timers; does nothing unless ``self.need_print`` is set.

        cumulative: if given, ``exec_time`` is set to the time since the last
            checkpoint and all timers are added into it.
        clear: if true, reset the timers afterwards.
        need_print: if true, print the breakdown and clear ``self.need_print``.
        """
        if self.need_print:
            if cumulative:
                self.exec_time = self.stop()
                self.cumulate(cumulative)
            if need_print:
                print(f'Parse Time: {self.parse_time}, Codegen Time: {self.codegen_time}, Compile Time: {self.compile_time}, Execution Time: {self.exec_time}.')
                print(f'Total Time: {self.parse_time + self.codegen_time + self.compile_time + self.exec_time}')
                self.need_print = False
            if clear:
                self.clear()
 class Config:
-    __all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', 'n_buffers']
+    __all_attrs__ = ['running', 'new_query', 'server_mode', 
                     'backend_type', 'has_dll', 
                     'postproc_time', 'sql_time', 
                     'n_buffers'
                     ]
    __init_attributes__ = False
    @staticmethod
@ -134,7 +176,7 @@ class Config:
    def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
        Config.__init_self__()
        self.int_size = 4
-        self.n_attrib = 6
+        self.n_attrib = len(Config.__all_attrs__)
        self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
        self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
        self.new_query = nq
@ -179,6 +221,9 @@ class PromptState():
    init : Callable[['PromptState'], None] = lambda _:None
    stmts = ['']
    payloads = {}
    need_print : bool = False
    stats : Optional[QueryStats] = None
    currstats : Optional[QueryStats] = None
    buildmgr : Optional[build_manager]= None
 ## CLASSES END
@ -274,7 +319,9 @@ def init_prompt() -> PromptState:
    state.buildmgr = build_manager()  
    state.buildmgr.build_caches()  
    state.cfg = Config(state.server_mode)
-        
+    state.stats = QueryStats()
    state.currstats = QueryStats()
    if state.server_mode == RunType.IPC:
        atexit.register(lambda: rm(state))
        state.init = init_ipc
@ -327,15 +374,18 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
    payload = None
    keep = True
    cxt = engine.initialize()
-
+    # state.currstats = QueryStats()
    # state.need_print = False
    while running():
        try:
            if state.server_status():
                state.init()
            while state.get_ready():
                time.sleep(.00001)
            state.currstats.print(state.stats, need_print=state.need_print)
            try:
                og_q : str = next()
                state.currstats.stop()
            except EOFError:
                print('stdin inreadable, Exiting...')
                exit(0)
@ -376,20 +426,25 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                        state.send(sz, payload)
                    except TypeError as e:
                        print(e)
-
+                state.currstats.codegen_time = state.currstats.stop()
                state.currstats.compile_time = 0
                state.currstats.exec_time = 0
                qs = re.split(r'[ \t]', q)
                build_this = not(len(qs) > 1 and qs[1].startswith('n'))
                if cxt.has_dll:
                    with open('out.cpp', 'wb') as outfile:
                        outfile.write((cxt.finalize()).encode('utf-8'))
                    state.currstats.codegen_time += state.currstats.stop()
                    if build_this:
                        state.buildmgr.build_dll()
                        state.cfg.has_dll = 1
                else:
                    state.cfg.has_dll = 0
                state.currstats.compile_time = state.currstats.stop()
                if build_this:
                    state.set_ready()
-                
+                state.currstats.need_print = True
                continue
            elif q == 'dbg':
@ -469,6 +524,22 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                with open(filename, 'wb') as outfile:
                    outfile.write((cxt.finalize()).encode('utf-8'))
                continue
            elif q.startswith('stats'):
                qs = re.split(r'[ \t]', q)
                if len(qs) > 1:
                    if qs[1].startswith('on'):
                        state.need_print = True
                        continue
                    elif qs[1].startswith('off'):
                        state.need_print = False
                        continue
                    elif qs[1].startswith('last'):
                        state.currstats.need_print = True
                        state.currstats.print()
                        continue
                state.stats.need_print = True
                state.stats.print(clear = False)
                continue
            trimed = ws.sub(' ', q.lower()).split(' ') 
            if trimed[0].startswith('f'):
                fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
@ -480,9 +551,11 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                    with open('tests/' + fn, 'r') as file:
                        contents = file.read()
                state.stmts = parser.parse(contents)
                state.currstats.parse_time = state.currstats.stop()
                continue
            state.stmts = parser.parse(q)
            cxt.Info(state.stmts)
            state.currstats.parse_time = state.currstats.stop()
        except ParseException as e:
            print(e)
            continue
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@ -96,6 +96,9 @@ class projection(ast_node):
        else:
            self.where = None    
        if type(self.datasource) is join:
            self.datasource.process_join_conditions()
        if 'groupby' in node:
            self.context.special_gb = groupby.check_special(self, node['groupby'])
@ -624,6 +627,7 @@ class join(ast_node):
            self.joins.append((alias(tbls.__str__()), tbls.have_sep))
            self.tables += tbls.tables
            self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
            self.join_conditions += tbls.join_conditions
        elif type(tbls) is TableInfo:
            self.joins.append((alias(tbls.table_name), False))
@ -661,12 +665,17 @@ class join(ast_node):
                if keys[0].lower().endswith('join'):
                    self.have_sep = True
                    j = join(self, node[keys[0]])
                    self.join_conditions += j.join_conditions
                    tablename = f' {keys[0]} {j}'
                    if len(keys) > 1 :
                        _ex = expr(self, node[keys[1]])
                        if keys[1].lower() == 'on':
-                            tablename += f' ON {expr(self, node[keys[1]])}' 
+                            self.join_conditions += _ex.join_conditions
                            tablename += f' ON {_ex}' 
                        elif keys[1].lower() == 'using':
-                            tablename += f' USING {expr(self, node[keys[1]])}'
+                            if _ex.is_ColExpr:
                                self.join_conditions += (_ex.raw_col, j.get_cols(_ex.raw_col.name))
                            tablename += f' USING {_ex}'
                    self.joins.append((tablename, self.have_sep))
                    self.tables += j.tables
                    self.tables_dir = {**self.tables_dir, **j.tables_dir}
@ -711,7 +720,9 @@ class join(ast_node):
    # TODO: join condition awareness
    def process_join_conditions(self):
-        pass
+        # This is done after both from 
        # and where clause are processed
        print(self.join_conditions)
    def consume(self, node):
        self.sql = ''
@ -720,7 +731,6 @@ class join(ast_node):
                self.sql += j[0] # using JOIN keyword
            else:
                self.sql += ', ' + j[0] # using comma
        self.process_join_conditions()
        if node and self.sql and self.top_level:
            self.sql = ' FROM ' + self.sql 
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@ -219,7 +219,9 @@ class expr(ast_node):
                if (is_joincond and len(self.children) == 2
                    and all([c.is_ColExpr for c in self.children])) :
-                    self.root.join_conditions.append((c.raw_col for c in self.children))
+                    self.root.join_conditions.append(
                            self.children[0].raw_col, self.children[1].raw_col
                        )
        if type(node) is str:
            if self.is_udfexpr:
--- a/sdk/aquery.h
+++ b/sdk/aquery.h
@ -67,10 +67,15 @@ struct Context{
 #define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__ 
 typedef void (*deallocator_t) (void*);
 extern void default_deallocator(void* ptr);
-extern void* Aalloc(unsigned long long sz);
+extern void* Aalloc(unsigned long long sz, 
 	deallocator_t deallocator = default_deallocator
 );
 extern void Afree(void * mem);
-extern void register_memory(void* ptr, deallocator_t deallocator);
+extern void register_memory(void* ptr,
 	deallocator_t deallocator = default_deallocator
 );
 __AQEXPORT__(void) init_session(Context* cxt);
--- a/sdk/aquery_mem.cpp
+++ b/sdk/aquery_mem.cpp
@ -26,6 +26,10 @@ void register_memory(void* ptr, deallocator_t deallocator){
    memmap->operator[](ptr) = deallocator;
 }
 // Default deallocator: releases `ptr` with free().
 void default_deallocator(void* ptr){
    free(ptr);
 }
 // Exported entry point: points the file-level `session` at the `current`
 // member of the supplied Context. `cxt` is dereferenced unconditionally.
 __AQEXPORT__(void) init_session(Context* cxt){
    session = &cxt->current;
 }
--- a/server/libaquery.h
+++ b/server/libaquery.h
@ -17,7 +17,7 @@ enum Backend_Type {
 };
 // Configuration record: a run of int fields followed by a flexible array
 // member holding buffer sizes.
 // NOTE(review): prompt.py in this same change lists eight names in
 // Config.__all_attrs__ (..., 'postproc_time', 'sql_time', 'n_buffers') while
 // this struct declares seven ints (..., exec_time, n_buffers). If the Python
 // side maps those names onto consecutive int32 slots of a buffer shared with
 // this struct (presumably; not visible here), n_buffers and buffer_sizes
 // would sit at different offsets on the two sides -- confirm the layouts agree.
 struct Config{
    int running, new_query, server_mode,
-	 	backend_type, has_dll, n_buffers;
+	 	backend_type, has_dll, exec_time, n_buffers;
    int buffer_sizes[];
 };
--- a/server/types.h
+++ b/server/types.h
@ -297,6 +297,26 @@ template <class T>
 using decays = typename decayS<typename std::decay<T>::type>::type;
 template <class T>
 using decay_inner = typename decayS<T>::type;
 // Structural type equivalence: `value` is true when T1 and T2 agree on
 // signedness, on being floating-point or not, and on size. Distinct types
 // with the same representation class (e.g. two 64-bit signed integer types)
 // therefore compare as "same".
 template <class T1, class T2>
 struct aqis_same_impl {
 	constexpr static bool value =
 		std::is_signed_v<T1> == std::is_signed_v<T2> &&
 		std::is_floating_point_v<T1> == std::is_floating_point_v<T2> &&
 		sizeof(T1) == sizeof(T2);
 };
 // Variable-template shorthand for aqis_same_impl<T1, T2>::value.
 template <class T1, class T2>
 constexpr bool aqis_same = aqis_same_impl<T1, T2>::value;
 // Primary template: for an arbitrary (type, class template) pair this derives
 // from std::false_type.
 // NOTE(review): a specialization yielding true for actual instances of T is
 // presumably declared after this point -- not visible here.
 template <class, template <class...> class T>
 struct instance_of_impl : std::false_type {};
--- a/test.aquery
+++ b/test.aquery
@ -1,5 +1,7 @@
 #!aquery
 # stats on 
 select "hello world"
 xexec
@ -39,4 +41,6 @@ echo Testing Sample Queries
 f network.a
 xexec 
 stats
 exit