Updated documentation.

dev
Bill 2 years ago
parent 4974db3117
commit 98890884da

@ -119,4 +119,4 @@ docker:
clean:
rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true
.PHONY: clean

@ -19,6 +19,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco
## Installation
AQuery is tested on mainstream operating systems such as Windows, macOS and Linux
### Docker (Recommended):
- See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
- If there is a need to access the system shell, type `dbg` to activate python interpreter and type `os.system('sh')` to launch a shell.
### Windows
There are multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio, gcc from Cygwin/MinGW, or run it under Windows Subsystem for Linux.
@ -68,19 +75,19 @@ There're multiple options to run AQuery on Windows. You can use the native toolc
In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/).
### Docker:
- See installation instructions from [docker.com](https://www.docker.com). Run docker desktop to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
## Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
#### Commands:
- `<sql statement>`: parse AQuery statement
- `f <filename>`: parse all AQuery statements in file
- `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
- `dbg`: start a debugging session
- `print`: print out parsed AQuery statements
- `stats <OPTIONAL: options>`: configure statistics.
- no options: show statistics for all queries so far.
- `on` : statistics will be shown for every future query.
- `off`: statistics will not be shown for every future query.
- `dbg`: start a Python interactive interpreter in the current context.
- `print`: print parsed AQuery statements (AST in JSON form)
- `save <OPTIONAL: filename>`: save the current code snippet; a random filename is used if none is specified.
- `exit`: quit the prompt
- `r`: run the last generated code snippet
@ -116,14 +123,16 @@ See ./tests/ for more examples.
- [x] Order by
- [x] Assumption
- [x] Flatten
- [x] UDFs (Hybrid Engine only)
- [x] User Module
- [ ] Triggers
- [x] Join (Hybrid Engine only)
- [ ] Subqueries
- [x] Query Optimization
- [x] Selection/Order by push-down
- [x] Join Optimization (Only in Hybrid Engine)
- [ ] Threaded GC
- [ ] Extensibility
- [x] UDFs (Hybrid Engine only)
- [x] SDK and User Module
- [ ] Triggers
## Known Issues:
@ -133,6 +142,5 @@ See ./tests/ for more examples.
- [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
- [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
- [ ] Functionality: Basic helper functions in aquery
- [x] Improvement: More DDLs, e.g. drop table, update table, etc.
- [ ] Bug: Join-Aware Column management
- [ ] Bug: Order By after Group By

@ -1,4 +1,6 @@
#!/bin/bash
# Environment setup for running AQuery on NYU CIMS servers.
# Must be run with `source` (not executed) so that the `module load` and the
# exported LD_LIBRARY_PATH persist in the caller's shell.
# BUGFIX: shebang was `#!/usr/bash`, which is not a valid interpreter path on
# any mainstream distro; use /bin/bash (the line is informational anyway,
# since the script is meant to be sourced).
echo "Don't execute this script if it's not on CIMS servers."
echo "run this script with source command. e.g. \`source ./cims.sh\` or \`. ./cims.sh\`"
# Load a C++17-capable compiler toolchain provided by the CIMS module system.
module load g++-11.2
PWD=$(pwd)
# Put the bundled libraries in ./usr/lib64 ahead of the system search path.
export LD_LIBRARY_PATH=$PWD/usr/lib64:$LD_LIBRARY_PATH:/lib:/lib64:/usr/lib:/usr/lib64

@ -118,8 +118,50 @@ class Backend_Type(enum.Enum):
BACKEND_MonetDB = 1
BACKEND_MariaDB = 2
@dataclass
class QueryStats:
    """Wall-clock timing breakdown (in seconds) for one query: parse,
    code generation, compilation and execution phases, plus an interval
    clock (`last_time`) advanced by :meth:`stop`.

    NOTE(review): the original source was whitespace-mangled; statement
    nesting inside :meth:`print` has been reconstructed on a best-effort
    basis — confirm against upstream.
    """
    # Timestamp of the most recent checkpoint; stop() measures against it.
    last_time: float = 0.0
    parse_time: float = 0
    codegen_time: float = 0
    compile_time: float = 0
    exec_time: float = 0
    # Set by the prompt loop when this query's stats should be displayed.
    need_print: bool = False

    def __post_init__(self):
        # BUGFIX: the original default `last_time: int = time.time()` was
        # evaluated ONCE at class-definition (import) time and shared by every
        # instance, so the first stop() measured time since module import
        # rather than since construction. Initialize per instance instead
        # (honoring an explicit nonzero value passed by the caller).
        if not self.last_time:
            self.last_time = time.time()

    def clear(self):
        """Reset all accumulated timings and restart the interval clock."""
        self.parse_time = 0
        self.codegen_time = 0
        self.compile_time = 0
        self.exec_time = 0
        self.last_time = time.time()

    def stop(self):
        """Return seconds elapsed since the last checkpoint and restart it."""
        now = time.time()
        ret = now - self.last_time
        self.last_time = now
        return ret

    def cumulate(self, other: Optional['QueryStats']):
        """Add this query's per-phase timings into `other` (a running total).

        No-op when `other` is None.
        """
        if other:
            other.parse_time += self.parse_time
            other.codegen_time += self.codegen_time
            other.compile_time += self.compile_time
            other.exec_time += self.exec_time

    def print(self, cumulative=None, clear=True, need_print=True):
        """Finalize and optionally display this query's statistics.

        Does nothing unless `self.need_print` was set (i.e. a query actually
        ran). When `cumulative` is given, the execution phase is closed out
        via stop() and the timings are folded into `cumulative`. The report
        is only emitted when the `need_print` argument is also true; `clear`
        controls whether the counters are reset afterwards.
        """
        if self.need_print:
            if cumulative:
                self.exec_time = self.stop()
                self.cumulate(cumulative)
            if need_print:
                print(f'Parse Time: {self.parse_time}, Codegen Time: {self.codegen_time}, Compile Time: {self.compile_time}, Execution Time: {self.exec_time}.')
                print(f'Total Time: {self.parse_time + self.codegen_time + self.compile_time + self.exec_time}')
            self.need_print = False
            if clear:
                self.clear()
class Config:
__all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', 'n_buffers']
__all_attrs__ = ['running', 'new_query', 'server_mode',
'backend_type', 'has_dll',
'postproc_time', 'sql_time',
'n_buffers'
]
__init_attributes__ = False
@staticmethod
@ -134,7 +176,7 @@ class Config:
def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
Config.__init_self__()
self.int_size = 4
self.n_attrib = 6
self.n_attrib = len(Config.__all_attrs__)
self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
self.new_query = nq
@ -179,6 +221,9 @@ class PromptState():
init : Callable[['PromptState'], None] = lambda _:None
stmts = ['']
payloads = {}
need_print : bool = False
stats : Optional[QueryStats] = None
currstats : Optional[QueryStats] = None
buildmgr : Optional[build_manager]= None
## CLASSES END
@ -274,7 +319,9 @@ def init_prompt() -> PromptState:
state.buildmgr = build_manager()
state.buildmgr.build_caches()
state.cfg = Config(state.server_mode)
state.stats = QueryStats()
state.currstats = QueryStats()
if state.server_mode == RunType.IPC:
atexit.register(lambda: rm(state))
state.init = init_ipc
@ -327,15 +374,18 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
payload = None
keep = True
cxt = engine.initialize()
# state.currstats = QueryStats()
# state.need_print = False
while running():
try:
if state.server_status():
state.init()
while state.get_ready():
time.sleep(.00001)
state.currstats.print(state.stats, need_print=state.need_print)
try:
og_q : str = next()
state.currstats.stop()
except EOFError:
print('stdin inreadable, Exiting...')
exit(0)
@ -376,20 +426,25 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.send(sz, payload)
except TypeError as e:
print(e)
state.currstats.codegen_time = state.currstats.stop()
state.currstats.compile_time = 0
state.currstats.exec_time = 0
qs = re.split(r'[ \t]', q)
build_this = not(len(qs) > 1 and qs[1].startswith('n'))
if cxt.has_dll:
with open('out.cpp', 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8'))
state.currstats.codegen_time += state.currstats.stop()
if build_this:
state.buildmgr.build_dll()
state.cfg.has_dll = 1
else:
state.cfg.has_dll = 0
state.currstats.compile_time = state.currstats.stop()
if build_this:
state.set_ready()
state.currstats.need_print = True
continue
elif q == 'dbg':
@ -469,6 +524,22 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open(filename, 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8'))
continue
elif q.startswith('stats'):
qs = re.split(r'[ \t]', q)
if len(qs) > 1:
if qs[1].startswith('on'):
state.need_print = True
continue
elif qs[1].startswith('off'):
state.need_print = False
continue
elif qs[1].startswith('last'):
state.currstats.need_print = True
state.currstats.print()
continue
state.stats.need_print = True
state.stats.print(clear = False)
continue
trimed = ws.sub(' ', q.lower()).split(' ')
if trimed[0].startswith('f'):
fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
@ -480,9 +551,11 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open('tests/' + fn, 'r') as file:
contents = file.read()
state.stmts = parser.parse(contents)
state.currstats.parse_time = state.currstats.stop()
continue
state.stmts = parser.parse(q)
cxt.Info(state.stmts)
state.currstats.parse_time = state.currstats.stop()
except ParseException as e:
print(e)
continue

@ -96,6 +96,9 @@ class projection(ast_node):
else:
self.where = None
if type(self.datasource) is join:
self.datasource.process_join_conditions()
if 'groupby' in node:
self.context.special_gb = groupby.check_special(self, node['groupby'])
@ -624,6 +627,7 @@ class join(ast_node):
self.joins.append((alias(tbls.__str__()), tbls.have_sep))
self.tables += tbls.tables
self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
self.join_conditions += tbls.join_conditions
elif type(tbls) is TableInfo:
self.joins.append((alias(tbls.table_name), False))
@ -661,12 +665,17 @@ class join(ast_node):
if keys[0].lower().endswith('join'):
self.have_sep = True
j = join(self, node[keys[0]])
self.join_conditions += j.join_conditions
tablename = f' {keys[0]} {j}'
if len(keys) > 1 :
_ex = expr(self, node[keys[1]])
if keys[1].lower() == 'on':
tablename += f' ON {expr(self, node[keys[1]])}'
self.join_conditions += _ex.join_conditions
tablename += f' ON {_ex}'
elif keys[1].lower() == 'using':
tablename += f' USING {expr(self, node[keys[1]])}'
if _ex.is_ColExpr:
self.join_conditions += (_ex.raw_col, j.get_cols(_ex.raw_col.name))
tablename += f' USING {_ex}'
self.joins.append((tablename, self.have_sep))
self.tables += j.tables
self.tables_dir = {**self.tables_dir, **j.tables_dir}
@ -711,7 +720,9 @@ class join(ast_node):
# TODO: join condition awareness
def process_join_conditions(self):
pass
# This is done after both from
# and where clause are processed
print(self.join_conditions)
def consume(self, node):
self.sql = ''
@ -720,7 +731,6 @@ class join(ast_node):
self.sql += j[0] # using JOIN keyword
else:
self.sql += ', ' + j[0] # using comma
self.process_join_conditions()
if node and self.sql and self.top_level:
self.sql = ' FROM ' + self.sql

@ -219,7 +219,9 @@ class expr(ast_node):
if (is_joincond and len(self.children) == 2
and all([c.is_ColExpr for c in self.children])) :
self.root.join_conditions.append((c.raw_col for c in self.children))
self.root.join_conditions.append(
self.children[0].raw_col, self.children[1].raw_col
)
if type(node) is str:
if self.is_udfexpr:

@ -67,10 +67,15 @@ struct Context{
#define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__
typedef void (*deallocator_t) (void*);
extern void default_deallocator(void* ptr);
extern void* Aalloc(unsigned long long sz);
extern void* Aalloc(unsigned long long sz,
deallocator_t deallocator = default_deallocator
);
extern void Afree(void * mem);
extern void register_memory(void* ptr, deallocator_t deallocator);
extern void register_memory(void* ptr,
deallocator_t deallocator = default_deallocator
);
__AQEXPORT__(void) init_session(Context* cxt);

@ -26,6 +26,10 @@ void register_memory(void* ptr, deallocator_t deallocator){
memmap->operator[](ptr) = deallocator;
}
// Default cleanup routine used when callers of Aalloc()/register_memory()
// do not supply their own deallocator (both declare it as the default
// argument): releases memory obtained from the malloc family with free().
void default_deallocator(void* ptr){
    free(ptr);
}
// Exported (extern "C", DLL-visible) entry point: binds this translation
// unit to an execution context by storing a pointer to the context's
// `current` member in the file-scope `session` variable (declared elsewhere
// in this TU). NOTE(review): presumably called once per loaded module before
// any query code runs — confirm against the loader.
__AQEXPORT__(void) init_session(Context* cxt){
    session = &cxt->current;
}

@ -17,7 +17,7 @@ enum Backend_Type {
};
struct Config{
int running, new_query, server_mode,
backend_type, has_dll, n_buffers;
backend_type, has_dll, exec_time, n_buffers;
int buffer_sizes[];
};

@ -297,6 +297,26 @@ template <class T>
using decays = typename decayS<typename std::decay<T>::type>::type;
template <class T>
using decay_inner = typename decayS<T>::type;
// Relaxed "same storage type" trait: T1 and T2 compare equal when they agree
// on (1) signedness, (2) floating-point-ness and (3) byte size — e.g. `long`
// and `long long` on LP64 platforms are treated as interchangeable even
// though std::is_same would say otherwise.
// NOTE(review): `Cond(c, a, b)` is a project macro, presumably expanding to
// std::conditional_t<c, a, b> (the outermost level uses std::conditional_t
// directly) — confirm its definition before relying on this reading.
template <class T1, class T2>
struct aqis_same_impl {
    constexpr static bool value =
        std::conditional_t<
            std::is_signed_v<T1> == std::is_signed_v<T2>,
            Cond(
                std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
                Cond(
                    sizeof(T1) == sizeof(T2),
                    std::true_type,
                    std::false_type
                ),
                std::false_type
            ),
            std::false_type
        >::value;
};

// Variable-template shorthand, mirroring the std::is_same_v convention.
template <class T1, class T2>
constexpr bool aqis_same = aqis_same_impl<T1, T2>::value;
template <class, template <class...> class T>
struct instance_of_impl : std::false_type {};

@ -1,5 +1,7 @@
#!aquery
# stats on
select "hello world"
xexec
@ -39,4 +41,6 @@ echo Testing Sample Queries
f network.a
xexec
stats
exit

Loading…
Cancel
Save