bug fixes, restructure, user module parsing

dev
Bill 2 years ago
parent 1732835692
commit 42c334af84

@ -18,6 +18,7 @@ info:
$(info $(OS)) $(info $(OS))
$(info $(Threading)) $(info $(Threading))
$(info "test") $(info "test")
$(info $(CXX))
server.bin: server.bin:
$(CXX) server/server.cpp server/io.cpp server/table.cpp $(OS_SUPPORT) $(Threading) -flto --std=c++1z -O3 -march=native -o server.bin $(CXX) server/server.cpp server/io.cpp server/table.cpp $(OS_SUPPORT) $(Threading) -flto --std=c++1z -O3 -march=native -o server.bin
server.so: server.so:

@ -2,7 +2,17 @@
## Introduction ## Introduction
AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution. AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution.
Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
## Architecture
### AQuery Compiler
- The query is first processed by the AQuery Compiler which is composed of a frontend that parses the query into AST and a backend that generates target code that delivers the query.
- Front end of AQuery++ Compiler is built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing) with modifications to handle AQuery dialect and extension.
- Backend of AQuery++ Compiler generates target code dependent on the Execution Engine. It can be C++ code for the AQuery Execution Engine, SQL plus a C++ post-processor for the Hybrid Engine, or k9 for the k9 Engine.
### Execution Engines
- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
- AQuery Execution Engine: executes query by compiling the query plan to C++ code. Doesn't support joins and udf functions.
- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an Embedded version of MonetDB and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
- K9 Execution Engine (discontinued).
## Roadmap ## Roadmap
- [x] SQL Parser -> AQuery Parser (Front End) - [x] SQL Parser -> AQuery Parser (Front End)
@ -16,29 +26,39 @@ Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakosk
- [x] Order by - [x] Order by
- [x] Assumption - [x] Assumption
- [x] Flatten - [x] Flatten
- [ ] Multi-table - [x] Multi-table
- [ ] Join - [x] Join
- [ ] Subqueries - [ ] Subqueries
- [ ] -> Optimizing Compiler - [ ] -> Optimizing Compiler
## TODO: ## TODO:
- [ ] C++ Meta-Programming: Elimilate template recursions as much as possible.
- [ ] User Module load syntax parsing (fn definition/registration)
- [ ] User Module test
- [ ] Interval based triggers
- [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
- [ ] IPC: Better ways to communicate between Interpreter (Python) and Executer (C++). - [ ] IPC: Better ways to communicate between Interpreter (Python) and Executer (C++).
- [ ] Sockets? stdin/stdout capture? - [ ] Sockets? stdin/stdout capture?
## Requirements ## Requirements
Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support (e.g. gcc 6.0, MSVC 2017, clang 6.0), and python 3.6 or above. Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support.
- GCC: 9.0 or above (g++ 7.x, 8.x fail to handle variadic template expansion due to compiler bug)
- Clang: 6.0 or above (Recommended)
- MSVC: 2019 or later
## Usage ## Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started. `python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
#### Commands: #### Commands:
- `<sql statement>`: parse sql statement - `<sql statement>`: parse sql statement
- `f <filename>`: parse all sql statements in file - `f <filename>`: parse all sql statements in file
- `dbg`: start debugging session
- `print`: printout parsed sql statements - `print`: printout parsed sql statements
- `exec`: execute last parsed statement(s) - `exec`: execute last parsed statement(s) with AQuery Execution Engine. AQuery Execution Engine executes query by compiling it to C++ code and then executing it.
- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The SQL-compliant part is executed by an Embedded version of MonetDB and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
- `r`: run the last generated code snippet - `r`: run the last generated code snippet
- `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified. - `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified.
- `exit`: quit the prompt - `exit`: quit the prompt
#### Example: #### Example:
`f moving_avg.a` <br> `f moving_avg.a` <br>
`exec` `xexec`

@ -7,6 +7,7 @@
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
# #
from sre_parse import WHITESPACE
from mo_parsing.helpers import restOfLine from mo_parsing.helpers import restOfLine
from mo_parsing.infix import delimited_list from mo_parsing.infix import delimited_list
from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
@ -648,9 +649,8 @@ def parser(literal_string, ident, sqlserver=False):
+ Optional(assign("where", expr)) + Optional(assign("where", expr))
) / to_json_call ) / to_json_call
load = ( load_data = (
keyword("load")("op") keyword("data").suppress()
+ keyword("data").suppress()
+ keyword("infile")("loc") + keyword("infile")("loc")
+ literal_string ("file") + literal_string ("file")
+ INTO + INTO
@ -662,6 +662,42 @@ def parser(literal_string, ident, sqlserver=False):
+ keyword("by").suppress() + keyword("by").suppress()
+ literal_string ("term") + literal_string ("term")
) )
)
module_func_def = (
var_name("fname")
+ LB
+ delimited_list(
(
var_name("arg")
+ COLON
+ var_name("type")
)("vars")
)
+ RB
+ LAMBDA
+ var_name("ret_type")
)
load_module = (
keyword("module").suppress()
+ FROM
+ literal_string ("file")
+ Optional(
keyword("FUNCTIONS").suppress()
+ LB
+ module_func_def("funcs")
+ ZeroOrMore(Suppress(',')
+ module_func_def("funcs"),
Whitespace()
)
+ RB
)
)
load = (
keyword("load")("op")
+ (load_data | load_module)
) ("load") ) ("load")

@ -1,3 +1,4 @@
from copy import deepcopy
from engine.utils import defval from engine.utils import defval
from aquery_config import have_hge from aquery_config import have_hge
from typing import Dict, List from typing import Dict, List
@ -51,6 +52,12 @@ class Types:
else: else:
raise Exception(f'Illeagal cast: from {ty.name} to {self.name}.') raise Exception(f'Illeagal cast: from {ty.name} to {self.name}.')
def __call__(self, args):
# Produce a parameterized variant of this type: the result's sqlname is this
# type's sqlname followed by "(arg1, arg2, ...)".
# args: iterable of type arguments; each one is rendered via its __str__().
arg_str = ', '.join([a.__str__() for a in args])
# Work on a deep copy so this type object itself is not modified.
ret = deepcopy(self)
ret.sqlname = self.sqlname + f'({arg_str})'
return ret
def __repr__(self) -> str: def __repr__(self) -> str:
return self.sqlname return self.sqlname
def __str__(self) -> str: def __str__(self) -> str:

@ -24,6 +24,36 @@ Run prompt.py without supplying with any arguments to run in interactive mode.
parse only: parse the file and print out the AST parse only: parse the file and print out the AST
''' '''
prompt_help = '''\
******** AQuery Prompt Help *********
help:
print out this message
help commandline:
print help message for AQuery Commandline
<sql statement>:
parse sql statement
f <query file>:
parse all AQuery statements in file
script <AQuery Script file>:
run AQuery Script in file
dbg:
start debugging session with current context
print:
printout parsed sql statements
exec:
execute last parsed statement(s) with AQuery Execution Engine
xexec:
execute last parsed statement(s) with Hybrid Execution Engine
r:
run the last generated code snippet
save <OPTIONAL: filename>:
save current code snippet. will use timestamp as filename if not specified.
exit or Ctrl+C:
exit prompt mode
'''
if __name__ == '__main__': if __name__ == '__main__':
import mimetypes import mimetypes
mimetypes._winreg = None mimetypes._winreg = None
@ -308,7 +338,13 @@ def main(running = lambda:True, next = input, state = None):
if subprocess.call(['make', 'snippet'], stdout = nullstream) == 0: if subprocess.call(['make', 'snippet'], stdout = nullstream) == 0:
state.set_ready() state.set_ready()
continue continue
if q.startswith('help'):
qs = re.split(r'[ \t]', q)
if len(qs) > 1 and qs[1].startswith('c'):
print(help_message)
else:
print(prompt_help)
continue
elif q == 'xexec': # generate build and run (MonetDB Engine) elif q == 'xexec': # generate build and run (MonetDB Engine)
state.cfg.backend_type = Backend_Type.BACKEND_MonetDB.value state.cfg.backend_type = Backend_Type.BACKEND_MonetDB.value
cxt = xengine.exec(state.stmts, cxt, keep) cxt = xengine.exec(state.stmts, cxt, keep)

@ -1,10 +1,12 @@
from engine.types import * from engine.types import *
from engine.utils import enlist
class ColRef: class ColRef:
def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False): def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None):
self.type : Types = AnyT self.type : Types = AnyT
if type(_ty) is str: if type(_ty) is str:
self.type = builtin_types[_ty.lower()] self.type = builtin_types[_ty.lower()]
if _ty_args:
self.type = self.type(enlist(_ty_args))
elif type(_ty) is Types: elif type(_ty) is Types:
self.type = _ty self.type = _ty
self.cobj = cobj self.cobj = cobj
@ -47,9 +49,13 @@ class TableInfo:
def add_col(self, c, new = True, i = 0): def add_col(self, c, new = True, i = 0):
_ty = c['type'] _ty = c['type']
_ty_args = None
if type(_ty) is dict:
_ty_val = list(_ty.keys())[0]
_ty_args = _ty[_ty_val]
_ty = _ty_val
if new: if new:
_ty = _ty if type(c) is ColRef else list(_ty.keys())[0] col_object = ColRef(_ty, c, self, c['name'], len(self.columns), _ty_args = _ty_args)
col_object = ColRef(_ty, c, self, c['name'], len(self.columns))
else: else:
col_object = c col_object = c
c.table = self c.table = self

@ -153,8 +153,8 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
#ifdef THREADING #ifdef THREADING
auto tp = new ThreadPool(); auto tp = new ThreadPool();
cxt->thread_pool = tp; cxt->thread_pool = tp;
#endif
#endif
const char* shmname; const char* shmname;
if (argc < 0) if (argc < 0)

@ -3,12 +3,8 @@
#include <stdint.h> #include <stdint.h>
class ThreadPool{ typedef int(*payload_fn_t)(void*);
struct payload_t{
public:
typedef void(*payload_fn_t)(void*);
struct payload_t{
payload_fn_t f; payload_fn_t f;
void* args; void* args;
constexpr payload_t(payload_fn_t f, void* args) noexcept constexpr payload_t(payload_fn_t f, void* args) noexcept
@ -18,7 +14,11 @@ public:
bool is_empty() const { return f && args; } bool is_empty() const { return f && args; }
void empty() { f = nullptr; args = nullptr; } void empty() { f = nullptr; args = nullptr; }
void operator()() { f(args); } void operator()() { f(args); }
}; };
class ThreadPool{
public:
ThreadPool(uint32_t n_threads = 0); ThreadPool(uint32_t n_threads = 0);
void enqueue_task(const payload_t& payload); void enqueue_task(const payload_t& payload);
bool busy(); bool busy();
@ -39,4 +39,31 @@ private:
}; };
// Abstract base class for triggers. Concrete triggers must override tick();
// construction takes a ThreadPool pointer (how the pool is used is not
// visible in this header).
// NOTE(review): no virtual destructor is declared even though the class has
// a pure virtual member and is derived from below -- confirm objects are
// never deleted through a Trigger*.
class Trigger{
private:
void* triggers; //min-heap by t-rem (opaque pointer; "t-rem" presumably means time remaining -- TODO confirm)
// Pure virtual step function; declared private here and overridden by subclasses.
virtual void tick() = 0;
public:
Trigger(ThreadPool* tp);
};
// Trigger subclass that declares a nested timer record and overrides tick().
// NOTE(review): no constructor is declared here, while the base class only
// declares Trigger(ThreadPool*) -- confirm how instances are meant to be built.
class IntervalBasedTrigger : public Trigger{
public:
// Timing record with two unsigned 32-bit fields.
struct timer{
uint32_t interval; // in milliseconds
uint32_t time_remaining; // presumably also in milliseconds -- TODO confirm
};
// Declaration only; takes no arguments and its definition is not in this header.
void add_trigger();
private:
// Overrides the pure virtual Trigger::tick().
void tick() override;
};
// Trigger subclass exposing add_trigger() and overriding tick().
// NOTE(review): no constructor is declared here, while the base class only
// declares Trigger(ThreadPool*) -- confirm how instances are meant to be built.
class CallbackBasedTrigger : public Trigger{
public:
// Declaration only; takes no arguments and its definition is not in this header.
void add_trigger();
private:
// Overrides the pure virtual Trigger::tick().
void tick() override;
};
#endif #endif

@ -1,4 +1,5 @@
LOAD MODULE FROM "test.so" LOAD MODULE FROM "test.so"
FUNCTIONS (div(a:int, b:int) -> double, FUNCTIONS (
div(a:int, b:int) -> double,
mulvec(a:int, b:vecfloat) -> vecfloat mulvec(a:int, b:vecfloat) -> vecfloat
); );

@ -0,0 +1,7 @@
CREATE TABLE types_test(names varchar(10), val real, id int)
LOAD DATA INFILE "datatypes.csv"
INTO TABLE types_test
FIELDS TERMINATED BY ","
select names, val * 10000 + id from types_test

@ -1,14 +0,0 @@
#include "udf.hpp"
int main(){
vector_type _a{1,2,3,4};
vector_type _b{2,3,3,5};
ColRef<int> a("a");
ColRef<int> b("b");
a.initfrom(_a, "a");
b.initfrom(_b, "b");
ColRef<decltype(covariances2_gettype(a,b,0))> ret{4};
covariances2(a,b,2,4,ret);
print(ret);
}

@ -1,5 +0,0 @@
#include "udf.hpp"
int main(){
}
Loading…
Cancel
Save