bug fixes, restructure, user module parsing

dev
Bill 2 years ago
parent 1732835692
commit 42c334af84

@ -18,6 +18,7 @@ info:
$(info $(OS))
$(info $(Threading))
$(info "test")
$(info $(CXX))
# Build the standalone server executable from the three server sources.
# The sources are listed as prerequisites so that an existing server.bin is
# rebuilt when any of them changes; previously the rule had no prerequisites,
# so make treated an existing server.bin as always up to date.
# Header changes are still not tracked by this rule.
server.bin: server/server.cpp server/io.cpp server/table.cpp
	$(CXX) $^ $(OS_SUPPORT) $(Threading) -flto --std=c++1z -O3 -march=native -o $@
server.so:

@ -2,8 +2,18 @@
## Introduction
AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution.
Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
## Architecture
### AQuery Compiler
- The query is first processed by the AQuery Compiler, which is composed of a frontend that parses the query into an AST and a backend that generates target code that delivers the query.
- Front end of AQuery++ Compiler is built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing) with modifications to handle AQuery dialect and extension.
- The backend of the AQuery++ Compiler generates target code that depends on the Execution Engine. It can be C++ code for the AQuery Execution Engine, SQL plus a C++ post-processor for the Hybrid Execution Engine, or k9 for the K9 Engine.
### Execution Engines
- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
- AQuery Execution Engine: executes the query by compiling the query plan to C++ code. Does not support joins or user-defined functions (UDFs).
- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB, and everything else is executed by a post-processing module that the AQuery++ Compiler generates in C++ and then compiles and executes.
- K9 Execution Engine (discontinued).
## Roadmap
- [x] SQL Parser -> AQuery Parser (Front End)
- [ ] AQuery-C++ Compiler (Back End)
@ -16,29 +26,39 @@ Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakosk
- [x] Order by
- [x] Assumption
- [x] Flatten
- [ ] Multi-table
- [ ] Join
- [x] Multi-table
- [x] Join
- [ ] Subqueries
- [ ] -> Optimizing Compiler
## TODO:
- [ ] C++ Meta-Programming: Elimilate template recursions as much as possible.
- [ ] User Module load syntax parsing (fn definition/registration)
- [ ] User Module test
- [ ] Interval based triggers
- [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
- [ ] IPC: Better ways to communicate between Interpreter (Python) and Executer (C++).
- [ ] Sockets? stdin/stdout capture?
## Requirements
Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support (e.g. gcc 6.0, MSVC 2017, clang 6.0), and python 3.6 or above.
Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support.
- GCC: 9.0 or above (g++ 7.x, 8.x fail to handle variadic template expansion due to compiler bug)
- Clang: 6.0 or above (Recommended)
- MSVC: 2019 or later
## Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
#### Commands:
- `<sql statement>`: parse sql statement
- `f <filename>`: parse all sql statements in file
- `dbg` start debugging session
- `print`: printout parsed sql statements
- `exec`: execute last parsed statement(s)
- `exec`: execute last parsed statement(s) with AQuery Execution Engine. AQuery Execution Engine executes query by compiling it to C++ code and then executing it.
- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB, and everything else is executed by a post-processing module that the AQuery++ Compiler generates in C++ and then compiles and executes.
- `r`: run the last generated code snippet
- `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified.
- `exit`: quit the prompt
#### Example:
`f moving_avg.a` <br>
`exec`
`xexec`

@ -7,6 +7,7 @@
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
#
from sre_parse import WHITESPACE
from mo_parsing.helpers import restOfLine
from mo_parsing.infix import delimited_list
from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
@ -648,9 +649,8 @@ def parser(literal_string, ident, sqlserver=False):
+ Optional(assign("where", expr))
) / to_json_call
load = (
keyword("load")("op")
+ keyword("data").suppress()
load_data = (
keyword("data").suppress()
+ keyword("infile")("loc")
+ literal_string ("file")
+ INTO
@ -662,6 +662,42 @@ def parser(literal_string, ident, sqlserver=False):
+ keyword("by").suppress()
+ literal_string ("term")
)
)
# Grammar for a single function signature listed in a LOAD MODULE statement:
# a name (tagged "fname"), LB, a delimited list of `arg COLON type` groups
# (each group tagged "vars"), RB, LAMBDA, and a return type (tagged "ret_type").
# NOTE(review): LB/RB/COLON/LAMBDA are defined outside this view; presumably
# "(", ")", ":" and "->" so that `div(a:int, b:int) -> double` parses -- confirm.
# NOTE(review): the delimited_list is not wrapped in Optional here, so a
# signature with an empty argument list may not match -- confirm.
module_func_def = (
var_name("fname")
+ LB
+ delimited_list(
(
var_name("arg")
+ COLON
+ var_name("type")
)("vars")
)
+ RB
+ LAMBDA
+ var_name("ret_type")
)
# Grammar for the part of a LOAD statement that follows the `load` keyword when
# loading a user module: `module FROM <literal_string>` (string tagged "file"),
# optionally followed by `FUNCTIONS LB sig {, sig} RB`, where every signature is
# a module_func_def tagged "funcs".
# NOTE(review): Whitespace() is passed as the second positional argument of
# ZeroOrMore -- confirm that this is the parameter it is meant to fill.
# NOTE(review): Suppress is not among the imports visible in this chunk;
# presumably it is imported elsewhere in the file -- confirm.
load_module = (
keyword("module").suppress()
+ FROM
+ literal_string ("file")
+ Optional(
keyword("FUNCTIONS").suppress()
+ LB
+ module_func_def("funcs")
+ ZeroOrMore(Suppress(',')
+ module_func_def("funcs"),
Whitespace()
)
+ RB
)
)
# Top-level LOAD statement: the `load` keyword (tagged "op") followed by either
# the load_data form or the load_module form; the whole match is named "load".
load = (
keyword("load")("op")
+ (load_data | load_module)
) ("load")

@ -1,3 +1,4 @@
from copy import deepcopy
from engine.utils import defval
from aquery_config import have_hge
from typing import Dict, List
@ -50,6 +51,12 @@ class Types:
return self.cast_from_dict[ty.name](ty)
else:
raise Exception(f'Illeagal cast: from {ty.name} to {self.name}.')
# Build a parameterised variant of this type.
# args: iterable of type arguments; each one is rendered via its __str__().
# Returns a deep copy of self whose sqlname is the original sqlname followed by
# "(<arguments joined with ', '>)". self itself is left unmodified.
def __call__(self, args):
arg_str = ', '.join([a.__str__() for a in args])
# Copy first so the shared type object keeps its unparameterised sqlname.
ret = deepcopy(self)
ret.sqlname = self.sqlname + f'({arg_str})'
return ret
# The repr of a type is its sqlname attribute.
def __repr__(self) -> str:
return self.sqlname

@ -24,6 +24,36 @@ Run prompt.py without supplying with any arguments to run in interactive mode.
parse only: parse the file and print out the AST
'''
# Help text for the interactive prompt. The `help` command prints this text
# unless its first argument starts with 'c', in which case help_message is
# printed instead.
prompt_help = '''\
******** AQuery Prompt Help *********
help:
print out this message
help commandline:
print help message for AQuery Commandline
<sql statement>:
parse sql statement
f <query file>:
parse all AQuery statements in file
script <AQuery Script file>:
run AQuery Script in file
dbg:
start debugging session with current context
print:
printout parsed sql statements
exec:
execute last parsed statement(s) with AQuery Execution Engine
xexec:
execute last parsed statement(s) with Hybrid Execution Engine
r:
run the last generated code snippet
save <OPTIONAL: filename>:
save current code snippet. will use timestamp as filename if not specified.
exit or Ctrl+C:
exit prompt mode
'''
if __name__ == '__main__':
import mimetypes
mimetypes._winreg = None
@ -308,7 +338,13 @@ def main(running = lambda:True, next = input, state = None):
if subprocess.call(['make', 'snippet'], stdout = nullstream) == 0:
state.set_ready()
continue
if q.startswith('help'):
qs = re.split(r'[ \t]', q)
if len(qs) > 1 and qs[1].startswith('c'):
print(help_message)
else:
print(prompt_help)
continue
elif q == 'xexec': # generate build and run (MonetDB Engine)
state.cfg.backend_type = Backend_Type.BACKEND_MonetDB.value
cxt = xengine.exec(state.stmts, cxt, keep)
@ -429,7 +465,7 @@ def main(running = lambda:True, next = input, state = None):
sh.interact(banner = traceback.format_exc(), exitmsg = 'debugging session ended.')
save('', cxt)
rm(state)
raise
raise
rm(state)
## FUNCTIONS END

@ -1,10 +1,12 @@
from engine.types import *
from engine.utils import enlist
class ColRef:
def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False):
def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None):
self.type : Types = AnyT
if type(_ty) is str:
self.type = builtin_types[_ty.lower()]
if _ty_args:
self.type = self.type(enlist(_ty_args))
elif type(_ty) is Types:
self.type = _ty
self.cobj = cobj
@ -47,9 +49,13 @@ class TableInfo:
def add_col(self, c, new = True, i = 0):
_ty = c['type']
_ty_args = None
if type(_ty) is dict:
_ty_val = list(_ty.keys())[0]
_ty_args = _ty[_ty_val]
_ty = _ty_val
if new:
_ty = _ty if type(c) is ColRef else list(_ty.keys())[0]
col_object = ColRef(_ty, c, self, c['name'], len(self.columns))
col_object = ColRef(_ty, c, self, c['name'], len(self.columns), _ty_args = _ty_args)
else:
col_object = c
c.table = self

@ -153,9 +153,9 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
#ifdef THREADING
auto tp = new ThreadPool();
cxt->thread_pool = tp;
#endif
const char* shmname;
if (argc < 0)
return dll_main(argc, argv, cxt);

@ -3,22 +3,22 @@
#include <stdint.h>
// Task entry point: receives one opaque argument pointer and returns an int.
typedef int(*payload_fn_t)(void*);

// A unit of work: a function pointer together with the opaque argument it is
// invoked with.
struct payload_t{
    payload_fn_t f;   // function to run; nullptr when the payload is empty
    void* args;       // opaque argument forwarded to f

    // Build a payload that will call f(args).
    constexpr payload_t(payload_fn_t f, void* args) noexcept
        : f(f), args(args) {}
    // A default-constructed payload is empty.
    constexpr payload_t() noexcept
        : f(nullptr), args(nullptr) {}

    // True when the payload is not fully populated (f or args is null).
    // The previous body returned `f && args`, which reported a populated
    // payload as empty and reported false right after empty() or default
    // construction; the result is now negated so it agrees with empty().
    bool is_empty() const { return !(f && args); }
    // Reset the payload to the empty state.
    void empty() { f = nullptr; args = nullptr; }
    // Run the task. There is no null check: the caller must not invoke an
    // empty payload.
    void operator()() { f(args); }
};
class ThreadPool{
public:
// Task entry point: receives one opaque argument pointer and returns nothing.
typedef void(*payload_fn_t)(void*);

// A unit of work: a function pointer together with the opaque argument it is
// invoked with.
struct payload_t{
    payload_fn_t f;   // function to run; nullptr when the payload is empty
    void* args;       // opaque argument forwarded to f

    // Build a payload that will call f(args).
    constexpr payload_t(payload_fn_t f, void* args) noexcept
        : f(f), args(args) {}
    // A default-constructed payload is empty.
    constexpr payload_t() noexcept
        : f(nullptr), args(nullptr) {}

    // True when the payload is not fully populated (f or args is null).
    // The previous body returned `f && args`, which reported a populated
    // payload as empty and reported false right after empty() or default
    // construction; the result is now negated so it agrees with empty().
    bool is_empty() const { return !(f && args); }
    // Reset the payload to the empty state.
    void empty() { f = nullptr; args = nullptr; }
    // Run the task. There is no null check: the caller must not invoke an
    // empty payload.
    void operator()() { f(args); }
};
ThreadPool(uint32_t n_threads = 0);
void enqueue_task(const payload_t& payload);
bool busy();
@ -39,4 +39,31 @@ private:
};
// Abstract base class for triggers. Derived classes supply tick(), which is
// pure virtual and private here. Constructed from a ThreadPool pointer; how the
// pool is used is defined with the constructor, outside this header.
class Trigger{
private:
    void* triggers; //min-heap by t-rem
    virtual void tick() = 0;
public:
    Trigger(ThreadPool* tp);
    // Polymorphic base: a virtual destructor makes deleting a derived trigger
    // through a Trigger* well defined.
    virtual ~Trigger() = default;
};
// Trigger specialisation that overrides tick() and exposes add_trigger().
// NOTE(review): no constructor is declared here while Trigger only declares
// Trigger(ThreadPool*), so as shown this class has no usable default
// constructor -- confirm whether one is still to be added.
class IntervalBasedTrigger : public Trigger{
public:
// Timing record: a period and the time left until it next elapses.
struct timer{
uint32_t interval; // in milliseconds
uint32_t time_remaining; // NOTE(review): presumably also milliseconds -- confirm
};
// Registers a trigger; takes no arguments in this declaration.
void add_trigger();
private:
// Implements the pure virtual Trigger::tick().
void tick() override;
};
// Trigger specialisation that overrides tick() and exposes add_trigger().
// NOTE(review): no constructor is declared here while Trigger only declares
// Trigger(ThreadPool*), so as shown this class has no usable default
// constructor -- confirm whether one is still to be added.
class CallbackBasedTrigger : public Trigger{
public:
// Registers a trigger; takes no arguments in this declaration.
void add_trigger();
private:
// Implements the pure virtual Trigger::tick().
void tick() override;
};
#endif

@ -1,4 +1,5 @@
LOAD MODULE FROM "test.so"
FUNCTIONS (div(a:int, b:int) -> double,
FUNCTIONS (
div(a:int, b:int) -> double,
mulvec(a:int, b:vecfloat) -> vecfloat
);

@ -0,0 +1,7 @@
CREATE TABLE types_test(names varchar(10), val real, id int)
LOAD DATA INFILE "datatypes.csv"
INTO TABLE types_test
FIELDS TERMINATED BY ","
select names, val * 10000 + id from types_test

@ -1,14 +0,0 @@
#include "udf.hpp"
int main(){
vector_type _a{1,2,3,4};
vector_type _b{2,3,3,5};
ColRef<int> a("a");
ColRef<int> b("b");
a.initfrom(_a, "a");
b.initfrom(_b, "b");
ColRef<decltype(covariances2_gettype(a,b,0))> ret{4};
covariances2(a,b,2,4,ret);
print(ret);
}

@ -1,5 +0,0 @@
#include "udf.hpp"
int main(){
}
Loading…
Cancel
Save