updated documentations.

dev
Bill 2 years ago
parent 4974db3117
commit 98890884da

@ -119,4 +119,4 @@ docker:
clean: clean:
rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true
.PHONY: clean

@ -19,6 +19,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco
## Installation ## Installation
AQuery is tested on mainstream operating systems such as Windows, macOS and Linux AQuery is tested on mainstream operating systems such as Windows, macOS and Linux
### Docker (Recommended):
- See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- Arm-based Mac users have to build and run the **x86_64** Docker image, because MonetDB doesn't offer official binaries for arm64 Linux (run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`).
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
- If you need to access the system shell, type `dbg` to start the Python interpreter, then type `os.system('sh')` to launch a shell.
### Windows ### Windows
There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux. There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux.
@ -68,19 +75,19 @@ There're multiple options to run AQuery on Windows. You can use the native toolc
In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/). In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/).
### Docker:
- See installation instructions from [docker.com](https://www.docker.com). Run docker desktop to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
## Usage ## Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started. `python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
#### Commands: #### Commands:
- `<sql statement>`: parse AQuery statement - `<sql statement>`: parse AQuery statement
- `f <filename>`: parse all AQuery statements in file - `f <filename>`: parse all AQuery statements in file
- `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
- `dbg` start debugging session - `stats <OPTIONAL: options>` configure statistics.
- `print`: printout parsed AQuery statements - no options: show statistics for all queries so far.
- `on` : statistics will be shown for every future query.
- `off`: statistics will not be shown for every future query.
- `dbg`: start an interactive Python interpreter in the current context.
- `print`: print parsed AQuery statements (AST in JSON form)
- `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified. - `save <OPTIONAL: filename>`: save current code snippet. will use random filename if not specified.
- `exit`: quit the prompt - `exit`: quit the prompt
- `r`: run the last generated code snippet - `r`: run the last generated code snippet
@ -116,14 +123,16 @@ See ./tests/ for more examples.
- [x] Order by - [x] Order by
- [x] Assumption - [x] Assumption
- [x] Flatten - [x] Flatten
- [x] UDFs (Hybrid Engine only)
- [x] User Module
- [ ] Triggers
- [x] Join (Hybrid Engine only) - [x] Join (Hybrid Engine only)
- [ ] Subqueries - [ ] Subqueries
- [x] Query Optimization - [x] Query Optimization
- [x] Selection/Order by push-down - [x] Selection/Order by push-down
- [x] Join Optimization (Only in Hybrid Engine) - [x] Join Optimization (Only in Hybrid Engine)
- [ ] Threaded GC
- [ ] Extensibility
- [x] UDFs (Hybrid Engine only)
- [x] SDK and User Module
- [ ] Triggers
## Known Issues: ## Known Issues:
@ -133,6 +142,5 @@ See ./tests/ for more examples.
- [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) - [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
- [ ] C++ Meta-Programming: Eliminate template recursions as much as possible. - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
- [ ] Functionality: Basic helper functions in aquery - [ ] Functionality: Basic helper functions in aquery
- [x] Improvement: More DDLs, e.g. drop table, update table, etc.
- [ ] Bug: Join-Aware Column management - [ ] Bug: Join-Aware Column management
- [ ] Bug: Order By after Group By - [ ] Bug: Order By after Group By

@ -1,4 +1,6 @@
#!/usr/bash #!/usr/bash
echo "Don't execute this script if it's not on CIMS servers."
echo "run this script with source command. e.g. \`source ./cims.sh\` or \`. ./cims.sh\`"
module load g++-11.2 module load g++-11.2
PWD=`pwd` PWD=`pwd`
export LD_LIBRARY_PATH=$PWD/usr/lib64:$LD_LIBRARY_PATH:/lib:/lib64:/usr/lib:/usr/lib64 export LD_LIBRARY_PATH=$PWD/usr/lib64:$LD_LIBRARY_PATH:/lib:/lib64:/usr/lib:/usr/lib64

@ -118,8 +118,50 @@ class Backend_Type(enum.Enum):
BACKEND_MonetDB = 1 BACKEND_MonetDB = 1
BACKEND_MariaDB = 2 BACKEND_MariaDB = 2
@dataclass
class QueryStats:
    """Wall-clock timings (in seconds) for the phases of one query.

    The object works like a lap timer: `last_time` holds the timestamp of
    the previous lap boundary and `stop()` returns the time elapsed since
    then while starting the next lap.
    """
    # Timestamp of the last lap boundary. Defaults to None so that every
    # instance takes its own timestamp in __post_init__; a plain
    # `= time.time()` default would be evaluated only once, when the class
    # body is executed, and shared by all instances.
    last_time : Optional[float] = None
    parse_time : float = 0
    codegen_time : float = 0
    compile_time : float = 0
    exec_time : float = 0
    # Set to True when there are fresh numbers for print() to report.
    need_print : bool = False

    def __post_init__(self):
        # Start the clock at construction unless the caller supplied one.
        if self.last_time is None:
            self.last_time = time.time()

    def clear(self):
        """Zero all phase timings and restart the lap clock."""
        self.parse_time = 0
        self.codegen_time = 0
        self.compile_time = 0
        self.exec_time = 0
        self.last_time = time.time()

    def stop(self):
        """Return the seconds elapsed since the last lap boundary and
        start a new lap.

        The clock is read exactly once so that no time is lost between
        computing the elapsed value and resetting `last_time`.
        """
        now = time.time()
        elapsed = now - self.last_time
        self.last_time = now
        return elapsed

    def cumulate(self, other : Optional['QueryStats']):
        """Add this object's phase timings onto `other` (no-op if None)."""
        if other is not None:
            other.parse_time += self.parse_time
            other.codegen_time += self.codegen_time
            other.compile_time += self.compile_time
            other.exec_time += self.exec_time

    def print(self, cumulative = None, clear = True, need_print = True):
        """Report the collected timings if `self.need_print` is set.

        cumulative: optional QueryStats that receives a running total; when
            given, the current lap is closed and stored as `exec_time`
            before the timings are added to it.
        clear: reset this object after reporting.
        need_print: when False the timings are still accumulated and
            cleared, but nothing is written to stdout.

        Does nothing at all unless `self.need_print` is True.
        """
        if not self.need_print:
            return
        if cumulative:
            self.exec_time = self.stop()
            self.cumulate(cumulative)
        if need_print:
            # Inside a method body `print` resolves to the builtin.
            print(f'Parse Time: {self.parse_time}, Codegen Time: {self.codegen_time}, Compile Time: {self.compile_time}, Execution Time: {self.exec_time}.')
            print(f'Total Time: {self.parse_time + self.codegen_time + self.compile_time + self.exec_time}')
        self.need_print = False
        if clear:
            self.clear()
class Config: class Config:
__all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', 'n_buffers'] __all_attrs__ = ['running', 'new_query', 'server_mode',
'backend_type', 'has_dll',
'postproc_time', 'sql_time',
'n_buffers'
]
__init_attributes__ = False __init_attributes__ = False
@staticmethod @staticmethod
@ -134,7 +176,7 @@ class Config:
def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None: def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
Config.__init_self__() Config.__init_self__()
self.int_size = 4 self.int_size = 4
self.n_attrib = 6 self.n_attrib = len(Config.__all_attrs__)
self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size) self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32) self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
self.new_query = nq self.new_query = nq
@ -179,6 +221,9 @@ class PromptState():
init : Callable[['PromptState'], None] = lambda _:None init : Callable[['PromptState'], None] = lambda _:None
stmts = [''] stmts = ['']
payloads = {} payloads = {}
need_print : bool = False
stats : Optional[QueryStats] = None
currstats : Optional[QueryStats] = None
buildmgr : Optional[build_manager]= None buildmgr : Optional[build_manager]= None
## CLASSES END ## CLASSES END
@ -274,7 +319,9 @@ def init_prompt() -> PromptState:
state.buildmgr = build_manager() state.buildmgr = build_manager()
state.buildmgr.build_caches() state.buildmgr.build_caches()
state.cfg = Config(state.server_mode) state.cfg = Config(state.server_mode)
state.stats = QueryStats()
state.currstats = QueryStats()
if state.server_mode == RunType.IPC: if state.server_mode == RunType.IPC:
atexit.register(lambda: rm(state)) atexit.register(lambda: rm(state))
state.init = init_ipc state.init = init_ipc
@ -327,15 +374,18 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
payload = None payload = None
keep = True keep = True
cxt = engine.initialize() cxt = engine.initialize()
# state.currstats = QueryStats()
# state.need_print = False
while running(): while running():
try: try:
if state.server_status(): if state.server_status():
state.init() state.init()
while state.get_ready(): while state.get_ready():
time.sleep(.00001) time.sleep(.00001)
state.currstats.print(state.stats, need_print=state.need_print)
try: try:
og_q : str = next() og_q : str = next()
state.currstats.stop()
except EOFError: except EOFError:
print('stdin inreadable, Exiting...') print('stdin inreadable, Exiting...')
exit(0) exit(0)
@ -376,20 +426,25 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.send(sz, payload) state.send(sz, payload)
except TypeError as e: except TypeError as e:
print(e) print(e)
state.currstats.codegen_time = state.currstats.stop()
state.currstats.compile_time = 0
state.currstats.exec_time = 0
qs = re.split(r'[ \t]', q) qs = re.split(r'[ \t]', q)
build_this = not(len(qs) > 1 and qs[1].startswith('n')) build_this = not(len(qs) > 1 and qs[1].startswith('n'))
if cxt.has_dll: if cxt.has_dll:
with open('out.cpp', 'wb') as outfile: with open('out.cpp', 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8')) outfile.write((cxt.finalize()).encode('utf-8'))
state.currstats.codegen_time += state.currstats.stop()
if build_this: if build_this:
state.buildmgr.build_dll() state.buildmgr.build_dll()
state.cfg.has_dll = 1 state.cfg.has_dll = 1
else: else:
state.cfg.has_dll = 0 state.cfg.has_dll = 0
state.currstats.compile_time = state.currstats.stop()
if build_this: if build_this:
state.set_ready() state.set_ready()
state.currstats.need_print = True
continue continue
elif q == 'dbg': elif q == 'dbg':
@ -469,6 +524,22 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open(filename, 'wb') as outfile: with open(filename, 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8')) outfile.write((cxt.finalize()).encode('utf-8'))
continue continue
elif q.startswith('stats'):
qs = re.split(r'[ \t]', q)
if len(qs) > 1:
if qs[1].startswith('on'):
state.need_print = True
continue
elif qs[1].startswith('off'):
state.need_print = False
continue
elif qs[1].startswith('last'):
state.currstats.need_print = True
state.currstats.print()
continue
state.stats.need_print = True
state.stats.print(clear = False)
continue
trimed = ws.sub(' ', q.lower()).split(' ') trimed = ws.sub(' ', q.lower()).split(' ')
if trimed[0].startswith('f'): if trimed[0].startswith('f'):
fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \ fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
@ -480,9 +551,11 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open('tests/' + fn, 'r') as file: with open('tests/' + fn, 'r') as file:
contents = file.read() contents = file.read()
state.stmts = parser.parse(contents) state.stmts = parser.parse(contents)
state.currstats.parse_time = state.currstats.stop()
continue continue
state.stmts = parser.parse(q) state.stmts = parser.parse(q)
cxt.Info(state.stmts) cxt.Info(state.stmts)
state.currstats.parse_time = state.currstats.stop()
except ParseException as e: except ParseException as e:
print(e) print(e)
continue continue

@ -96,6 +96,9 @@ class projection(ast_node):
else: else:
self.where = None self.where = None
if type(self.datasource) is join:
self.datasource.process_join_conditions()
if 'groupby' in node: if 'groupby' in node:
self.context.special_gb = groupby.check_special(self, node['groupby']) self.context.special_gb = groupby.check_special(self, node['groupby'])
@ -624,6 +627,7 @@ class join(ast_node):
self.joins.append((alias(tbls.__str__()), tbls.have_sep)) self.joins.append((alias(tbls.__str__()), tbls.have_sep))
self.tables += tbls.tables self.tables += tbls.tables
self.tables_dir = {**self.tables_dir, **tbls.tables_dir} self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
self.join_conditions += tbls.join_conditions
elif type(tbls) is TableInfo: elif type(tbls) is TableInfo:
self.joins.append((alias(tbls.table_name), False)) self.joins.append((alias(tbls.table_name), False))
@ -661,12 +665,17 @@ class join(ast_node):
if keys[0].lower().endswith('join'): if keys[0].lower().endswith('join'):
self.have_sep = True self.have_sep = True
j = join(self, node[keys[0]]) j = join(self, node[keys[0]])
self.join_conditions += j.join_conditions
tablename = f' {keys[0]} {j}' tablename = f' {keys[0]} {j}'
if len(keys) > 1 : if len(keys) > 1 :
_ex = expr(self, node[keys[1]])
if keys[1].lower() == 'on': if keys[1].lower() == 'on':
tablename += f' ON {expr(self, node[keys[1]])}' self.join_conditions += _ex.join_conditions
tablename += f' ON {_ex}'
elif keys[1].lower() == 'using': elif keys[1].lower() == 'using':
tablename += f' USING {expr(self, node[keys[1]])}' if _ex.is_ColExpr:
self.join_conditions += (_ex.raw_col, j.get_cols(_ex.raw_col.name))
tablename += f' USING {_ex}'
self.joins.append((tablename, self.have_sep)) self.joins.append((tablename, self.have_sep))
self.tables += j.tables self.tables += j.tables
self.tables_dir = {**self.tables_dir, **j.tables_dir} self.tables_dir = {**self.tables_dir, **j.tables_dir}
@ -711,7 +720,9 @@ class join(ast_node):
# TODO: join condition awareness # TODO: join condition awareness
def process_join_conditions(self): def process_join_conditions(self):
pass # This is done after both from
# and where clause are processed
print(self.join_conditions)
def consume(self, node): def consume(self, node):
self.sql = '' self.sql = ''
@ -720,7 +731,6 @@ class join(ast_node):
self.sql += j[0] # using JOIN keyword self.sql += j[0] # using JOIN keyword
else: else:
self.sql += ', ' + j[0] # using comma self.sql += ', ' + j[0] # using comma
self.process_join_conditions()
if node and self.sql and self.top_level: if node and self.sql and self.top_level:
self.sql = ' FROM ' + self.sql self.sql = ' FROM ' + self.sql

@ -219,7 +219,9 @@ class expr(ast_node):
if (is_joincond and len(self.children) == 2 if (is_joincond and len(self.children) == 2
and all([c.is_ColExpr for c in self.children])) : and all([c.is_ColExpr for c in self.children])) :
self.root.join_conditions.append((c.raw_col for c in self.children)) self.root.join_conditions.append(
self.children[0].raw_col, self.children[1].raw_col
)
if type(node) is str: if type(node) is str:
if self.is_udfexpr: if self.is_udfexpr:

@ -67,10 +67,15 @@ struct Context{
#define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__ #define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__
typedef void (*deallocator_t) (void*); typedef void (*deallocator_t) (void*);
extern void default_deallocator(void* ptr);
extern void* Aalloc(unsigned long long sz); extern void* Aalloc(unsigned long long sz,
deallocator_t deallocator = default_deallocator
);
extern void Afree(void * mem); extern void Afree(void * mem);
extern void register_memory(void* ptr, deallocator_t deallocator); extern void register_memory(void* ptr,
deallocator_t deallocator = default_deallocator
);
__AQEXPORT__(void) init_session(Context* cxt); __AQEXPORT__(void) init_session(Context* cxt);

@ -26,6 +26,10 @@ void register_memory(void* ptr, deallocator_t deallocator){
memmap->operator[](ptr) = deallocator; memmap->operator[](ptr) = deallocator;
} }
// Deallocator used when the caller of Aalloc()/register_memory() does not
// supply one: hands the pointer straight to the C runtime's free().
// NOTE(review): assumes ptr was obtained from the malloc family -- confirm
// against the allocation sites.
void default_deallocator(void* ptr)
{
	::free(ptr);
}
__AQEXPORT__(void) init_session(Context* cxt){ __AQEXPORT__(void) init_session(Context* cxt){
session = &cxt->current; session = &cxt->current;
} }

@ -17,7 +17,7 @@ enum Backend_Type {
}; };
struct Config{ struct Config{
int running, new_query, server_mode, int running, new_query, server_mode,
backend_type, has_dll, n_buffers; backend_type, has_dll, exec_time, n_buffers;
int buffer_sizes[]; int buffer_sizes[];
}; };

@ -297,6 +297,26 @@ template <class T>
using decays = typename decayS<typename std::decay<T>::type>::type; using decays = typename decayS<typename std::decay<T>::type>::type;
template <class T> template <class T>
using decay_inner = typename decayS<T>::type; using decay_inner = typename decayS<T>::type;
// Structural equivalence of two arithmetic-like types: T1 and T2 are treated
// as "the same" when they agree on signedness, on being floating point, and
// on size. Computed as a plain boolean conjunction rather than by nesting
// conditional type selections, so no helper types are instantiated.
template <class T1, class T2>
struct aqis_same_impl {
	constexpr static bool value =
		std::is_signed_v<T1> == std::is_signed_v<T2> &&
		std::is_floating_point_v<T1> == std::is_floating_point_v<T2> &&
		sizeof(T1) == sizeof(T2);
};
// Convenience variable template for aqis_same_impl<T1, T2>::value.
template <class T1, class T2>
constexpr bool aqis_same = aqis_same_impl<T1, T2>::value;
template <class, template <class...> class T> template <class, template <class...> class T>
struct instance_of_impl : std::false_type {}; struct instance_of_impl : std::false_type {};

@ -1,5 +1,7 @@
#!aquery #!aquery
# stats on
select "hello world" select "hello world"
xexec xexec
@ -39,4 +41,6 @@ echo Testing Sample Queries
f network.a f network.a
xexec xexec
stats
exit exit

Loading…
Cancel
Save