Updated documentation.

dev
Bill 2 years ago
parent 4974db3117
commit 98890884da

@ -119,4 +119,4 @@ docker:
clean:
rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true
.PHONY: clean

@ -19,6 +19,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco
## Installation
AQuery is tested on mainstream operating systems such as Windows, macOS and Linux
### Docker (Recommended):
- See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
- If there is a need to access the system shell, type `dbg` to activate python interpreter and type `os.system('sh')` to launch a shell.
### Windows
There are multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio, gcc from Cygwin/MinGW, or run it under Windows Subsystem for Linux.
@ -68,19 +75,19 @@ There're multiple options to run AQuery on Windows. You can use the native toolc
In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/).
### Docker:
- See installation instructions from [docker.com](https://www.docker.com). Run docker desktop to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run -it --rm aquery`)
## Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
#### Commands:
- `<sql statement>`: parse AQuery statement
- `f <filename>`: parse all AQuery statements in file
- `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
- `dbg`: start a debugging session
- `print`: print out parsed AQuery statements
- `stats <OPTIONAL: options>`: configure statistics.
- no options: show statistics for all queries so far.
- `on` : statistics will be shown for every future query.
- `off`: statistics will not be shown for every future query.
- `dbg`: start a Python interactive interpreter in the current context.
- `print`: print parsed AQuery statements (AST in JSON form)
- `save <OPTIONAL: filename>`: save the current code snippet; a random filename is used if none is specified.
- `exit`: quit the prompt
- `r`: run the last generated code snippet
@ -116,14 +123,16 @@ See ./tests/ for more examples.
- [x] Order by
- [x] Assumption
- [x] Flatten
- [x] UDFs (Hybrid Engine only)
- [x] User Module
- [ ] Triggers
- [x] Join (Hybrid Engine only)
- [ ] Subqueries
- [x] Query Optimization
- [x] Selection/Order by push-down
- [x] Join Optimization (Only in Hybrid Engine)
- [ ] Threaded GC
- [ ] Extensibility
- [x] UDFs (Hybrid Engine only)
- [x] SDK and User Module
- [ ] Triggers
## Known Issues:
@ -133,6 +142,5 @@ See ./tests/ for more examples.
- [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
- [ ] C++ Meta-Programming: Eliminate template recursions as much as possible.
- [ ] Functionality: Basic helper functions in aquery
- [x] Improvement: More DDLs, e.g. drop table, update table, etc.
- [ ] Bug: Join-Aware Column management
- [ ] Bug: Order By after Group By

@ -1,4 +1,6 @@
#!/bin/bash
# Environment setup for running AQuery on NYU CIMS servers.
# Must be run with `source` (not executed) so that the `module load` and the
# exported LD_LIBRARY_PATH persist in the caller's shell.
# BUGFIX: shebang was `#!/usr/bash`, which is not a valid interpreter path on
# any mainstream distro; use /bin/bash (the line is informational anyway,
# since the script is meant to be sourced).
echo "Don't execute this script if it's not on CIMS servers."
echo "run this script with source command. e.g. \`source ./cims.sh\` or \`. ./cims.sh\`"
# Load a C++17-capable compiler toolchain provided by the CIMS module system.
module load g++-11.2
PWD=$(pwd)
# Put the bundled libraries in ./usr/lib64 ahead of the system search path.
export LD_LIBRARY_PATH=$PWD/usr/lib64:$LD_LIBRARY_PATH:/lib:/lib64:/usr/lib:/usr/lib64

@ -118,8 +118,50 @@ class Backend_Type(enum.Enum):
BACKEND_MonetDB = 1
BACKEND_MariaDB = 2
@dataclass
class QueryStats:
    """Wall-clock timing breakdown (in seconds) for one query: parse,
    code generation, compilation and execution phases, plus an interval
    clock (`last_time`) advanced by :meth:`stop`.

    NOTE(review): the original source was whitespace-mangled; statement
    nesting inside :meth:`print` has been reconstructed on a best-effort
    basis — confirm against upstream.
    """
    # Timestamp of the most recent checkpoint; stop() measures against it.
    last_time: float = 0.0
    parse_time: float = 0
    codegen_time: float = 0
    compile_time: float = 0
    exec_time: float = 0
    # Set by the prompt loop when this query's stats should be displayed.
    need_print: bool = False

    def __post_init__(self):
        # BUGFIX: the original default `last_time: int = time.time()` was
        # evaluated ONCE at class-definition (import) time and shared by every
        # instance, so the first stop() measured time since module import
        # rather than since construction. Initialize per instance instead
        # (honoring an explicit nonzero value passed by the caller).
        if not self.last_time:
            self.last_time = time.time()

    def clear(self):
        """Reset all accumulated timings and restart the interval clock."""
        self.parse_time = 0
        self.codegen_time = 0
        self.compile_time = 0
        self.exec_time = 0
        self.last_time = time.time()

    def stop(self):
        """Return seconds elapsed since the last checkpoint and restart it."""
        now = time.time()
        ret = now - self.last_time
        self.last_time = now
        return ret

    def cumulate(self, other: Optional['QueryStats']):
        """Add this query's per-phase timings into `other` (a running total).

        No-op when `other` is None.
        """
        if other:
            other.parse_time += self.parse_time
            other.codegen_time += self.codegen_time
            other.compile_time += self.compile_time
            other.exec_time += self.exec_time

    def print(self, cumulative=None, clear=True, need_print=True):
        """Finalize and optionally display this query's statistics.

        Does nothing unless `self.need_print` was set (i.e. a query actually
        ran). When `cumulative` is given, the execution phase is closed out
        via stop() and the timings are folded into `cumulative`. The report
        is only emitted when the `need_print` argument is also true; `clear`
        controls whether the counters are reset afterwards.
        """
        if self.need_print:
            if cumulative:
                self.exec_time = self.stop()
                self.cumulate(cumulative)
            if need_print:
                print(f'Parse Time: {self.parse_time}, Codegen Time: {self.codegen_time}, Compile Time: {self.compile_time}, Execution Time: {self.exec_time}.')
                print(f'Total Time: {self.parse_time + self.codegen_time + self.compile_time + self.exec_time}')
            self.need_print = False
            if clear:
                self.clear()
class Config:
__all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', 'n_buffers']
__all_attrs__ = ['running', 'new_query', 'server_mode',
'backend_type', 'has_dll',
'postproc_time', 'sql_time',
'n_buffers'
]
__init_attributes__ = False
@staticmethod
@ -134,7 +176,7 @@ class Config:
def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
Config.__init_self__()
self.int_size = 4
self.n_attrib = 6
self.n_attrib = len(Config.__all_attrs__)
self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
self.new_query = nq
@ -179,6 +221,9 @@ class PromptState():
init : Callable[['PromptState'], None] = lambda _:None
stmts = ['']
payloads = {}
need_print : bool = False
stats : Optional[QueryStats] = None
currstats : Optional[QueryStats] = None
buildmgr : Optional[build_manager]= None
## CLASSES END
@ -274,7 +319,9 @@ def init_prompt() -> PromptState:
state.buildmgr = build_manager()
state.buildmgr.build_caches()
state.cfg = Config(state.server_mode)
state.stats = QueryStats()
state.currstats = QueryStats()
if state.server_mode == RunType.IPC:
atexit.register(lambda: rm(state))
state.init = init_ipc
@ -327,15 +374,18 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
payload = None
keep = True
cxt = engine.initialize()
# state.currstats = QueryStats()
# state.need_print = False
while running():
try:
if state.server_status():
state.init()
while state.get_ready():
time.sleep(.00001)
state.currstats.print(state.stats, need_print=state.need_print)
try:
og_q : str = next()
state.currstats.stop()
except EOFError:
print('stdin inreadable, Exiting...')
exit(0)
@ -376,20 +426,25 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.send(sz, payload)
except TypeError as e:
print(e)
state.currstats.codegen_time = state.currstats.stop()
state.currstats.compile_time = 0
state.currstats.exec_time = 0
qs = re.split(r'[ \t]', q)
build_this = not(len(qs) > 1 and qs[1].startswith('n'))
if cxt.has_dll:
with open('out.cpp', 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8'))
state.currstats.codegen_time += state.currstats.stop()
if build_this:
state.buildmgr.build_dll()
state.cfg.has_dll = 1
else:
state.cfg.has_dll = 0
state.currstats.compile_time = state.currstats.stop()
if build_this:
state.set_ready()
state.currstats.need_print = True
continue
elif q == 'dbg':
@ -469,6 +524,22 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open(filename, 'wb') as outfile:
outfile.write((cxt.finalize()).encode('utf-8'))
continue
elif q.startswith('stats'):
qs = re.split(r'[ \t]', q)
if len(qs) > 1:
if qs[1].startswith('on'):
state.need_print = True
continue
elif qs[1].startswith('off'):
state.need_print = False
continue
elif qs[1].startswith('last'):
state.currstats.need_print = True
state.currstats.print()
continue
state.stats.need_print = True
state.stats.print(clear = False)
continue
trimed = ws.sub(' ', q.lower()).split(' ')
if trimed[0].startswith('f'):
fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
@ -480,9 +551,11 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
with open('tests/' + fn, 'r') as file:
contents = file.read()
state.stmts = parser.parse(contents)
state.currstats.parse_time = state.currstats.stop()
continue
state.stmts = parser.parse(q)
cxt.Info(state.stmts)
state.currstats.parse_time = state.currstats.stop()
except ParseException as e:
print(e)
continue

@ -96,6 +96,9 @@ class projection(ast_node):
else:
self.where = None
if type(self.datasource) is join:
self.datasource.process_join_conditions()
if 'groupby' in node:
self.context.special_gb = groupby.check_special(self, node['groupby'])
@ -624,6 +627,7 @@ class join(ast_node):
self.joins.append((alias(tbls.__str__()), tbls.have_sep))
self.tables += tbls.tables
self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
self.join_conditions += tbls.join_conditions
elif type(tbls) is TableInfo:
self.joins.append((alias(tbls.table_name), False))
@ -661,12 +665,17 @@ class join(ast_node):
if keys[0].lower().endswith('join'):
self.have_sep = True
j = join(self, node[keys[0]])
self.join_conditions += j.join_conditions
tablename = f' {keys[0]} {j}'
if len(keys) > 1 :
_ex = expr(self, node[keys[1]])
if keys[1].lower() == 'on':
tablename += f' ON {expr(self, node[keys[1]])}'
self.join_conditions += _ex.join_conditions
tablename += f' ON {_ex}'
elif keys[1].lower() == 'using':
tablename += f' USING {expr(self, node[keys[1]])}'
if _ex.is_ColExpr:
self.join_conditions += (_ex.raw_col, j.get_cols(_ex.raw_col.name))
tablename += f' USING {_ex}'
self.joins.append((tablename, self.have_sep))
self.tables += j.tables
self.tables_dir = {**self.tables_dir, **j.tables_dir}
@ -711,7 +720,9 @@ class join(ast_node):
# TODO: join condition awareness
def process_join_conditions(self):
pass
# This is done after both from
# and where clause are processed
print(self.join_conditions)
def consume(self, node):
self.sql = ''
@ -720,7 +731,6 @@ class join(ast_node):
self.sql += j[0] # using JOIN keyword
else:
self.sql += ', ' + j[0] # using comma
self.process_join_conditions()
if node and self.sql and self.top_level:
self.sql = ' FROM ' + self.sql

@ -219,7 +219,9 @@ class expr(ast_node):
if (is_joincond and len(self.children) == 2
and all([c.is_ColExpr for c in self.children])) :
self.root.join_conditions.append((c.raw_col for c in self.children))
self.root.join_conditions.append(
self.children[0].raw_col, self.children[1].raw_col
)
if type(node) is str:
if self.is_udfexpr:

@ -67,10 +67,15 @@ struct Context{
#define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__
typedef void (*deallocator_t) (void*);
extern void default_deallocator(void* ptr);
extern void* Aalloc(unsigned long long sz);
extern void* Aalloc(unsigned long long sz,
deallocator_t deallocator = default_deallocator
);
extern void Afree(void * mem);
extern void register_memory(void* ptr, deallocator_t deallocator);
extern void register_memory(void* ptr,
deallocator_t deallocator = default_deallocator
);
__AQEXPORT__(void) init_session(Context* cxt);

@ -26,6 +26,10 @@ void register_memory(void* ptr, deallocator_t deallocator){
memmap->operator[](ptr) = deallocator;
}
// Default cleanup routine used when callers of Aalloc()/register_memory()
// do not supply their own deallocator (both declare it as the default
// argument): releases memory obtained from the malloc family with free().
void default_deallocator(void* ptr){
    free(ptr);
}
// Exported (extern "C", DLL-visible) entry point: binds this translation
// unit to an execution context by storing a pointer to the context's
// `current` member in the file-scope `session` variable (declared elsewhere
// in this TU). NOTE(review): presumably called once per loaded module before
// any query code runs — confirm against the loader.
__AQEXPORT__(void) init_session(Context* cxt){
    session = &cxt->current;
}

@ -17,7 +17,7 @@ enum Backend_Type {
};
struct Config{
int running, new_query, server_mode,
backend_type, has_dll, n_buffers;
backend_type, has_dll, exec_time, n_buffers;
int buffer_sizes[];
};

@ -297,6 +297,26 @@ template <class T>
using decays = typename decayS<typename std::decay<T>::type>::type;
template <class T>
using decay_inner = typename decayS<T>::type;
// Relaxed "same storage type" trait: T1 and T2 compare equal when they agree
// on (1) signedness, (2) floating-point-ness and (3) byte size — e.g. `long`
// and `long long` on LP64 platforms are treated as interchangeable even
// though std::is_same would say otherwise.
// NOTE(review): `Cond(c, a, b)` is a project macro, presumably expanding to
// std::conditional_t<c, a, b> (the outermost level uses std::conditional_t
// directly) — confirm its definition before relying on this reading.
template <class T1, class T2>
struct aqis_same_impl {
    constexpr static bool value =
        std::conditional_t<
            std::is_signed_v<T1> == std::is_signed_v<T2>,
            Cond(
                std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
                Cond(
                    sizeof(T1) == sizeof(T2),
                    std::true_type,
                    std::false_type
                ),
                std::false_type
            ),
            std::false_type
        >::value;
};

// Variable-template shorthand, mirroring the std::is_same_v convention.
template <class T1, class T2>
constexpr bool aqis_same = aqis_same_impl<T1, T2>::value;
template <class, template <class...> class T>
struct instance_of_impl : std::false_type {};

@ -1,5 +1,7 @@
#!aquery
# stats on
select "hello world"
xexec
@ -39,4 +41,6 @@ echo Testing Sample Queries
f network.a
xexec
stats
exit

Loading…
Cancel
Save