From fb870c9977f852e88787f92ca8cd769b93963fd9 Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 23 Sep 2022 16:08:24 +0800 Subject: [PATCH 01/12] fixed wildcard compound cols, ratios, etc. --- build.py | 2 +- engine/types.py | 17 +++++++++---- engine/utils.py | 4 ++++ prompt.py | 4 ++-- reconstruct/ast.py | 46 +++++++++++++++++++++++++----------- reconstruct/expr.py | 9 +++---- reconstruct/new_expr.py | 4 ++-- sdk/aquery.h | 5 ++++ server/aggregations.h | 34 ++++++++++++++++++++++++++ server/table.h | 1 + server/table_ext_monetdb.hpp | 4 ++-- server/types.h | 29 +++++++++++++++++++++-- tests/best_profit.a | 3 +-- 13 files changed, 129 insertions(+), 33 deletions(-) diff --git a/build.py b/build.py index 3312a0b..aa10a7a 100644 --- a/build.py +++ b/build.py @@ -69,7 +69,7 @@ class checksums: class build_manager: sourcefiles = [ - 'build.py', + 'build.py', 'Makefile', 'server/server.cpp', 'server/io.cpp', 'server/monetdb_conn.cpp', 'server/threading.cpp', 'server/winhelper.cpp' diff --git a/engine/types.py b/engine/types.py index 3083795..6e49eea 100644 --- a/engine/types.py +++ b/engine/types.py @@ -106,7 +106,9 @@ ULongT = Types(8, name = 'uint64', sqlname = 'UINT64', fp_type=DoubleT) UIntT = Types(7, name = 'uint32', sqlname = 'UINT32', long_type=ULongT, fp_type=FloatT) UShortT = Types(6, name = 'uint16', sqlname = 'UINT16', long_type=ULongT, fp_type=FloatT) UByteT = Types(5, name = 'uint8', sqlname = 'UINT8', long_type=ULongT, fp_type=FloatT) -StrT = Types(200, name = 'str', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::ASTR') +StrT = Types(200, name = 'str', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') +TextT = Types(200, name = 'text', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') +VarcharT = Types(200, name = 'varchar', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::ASTR') VoidT = Types(200, name = 'void', cname = 'void', sqlname='Null', ctype_name = 'types::None') class VectorT(Types): 
@@ -138,7 +140,10 @@ int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT) fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT) temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT) -builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types} +builtin_types : Dict[str, Types] = { + 'string' : StrT, + **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT), + **int_types, **fp_types, **temporal_types} def get_int128_support(): for t in int_types.values(): @@ -267,7 +272,9 @@ def windowed_fn_behavor(op: OperatorBase, c_code, *x): # arithmetic opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior) -opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) +# monetdb wont extend int division to fp type +# opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) +opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior) opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior) opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior) opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior) @@ -288,7 +295,9 @@ opdistinct = OperatorBase('distinct', 1, as_is, cname = '.distinct()', sqlname = fnmax = OperatorBase('max', 1, as_is, cname = 'max', sqlname = 'MAX', call = fn_behavior) fnmin = OperatorBase('min', 1, as_is, cname = 'min', sqlname = 'MIN', call = fn_behavior) fndeltas = OperatorBase('deltas', 1, as_is, cname = 'deltas', sqlname = 'DELTAS', call = fn_behavior) +fnratios = 
OperatorBase('ratios', [1, 2], fp(ty_clamp(as_is, -1)), cname = 'ratios', sqlname = 'RATIOS', call = windowed_fn_behavor) fnlast = OperatorBase('last', 1, as_is, cname = 'last', sqlname = 'LAST', call = fn_behavior) +fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', call = fn_behavior) #fnsum = OperatorBase('sum', 1, ext(auto_extension), cname = 'sum', sqlname = 'SUM', call = fn_behavior) #fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior) fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior) @@ -324,7 +333,7 @@ builtin_unary_logical = _op_make_dict(opnot) builtin_unary_arith = _op_make_dict(opneg) builtin_unary_special = _op_make_dict(spnull, opdistinct) builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow) -builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnlast, fnsums, fnavgs, fncnt) +builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnratios, fnlast, fnfirst, fnsums, fnavgs, fncnt) user_module_func = {} builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, diff --git a/engine/utils.py b/engine/utils.py index 1a8b403..995d354 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -137,3 +137,7 @@ def add_dll_dir(dll: str): os.environ['PATH'] = os.path.abspath(dll) + os.pathsep + os.environ['PATH'] nullstream = open(os.devnull, 'w') + + +def clamp(val, minval, maxval): + return min(max(val, minval), maxval) \ No newline at end of file diff --git a/prompt.py b/prompt.py index 0f59d33..91fbbe2 100644 --- a/prompt.py +++ b/prompt.py @@ -29,7 +29,7 @@ prompt_help = '''\ ******** AQuery Prompt Help ********* help: - print out this message + print this help message help commandline: print help message for 
AQuery Commandline : @@ -420,7 +420,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): continue elif q == 'format' or q == 'fmt': subprocess.call(['clang-format', 'out.cpp']) - elif q == 'exit': + elif q == 'exit' or q == 'exit()': rm(state) exit() elif q == 'r': # build and run diff --git a/reconstruct/ast.py b/reconstruct/ast.py index d6293c4..6ed4e63 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -133,7 +133,7 @@ class projection(ast_node): sql_expr = expr(self, e, c_code=False) this_type = proj_expr.type name = proj_expr.sql - compound = True # compound column + compound = [proj_expr.is_compound > 1] # compound column proj_expr.cols_mentioned = self.datasource.rec alias = '' if 'name' in proj: # renaming column by AS keyword @@ -142,23 +142,28 @@ class projection(ast_node): if not proj_expr.is_special: if proj_expr.node == '*': name = [c.get_full_name() for c in self.datasource.rec] + this_type = [c.type for c in self.datasource.rec] + compound = [c.compound for c in self.datasource.rec] + proj_expr = [expr(self, c.name) for c in self.datasource.rec] else: y = lambda x:x count = lambda : 'count(*)' name = enlist(sql_expr.eval(False, y, count=count)) - for n in name: + this_type = enlist(this_type) + proj_expr = enlist(proj_expr) + for t, n, pexpr in zip(this_type, name, proj_expr): offset = len(col_exprs) if n not in self.var_table: self.var_table[n] = offset - if proj_expr.is_ColExpr and type(proj_expr.raw_col) is ColRef: - for _alias in (proj_expr.raw_col.table.alias): + if pexpr.is_ColExpr and type(pexpr.raw_col) is ColRef: + for _alias in (pexpr.raw_col.table.alias): self.var_table[f'{_alias}.'+n] = offset - proj_map[i] = [this_type, offset, proj_expr] + proj_map[i] = [t, offset, pexpr] col_expr = n + ' AS ' + alias if alias else n if alias: self.var_table[alias] = offset - col_exprs.append((col_expr, proj_expr.type)) + col_exprs.append((col_expr, t)) i += 1 else: 
self.context.headers.add('"./server/aggregations.h"') @@ -169,7 +174,8 @@ class projection(ast_node): i += 1 name = enlist(name) disp_name = [get_legal_name(alias if alias else n) for n in name] - + this_type = enlist(this_type) + elif type(proj) is str: col = self.datasource.get_col(proj) this_type = col.type @@ -178,8 +184,8 @@ class projection(ast_node): # name = col.name self.datasource.rec = None # TODO: Type deduction in Python - for n in disp_name: - cols.append(ColRef(this_type, self.out_table, None, n, len(cols), compound=compound)) + for t, n, c in zip(this_type, disp_name, compound): + cols.append(ColRef(t, self.out_table, None, n, len(cols), compound=c)) self.out_table.add_cols(cols, new = False) @@ -213,7 +219,7 @@ class projection(ast_node): self.add(self.group_node.sql) if self.col_ext or self.group_node and self.group_node.use_sp_gb: - self.use_postproc = True + self.has_postproc = True o = self.assumptions if 'orderby' in node: @@ -223,7 +229,7 @@ class projection(ast_node): if 'outfile' in node: self.outfile = outfile(self, node['outfile'], sql = self.sql) - if not self.use_postproc: + if not self.has_postproc: self.sql += self.outfile.sql else: self.outfile = None @@ -279,11 +285,12 @@ class projection(ast_node): val[2].cols_mentioned.intersection( self.datasource.all_cols().difference(self.group_node.refs)) ) and val[2].is_compound # compound val not in key - # or + or + val[2].is_compound > 1 # (not self.group_node and val[2].is_compound) ): out_typenames[key] = f'ColRef<{out_typenames[key]}>' - + self.out_table.columns[key].compound = True outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in self.out_table.columns]) self.outtable_col_names = 'names_' + base62uuid(4) self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};') @@ -729,6 +736,17 @@ class create_table(ast_node): name = 'create_table' first_order = name def init(self, node): + node = node[self.name] + if 'query' in node: + if 'name' not in 
node: + raise ValueError("Table name not specified") + projection_node = node['query'] + projection_node['into'] = node['name'] + projection(None, projection_node, self.context) + self.produce = lambda *_: None + self.spawn = lambda *_: None + self.consume = lambda *_: None + return if self.parent is None: self.context.sql_begin() self.sql = 'CREATE TABLE ' @@ -851,7 +869,7 @@ class outfile(ast_node): def init(self, _): assert(isinstance(self.parent, projection)) - if not self.parent.use_postproc: + if not self.parent.has_postproc: if self.context.dialect == 'MonetDB': self.produce = self.produce_monetdb else: diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 504ab8e..e5980d4 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -80,7 +80,7 @@ class expr(ast_node): self.udf_map = parent.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last'] + self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first'] def produce(self, node): from engine.utils import enlist @@ -114,9 +114,9 @@ class expr(ast_node): str_vals = [e.sql for e in exp_vals] type_vals = [e.type for e in exp_vals] - is_compound = any([e.is_compound for e in exp_vals]) + is_compound = max([e.is_compound for e in exp_vals]) if key in self.ext_aggfuncs: - self.is_compound = False + self.is_compound = max(0, is_compound - 1) else: self.is_compound = is_compound try: @@ -134,7 +134,7 @@ class expr(ast_node): self.sql = op(self.c_code, *str_vals) special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), - "maxs", "mins", "avgs", "sums", "deltas", "last"] + "maxs", "mins", "avgs", "sums", "deltas", "last", "first", "ratios"] if self.context.special_gb: special_func = [*special_func, *self.ext_aggfuncs] @@ -259,6 +259,7 @@ class expr(ast_node): self.sql = table_name + self.raw_col.name 
self.type = self.raw_col.type self.is_compound = True + self.is_compound += self.raw_col.compound self.opname = self.raw_col else: self.sql = '\'' + node + '\'' if node != '*' else '*' diff --git a/reconstruct/new_expr.py b/reconstruct/new_expr.py index d12ef56..b6b02cf 100644 --- a/reconstruct/new_expr.py +++ b/reconstruct/new_expr.py @@ -16,7 +16,7 @@ class expr_base(ast_node, metaclass = abc.ABCMeta): self.udf_map = self.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.narrow_funcs = ['sum', 'avg', 'count', 'min', 'max', 'last'] + self.narrow_funcs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first'] def get_variable(self): pass @@ -56,7 +56,7 @@ class expr_base(ast_node, metaclass = abc.ABCMeta): raise ValueError(f'Parse Error: more than 1 entry in {node}.') key, val = next(iter(node.items())) if key in self.operators: - self.child_exprs = [__class__(self, v) for v in val] + self.child_exprs = [self.__class__(self, v) for v in val] self.process_child_nodes() else: self.process_non_operator(key, val) diff --git a/sdk/aquery.h b/sdk/aquery.h index 3ef5bb6..2fe0404 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -75,7 +75,12 @@ extern void register_memory(void* ptr, deallocator_t deallocator); __AQEXPORT__(void) init_session(Context* cxt); #define __AQ_NO_SESSION__ __AQEXPORT__(void) init_session(Context*) {} + +#ifdef _MSC_VER +void* _cdecl memcpy(void*, void*, size_t); +#else void* memcpy(void*, const void*, unsigned long long); +#endif struct ColRef_storage { void* container; unsigned int capacity, size; diff --git a/server/aggregations.h b/server/aggregations.h index 514e4a6..48a4b35 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -106,6 +106,27 @@ decayed_t maxw(uint32_t w, const VT& arr) { return ret; } +template class VT> +decayed_t> ratiow(uint32_t w, const VT& arr) { + typedef std::decay_t> FPType; + uint32_t len = 
arr.size; + if (arr.size <= w) + len = 1; + w = w > len ? len : w; + decayed_t ret(arr.size); + ret[0] = 0; + for (uint32_t i = 0; i < w; ++i) + ret[i] = arr[i] / (FPType)arr[0]; + for (uint32_t i = w; i < arr.size; ++i) + ret[i] = arr[i] / (FPType) arr[i - w]; + return ret; +} + +template class VT> +decayed_t> ratios(const VT& arr) { + return ratiow(1, arr); +} + template class VT> decayed_t> sums(const VT& arr) { const uint32_t& len = arr.size; @@ -171,10 +192,21 @@ decayed_t deltas(const VT& arr) { template class VT> T last(const VT& arr) { + if(!arr.size) return 0; const uint32_t& len = arr.size; return arr[arr.size - 1]; } +template class VT> +T first(const VT& arr) { + if(!arr.size) return 0; + const uint32_t& len = arr.size; + return arr[0]; +} + +#define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \ +template constexpr inline T NAME(const T& v) { return RET; } + // wrong behavior with count(0) template constexpr inline T count(const T& v) { return 1; } template constexpr inline T max(const T& v) { return v; } @@ -185,9 +217,11 @@ template constexpr inline T maxw(uint32_t, const T& v) { return v; } template constexpr inline T minw(uint32_t, const T& v) { return v; } template constexpr inline T avgw(uint32_t, const T& v) { return v; } template constexpr inline T sumw(uint32_t, const T& v) { return v; } +template constexpr inline T ratiow(uint32_t, const T& v) { return 1; } template constexpr inline T maxs(const T& v) { return v; } template constexpr inline T mins(const T& v) { return v; } template constexpr inline T avgs(const T& v) { return v; } template constexpr inline T sums(const T& v) { return v; } template constexpr inline T last(const T& v) { return v; } template constexpr inline T daltas(const T& v) { return 0; } +template constexpr inline T ratios(const T& v) { return 1; } diff --git a/server/table.h b/server/table.h index 1e07968..e5502af 100644 --- a/server/table.h +++ b/server/table.h @@ -395,6 +395,7 @@ struct TableInfo { + num_time * 
types::time_t::string_length() + num_date * types::date_t::string_length() + num_timestamp * types::timestamp_t::string_length() + + 1 // padding for msvc not allowing empty arrays ]; setgbuf(cbuf); if(view) diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp index e74cfef..66c925a 100644 --- a/server/table_ext_monetdb.hpp +++ b/server/table_ext_monetdb.hpp @@ -44,8 +44,8 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { puts("getcols done"); for(int i = 0; i < sizeof...(Ts); ++i) { - printf("no:%d name: %s count:%d data: %p \n", - i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data); + printf("no:%d name: %s count:%d data: %p type:%d \n", + i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type); } std::string create_table_str = "CREATE TABLE "; create_table_str += alt_name; diff --git a/server/types.h b/server/types.h index d4a5656..13d1b72 100644 --- a/server/types.h +++ b/server/types.h @@ -29,7 +29,7 @@ namespace types { static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%c", "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "NULL", "ERROR" }; static constexpr const char* SQL_Type[] = { "INT", "REAL", "TEXT", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", - "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BIGINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; + "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; // TODO: deal with data/time <=> str/uint conversion @@ -167,8 +167,33 @@ namespace types { }; template using GetLongType = typename GetLongTypeImpl::type>::type; + + + template + struct GetLongerTypeImpl { + using type = Cond( + + __U(T), Cond(__Eq(char), unsigned short, + Cond(__Eq(short), unsigned int, + Cond(__Eq(int), unsigned long long, + ULL_Type + ))), + + Cond(Fp(T), 
double, + + Cond(__Eq(char), short, + Cond(__Eq(short), int, + Cond(__Eq(int), long, + LL_Type + )))) + + ); + }; + template + using GetLongerType = typename GetLongerTypeImpl::type>::type; } + struct astring_view { const unsigned char* str = 0; constexpr astring_view(const char* str) : @@ -200,7 +225,7 @@ struct astring_view { return reinterpret_cast(str); } operator const signed char* () const { - return reinterpret_cast(str); + return reinterpret_cast(str); } }; diff --git a/tests/best_profit.a b/tests/best_profit.a index f6f3bf5..4d242ec 100644 --- a/tests/best_profit.a +++ b/tests/best_profit.a @@ -43,5 +43,4 @@ SELECT ID, avgs(10, ClosePrice) FROM td NATURAL JOIN HistoricQuotes ASSUMING ASC TradeDate -GROUP BY ID -ORDER BY ID \ No newline at end of file +GROUP BY ID \ No newline at end of file From 3bbca0d6b081fd856f34d72aa24cefa1b11755b5 Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 23 Sep 2022 17:45:37 +0800 Subject: [PATCH 02/12] fix g++ constexpr --- server/types.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/server/types.h b/server/types.h index 13d1b72..b41da78 100644 --- a/server/types.h +++ b/server/types.h @@ -196,13 +196,22 @@ namespace types { struct astring_view { const unsigned char* str = 0; - constexpr astring_view(const char* str) : - str((const unsigned char*)(str)) {} - constexpr astring_view(const signed char* str) : + +#if defined(__clang__) or !defined(__GNUC__) + constexpr +#endif + astring_view(const char* str) noexcept : + str((const unsigned char*)(str)) {} +#if defined(__clang__) or !defined(__GNUC__) + constexpr +#endif + astring_view(const signed char* str) noexcept : str((const unsigned char*)(str)) {} - constexpr astring_view(const unsigned char* str) : + + constexpr + astring_view(const unsigned char* str) noexcept : str(str) {} - constexpr astring_view() = default; + constexpr astring_view() noexcept = default; bool operator==(const astring_view& r) const { auto this_str = str; From 
eac25ddbb37f18948aac773e289f593e04b82b29 Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 24 Sep 2022 22:33:59 +0800 Subject: [PATCH 03/12] Added drop table, 'if (not) exists' support. Bug fixes --- Makefile | 2 +- README.md | 77 +++++++++++++++++------------------ aquery_config.py | 5 ++- engine/types.py | 4 +- reconstruct/__init__.py | 2 + reconstruct/ast.py | 43 ++++++++++++++++--- reconstruct/storage.py | 2 +- server/aggregations.h | 3 ++ server/io.cpp | 26 ++++++------ server/table.h | 2 +- server/table_ext_monetdb.hpp | 23 ++++++++--- server/types.h | 12 ++++-- server/vector_type.hpp | 25 +++++++++++- test.aquery | Bin 511 -> 511 bytes tests/network.a | 1 - 15 files changed, 151 insertions(+), 76 deletions(-) diff --git a/Makefile b/Makefile index 4a16eb8..291a2a6 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,6 @@ docker: docker build -t aquery . clean: - rm *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true + rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true diff --git a/README.md b/README.md index 83a072b..e171ab9 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,43 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution. +# Installation +## Requirements +1. Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. (however c++20 is recommended if available for heterogeneous lookup on unordered containers) + - GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug) + - Clang: 5.0 or above (Recommended) + - MSVC: 2017 or later (2022 or above is recommended) + +2. Monetdb for Hybrid Engine + - On windows, the required libraries and headers are already included in the repo. + - On Linux, see [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) for instructions. 
+ - On MacOS, Monetdb can be easily installed in homebrew `brew install monetdb`. + +3. Python 3.6 or above and install required packages in requirements.txt by `python3 -m pip install -r requirements.txt` +## Usage +`python3 prompt.py` will launch the interactive command prompt. The server binary will be autometically rebuilt and started. +#### Commands: +- ``: parse AQuery statement +- `f `: parse all AQuery statements in file +- `dbg` start debugging session +- `print`: printout parsed AQuery statements + +- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. +- `save `: save current code snippet. will use random filename if not specified. +- `exit`: quit the prompt +- `exec`: execute last parsed statement(s) with AQuery Execution Engine (Old). AQuery Execution Engine executes query by compiling it to C++ code and then executing it. +- `r`: run the last generated code snippet +### Example: + `f moving_avg.a`
+ `xexec` + +See ./tests/ for more examples. + +## Notes for arm64 macOS users +- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. +- However, they can't be mixed up, i.e. make sure every component, `python` binary, `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. +- Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. + ## Architecture ![Architecture](./docs/arch-hybrid.svg) @@ -40,53 +77,15 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco ## Known Issues: -- [x] User Module test - [ ] Interval based triggers -- [x] Hot reloading server binary +- [ ] Hot reloading server binary - [x] Bug fixes: type deduction misaligned in Hybrid Engine - [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) - [x] Limitation: putting ColRefs back to monetdb. (Comparison) - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible. -- [x] Limitation: Date and Time, String operations, Funcs in groupby agg. - [ ] Functionality: Basic helper functions in aquery - [ ] Improvement: More DDLs, e.g. drop table, update table, etc. - [ ] Bug: Join-Aware Column management - [ ] Bug: Order By after Group By -# Installation -## Requirements -1. Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. (however c++20 is recommended if available for heterogeneous lookup on unordered containers) - - GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug) - - Clang: 5.0 or above (Recommended) - - MSVC: 2017 or later (2022 or above is recommended) - -2. Monetdb for Hybrid Engine - - On windows, the required libraries and headers are already included in the repo. 
- - On Linux, see [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) for instructions. - - On MacOS, Monetdb can be easily installed in homebrew `brew install monetdb`. - -3. Python 3.6 or above and install required packages in requirements.txt by `python3 -m pip install -r requirements.txt` -## Usage -`python3 prompt.py` will launch the interactive command prompt. The server binary will be autometically rebuilt and started. -#### Commands: -- ``: parse AQuery statement -- `f `: parse all AQuery statements in file -- `dbg` start debugging session -- `print`: printout parsed AQuery statements - -- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. -- `save `: save current code snippet. will use random filename if not specified. -- `exit`: quit the prompt -- `exec`: execute last parsed statement(s) with AQuery Execution Engine (Old). AQuery Execution Engine executes query by compiling it to C++ code and then executing it. -- `r`: run the last generated code snippet -### Example: - `f moving_avg.a`
- `xexec` - -See ./tests/ for more examples. - -## Notes for arm64 macOS users -- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. -- However, they can't be mixed up, i.e. make sure every component, `python` binary, `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. -- Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. \ No newline at end of file diff --git a/aquery_config.py b/aquery_config.py index 3330b6e..616cb2a 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -2,7 +2,7 @@ ## GLOBAL CONFIGURATION FLAGS -version_string = '0.4.5a' +version_string = '0.4.6a' add_path_to_ldpath = True rebuild_backend = False run_backend = True @@ -13,7 +13,7 @@ os_platform = 'unknown' build_driver = 'Makefile' def init_config(): - global __config_initialized__, os_platform, msbuildroot + global __config_initialized__, os_platform, msbuildroot, build_driver ## SETUP ENVIRONMENT VARIABLES # __config_initialized__ = False #os_platform = 'unkown' @@ -48,6 +48,7 @@ def init_config(): vsloc = vswhere.find(prerelease = True, latest = True, prop = 'installationPath') if vsloc: msbuildroot = vsloc[0] + '/MSBuild/Current/Bin/MSBuild.exe' + build_driver = 'MSBuild' else: print('Warning: No Visual Studio installation found.') # print("adding path") diff --git a/engine/types.py b/engine/types.py index 6e49eea..74541c6 100644 --- a/engine/types.py +++ b/engine/types.py @@ -112,7 +112,7 @@ VarcharT = Types(200, name = 'varchar', cname = 'const char*', sqlname='VARCHAR' VoidT = Types(200, name = 'void', cname = 'void', sqlname='Null', ctype_name = 'types::None') class VectorT(Types): - def __init__(self, inner_type : Types, vector_type:str = 'ColRef'): + def __init__(self, inner_type : Types, vector_type:str = 'vector_type'): 
self.inner_type = inner_type self.vector_type = vector_type @@ -121,7 +121,7 @@ class VectorT(Types): return f'{self.vector_type}<{self.inner_type.name}>' @property def sqlname(self) -> str: - return 'BINARY' + return 'BIGINT' @property def cname(self) -> str: return self.name diff --git a/reconstruct/__init__.py b/reconstruct/__init__.py index c27a9da..fd02f61 100644 --- a/reconstruct/__init__.py +++ b/reconstruct/__init__.py @@ -18,6 +18,8 @@ def generate(ast, cxt): ast_node.types[k](None, ast, cxt) def exec(stmts, cxt = None, keep = False): + if 'stmts' not in stmts: + return cxt = initialize(cxt, keep) stmts_stmts = stmts['stmts'] if type(stmts_stmts) is list: diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 6ed4e63..52dc8ef 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -2,6 +2,7 @@ from copy import deepcopy from dataclasses import dataclass from enum import Enum, auto from typing import Set, Tuple, Dict, Union, List, Optional + from engine.types import * from engine.utils import enlist, base62uuid, base62alp, get_legal_name from reconstruct.storage import Context, TableInfo, ColRef @@ -151,7 +152,8 @@ class projection(ast_node): name = enlist(sql_expr.eval(False, y, count=count)) this_type = enlist(this_type) proj_expr = enlist(proj_expr) - for t, n, pexpr in zip(this_type, name, proj_expr): + for t, n, pexpr, cp in zip(this_type, name, proj_expr, compound): + t = VectorT(t) if cp else t offset = len(col_exprs) if n not in self.var_table: self.var_table[n] = offset @@ -285,11 +287,11 @@ class projection(ast_node): val[2].cols_mentioned.intersection( self.datasource.all_cols().difference(self.group_node.refs)) ) and val[2].is_compound # compound val not in key - or - val[2].is_compound > 1 + # or + # val[2].is_compound > 1 # (not self.group_node and val[2].is_compound) ): - out_typenames[key] = f'ColRef<{out_typenames[key]}>' + out_typenames[key] = f'vector_type<{out_typenames[key]}>' self.out_table.columns[key].compound = True 
outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in self.out_table.columns]) self.outtable_col_names = 'names_' + base62uuid(4) @@ -530,7 +532,7 @@ class groupby_c(ast_node): materialize_builtin['_builtin_len'] = len_var if '_builtin_ret' in ex.udf_called.builtin_used: define_len_var() - gscanner.add(f'{ce[0]}.emplace_back({{{len_var}}});\n') + gscanner.add(f'{ce[0]}.emplace_back({len_var});\n') materialize_builtin['_builtin_ret'] = f'{ce[0]}.back()' gscanner.add(f'{ex.eval(c_code = True, y=get_var_names, materialize_builtin = materialize_builtin)};\n') continue @@ -763,16 +765,45 @@ class create_table(ast_node): if self.context.use_columnstore: self.sql += ' engine=ColumnStore' +class drop(ast_node): + name = 'drop' + first_order = name + def produce(self, node): + node = node['drop'] + tbl_name = node['table'] + if tbl_name in self.context.tables_byname: + tbl_obj = self.context.tables_byname[tbl_name] + # TODO: delete in postproc engine + self.context.tables_byname.pop(tbl_name) + self.context.tables.remove(tbl_obj) + self.sql += 'TABLE IF EXISTS ' + tbl_name + return + elif 'if_exists' not in node or not node['if_exists']: + print(f'Error: table {tbl_name} not found.') + self.sql = '' + class insert(ast_node): name = 'insert' first_order = name - + def init(self, node): + values = node['query'] + complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit'] + if any([kw in values for kw in complex_query_kw]): + values['into'] = node['insert'] + projection(None, values, self.context) + self.produce = lambda*_:None + self.spawn = lambda*_:None + self.consume = lambda*_:None + else: + super().init(node) + def produce(self, node): values = node['query']['select'] tbl = node['insert'] self.sql = f'INSERT INTO {tbl} VALUES(' # if len(values) != table.n_cols: # raise ValueError("Column Mismatch") + list_values = [] for i, s in enumerate(values): if 'value' in s: diff --git a/reconstruct/storage.py b/reconstruct/storage.py index c43131c..790f073 
100644 --- a/reconstruct/storage.py +++ b/reconstruct/storage.py @@ -59,7 +59,7 @@ class TableInfo: cxt.tables_byname[self.table_name] = self # construct reverse map def add_cols(self, cols, new = True): - for c in cols: + for c in enlist(cols): self.add_col(c, new) def add_col(self, c, new = True): diff --git a/server/aggregations.h b/server/aggregations.h index 48a4b35..05ffb56 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -137,6 +137,7 @@ decayed_t> sums(const VT& arr) { ret[i] = ret[i-1] + arr[i]; return ret; } + template class VT> decayed_t>> avgs(const VT& arr) { const uint32_t& len = arr.size; @@ -149,6 +150,7 @@ decayed_t>> avgs(const VT& arr) { ret[i] = (s+=arr[i])/(FPType)(i+1); return ret; } + template class VT> decayed_t> sumw(uint32_t w, const VT& arr) { const uint32_t& len = arr.size; @@ -162,6 +164,7 @@ decayed_t> sumw(uint32_t w, const VT& arr) { ret[i] = ret[i-1] + arr[i] - arr[i-w]; return ret; } + template class VT> decayed_t>> avgw(uint32_t w, const VT& arr) { typedef types::GetFPType> FPType; diff --git a/server/io.cpp b/server/io.cpp index 107829b..a47b3b3 100644 --- a/server/io.cpp +++ b/server/io.cpp @@ -265,16 +265,16 @@ string base62uuid(int l) { } -template -inline void vector_type<_Ty>::out(uint32_t n, const char* sep) const -{ - n = n > size ? size : n; - std::cout << '('; - { - uint32_t i = 0; - for (; i < n - 1; ++i) - std::cout << this->operator[](i) << sep; - std::cout << this->operator[](i); - } - std::cout << ')'; -} +// template +// inline void vector_type<_Ty>::out(uint32_t n, const char* sep) const +// { +// n = n > size ? 
size : n; +// std::cout << '('; +// { +// uint32_t i = 0; +// for (; i < n - 1; ++i) +// std::cout << this->operator[](i) << sep; +// std::cout << this->operator[](i); +// } +// std::cout << ')'; +// } diff --git a/server/table.h b/server/table.h index e5502af..8c0e177 100644 --- a/server/table.h +++ b/server/table.h @@ -129,7 +129,7 @@ public: } // defined in table_ext_monetdb.hpp - void* monetdb_get_col(); + void* monetdb_get_col(void** gc_vecs, uint32_t& cnt); }; template<> diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp index 66c925a..32c27f9 100644 --- a/server/table_ext_monetdb.hpp +++ b/server/table_ext_monetdb.hpp @@ -22,7 +22,7 @@ inline constexpr monetdbe_types AQType_2_monetdbe[] = { #else monetdbe_int64_t, #endif - monetdbe_int16_t, monetdbe_int8_t, monetdbe_bool, monetdbe_int64_t, + monetdbe_int16_t, monetdbe_int8_t, monetdbe_bool, monetdbe_int128_t, monetdbe_timestamp, monetdbe_int64_t, monetdbe_int64_t }; @@ -35,10 +35,13 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { monetdbe_column** monetdbe_cols = new monetdbe_column * [sizeof...(Ts)]; uint32_t i = 0; + constexpr auto n_vecs = count_vector_type((tuple_type*)(0)); + void* gc_vecs[1 + n_vecs]; puts("getcols..."); - const auto get_col = [&monetdbe_cols, &i, *this](auto v) { + uint32_t cnt = 0; + const auto get_col = [&monetdbe_cols, &i, *this, &gc_vecs, &cnt](auto v) { printf("%d %d\n", i, (ColRef*)v - colrefs); - monetdbe_cols[i++] = (monetdbe_column*)v->monetdb_get_col(); + monetdbe_cols[i++] = (monetdbe_column*)v->monetdb_get_col(gc_vecs, cnt); }; (get_col((ColRef*)(colrefs + i)), ...); puts("getcols done"); @@ -47,7 +50,7 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { printf("no:%d name: %s count:%d data: %p type:%d \n", i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type); } - std::string create_table_str = "CREATE TABLE "; + std::string create_table_str = "CREATE 
TABLE IF NOT EXISTS "; create_table_str += alt_name; create_table_str += " ("; i = 0; @@ -70,12 +73,14 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { return; } } + // for(uint32_t i = 0; i < n_vecs; ++i) + // free(gc_vecs[i]); puts("Error! Empty table."); } template -void* ColRef::monetdb_get_col() { +void* ColRef::monetdb_get_col(void** gc_vecs, uint32_t& cnt) { auto aq_type = AQType_2_monetdbe[types::Types::getType()]; monetdbe_column* col = (monetdbe_column*)malloc(sizeof(monetdbe_column)); @@ -83,7 +88,13 @@ void* ColRef::monetdb_get_col() { col->count = this->size; col->data = this->container; col->name = const_cast(this->name); - + // auto arr = (types::timestamp_t*) malloc (sizeof(types::timestamp_t)* this->size); + // if constexpr (is_vector_type){ + // for(uint32_t i = 0; i < this->size; ++i){ + // memcpy(arr + i, this->container + i, sizeof(types::timestamp_t)); + // } + // gc_vecs[cnt++] = arr; + // } return col; } diff --git a/server/types.h b/server/types.h index b41da78..f988d4d 100644 --- a/server/types.h +++ b/server/types.h @@ -29,7 +29,7 @@ namespace types { static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%c", "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "NULL", "ERROR" }; static constexpr const char* SQL_Type[] = { "INT", "REAL", "TEXT", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", - "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; + "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BOOL", "HUGEINT", "TIMESTAMP", "NULL", "ERROR"}; // TODO: deal with data/time <=> str/uint conversion @@ -197,12 +197,12 @@ namespace types { struct astring_view { const unsigned char* str = 0; -#if defined(__clang__) or !defined(__GNUC__) +#if defined(__clang__) || !defined(__GNUC__) constexpr #endif astring_view(const char* str) noexcept : str((const unsigned 
char*)(str)) {} -#if defined(__clang__) or !defined(__GNUC__) +#if defined(__clang__) || !defined(__GNUC__) constexpr #endif astring_view(const signed char* str) noexcept : @@ -373,4 +373,10 @@ constexpr size_t count_type(std::tuple* ts) { size_t t[] = {sum_type() ...}; return sum_type(t, sizeof...(Types)); } +template +constexpr size_t count_vector_type(std::tuple* ts) { + size_t t[] = {is_vector_type ...}; + return sum_type(t, sizeof...(Types)); +} + #endif // !_TYPES_H diff --git a/server/vector_type.hpp b/server/vector_type.hpp index 720db75..a73570d 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -12,11 +12,16 @@ #include #include #include +#include #include "hasher.h" #include "types.h" #pragma pack(push, 1) template +class slim_vector { + +}; +template class vector_type { public: typedef vector_type<_Ty> Decayed_t; @@ -249,7 +254,25 @@ public: } size = this->size + dist; } - void out(uint32_t n = 4, const char* sep = " ") const; + inline void out(uint32_t n = 4, const char* sep = " ") const + { + const char* more = ""; + if (n < this->size) + more = " ... 
"; + else + n = this->size; + + std::cout << '('; + if (n > 0) + { + uint32_t i = 0; + for (; i < n - 1; ++i) + std::cout << this->operator[](i) << sep; + std::cout << this->operator[](i); + } + std::cout<< more; + std::cout << ')'; + } vector_type<_Ty> subvec_memcpy(uint32_t start, uint32_t end) const { vector_type<_Ty> subvec(end - start); memcpy(subvec.container, container + start, sizeof(_Ty) * (end - start)); diff --git a/test.aquery b/test.aquery index f9cd33c47647f494ffedf5a6104d1185297de7df..624e03b3adbb19f3c0e1019406be8145a1d2d392 100644 GIT binary patch delta 20 bcmey*{GXXaSuwG&G_|O5VxaIw=_iZ;P|F97 delta 20 bcmey*{GXXanIW;TG_|O5VxaIw=_iZ;PW=aM diff --git a/tests/network.a b/tests/network.a index 922fec4..169a8b6 100644 --- a/tests/network.a +++ b/tests/network.a @@ -10,4 +10,3 @@ FROM network ASSUMING ASC src, ASC dst, ASC _time GROUP BY src, dst, sums (deltas(_time) > 120) - From 668d19338a364e3d4f7f83471ea70205aaedc85c Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 24 Sep 2022 22:51:49 +0800 Subject: [PATCH 04/12] update instructions for docker --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e171ab9..4889320 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco 1. Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. (however c++20 is recommended if available for heterogeneous lookup on unordered containers) - GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug) - Clang: 5.0 or above (Recommended) - - MSVC: 2017 or later (2022 or above is recommended) + - MSVC: 2019 or later (2022 or above is recommended) 2. Monetdb for Hybrid Engine - On windows, the required libraries and headers are already included in the repo. 
@@ -17,8 +17,13 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco - On MacOS, Monetdb can be easily installed in homebrew `brew install monetdb`. 3. Python 3.6 or above and install required packages in requirements.txt by `python3 -m pip install -r requirements.txt` + +## DOCKER: + - Alternatively, you can also use docker to run AQuery. + - Type `make docker` to build the docker image from scratch. + - For Arm-based Mac users, you would need to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux. ## Usage -`python3 prompt.py` will launch the interactive command prompt. The server binary will be autometically rebuilt and started. +`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started. #### Commands: - ``: parse AQuery statement - `f `: parse all AQuery statements in file @@ -87,5 +92,3 @@ See ./tests/ for more examples. - [ ] Improvement: More DDLs, e.g. drop table, update table, etc. - [ ] Bug: Join-Aware Column management - [ ] Bug: Order By after Group By - - From 52b2412c01a8e7533f6e8cb130461e2c0bdce08c Mon Sep 17 00:00:00 2001 From: Bill Date: Sun, 25 Sep 2022 21:14:21 +0800 Subject: [PATCH 05/12] Updated instructions --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4889320..27f86b6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution. -# Installation ## Requirements 1. Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. 
(however c++20 is recommended if available for heterogeneous lookup on unordered containers) - GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug) @@ -18,7 +17,46 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco 3. Python 3.6 or above and install required packages in requirements.txt by `python3 -m pip install -r requirements.txt` -## DOCKER: +## Installation +AQuery is tested on mainstream operating systems such as Windows, macOS and Linux +### Windows +There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux. + +- For WSL, Docker or linux virtual machines, see Linux, Docker sections below +- For Visual Studio: + 1. Install python3.6 or above from [official website](https://www.python.org/downloads/windows/) or Microsoft Store. + 2. Install Microsoft Visual Studio 2022 or later with **Desktop development with C++** selected. + 3. Clone AQuery repo from [Github](https://github.com/sunyinqi0508/AQuery2) + 4. Install python requirements with pip `python3 -m pip install -r requirements.txt` + 5. Change the build driver from aquery_config.py to "MSBuild" + 6. The libraries and headers for Monetdb are already included in msc-plugins, however you can also choose to download them from [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) and put them in the same place. + +- For MinGW: + 1. Install gcc and python3 using the package manager. (For Msys2, `pacman -S gcc python3`) + 2. Clone AQuery repo from Github + 3. Install python requirements + 4. The prebuilt binaries are included in ./lib directory. However, you could also rebuild them from [source](https://github.com/MonetDB/MonetDB). + +- Note that it might be possible to use python from python.org or Microsoft store with gcc from MinGW. However, it might not work because of ABI breakage. 
So the better way is to use gcc with MinGW python from pacman or use clang/MSVC instead. + +### macOS +- Install a package manager such as [homebrew](https://brew.sh) +- Install python3 and monetdb using homebrew `brew install python3 monetdb` +- Install C++ compiler come with Xcode commandline tool by `xcode-select --install` or from homebrew +- If you have multiple C++ compilers on the system. Specify C++ compiler by setting the **CXX** environment variable. e.g. `export CXX=clang` +- Install python packages from **requirements.txt** + +**for arm64 macOS users** +- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. +- However, they can't be mixed up, i.e. make sure every component, `python` binary, `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. +- Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. + +### Linux +- Install monetdb, see [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) for instructions. +- Install python3, C++ compiler and git. (For Ubuntu, run `apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git `) +- Install required python packages by `python3 -m pip install -r requirements.txt` +- If you have multiple C++ compilers on the system. Specify C++ compiler by setting the **CXX** environment variable. e.g. `export CXX=clang-14` +### Docker: - Alternatively, you can also use docker to run AQuery. - Type `make docker` to build the docker image from scratch. - For Arm-based Mac users, you would need to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux. 
@@ -33,7 +71,6 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco - `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - `save `: save current code snippet. will use random filename if not specified. - `exit`: quit the prompt -- `exec`: execute last parsed statement(s) with AQuery Execution Engine (Old). AQuery Execution Engine executes query by compiling it to C++ code and then executing it. - `r`: run the last generated code snippet ### Example: `f moving_avg.a`
@@ -41,10 +78,6 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco See ./tests/ for more examples. -## Notes for arm64 macOS users -- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. -- However, they can't be mixed up, i.e. make sure every component, `python` binary, `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. -- Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. ## Architecture ![Architecture](./docs/arch-hybrid.svg) @@ -86,9 +119,8 @@ See ./tests/ for more examples. - [ ] Hot reloading server binary - [x] Bug fixes: type deduction misaligned in Hybrid Engine - [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) -- [x] Limitation: putting ColRefs back to monetdb. (Comparison) - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible. - [ ] Functionality: Basic helper functions in aquery -- [ ] Improvement: More DDLs, e.g. drop table, update table, etc. +- [x] Improvement: More DDLs, e.g. drop table, update table, etc. 
- [ ] Bug: Join-Aware Column management - [ ] Bug: Order By after Group By From 406593a311370b38d3839d2b8462703b2025028d Mon Sep 17 00:00:00 2001 From: Bill Date: Sun, 25 Sep 2022 23:30:36 +0800 Subject: [PATCH 06/12] Fixed g++ std::hash<__uint128_t> undefined problem --- Dockerfile | 2 +- README.md | 2 +- build.py | 3 ++- server/hasher.h | 46 +++++++++++++++++++++++++++++++++++++++++++--- server/types.h | 32 ++++++++++++++++++++++---------- 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index d463363..92bd75d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN git clone https://github.com/sunyinqi0508/AQuery2 RUN python3 -m pip install -r AQuery2/requirements.txt -ENV IS_DOCKER_IMAGE=1 CXX=clang-14 +ENV IS_DOCKER_IMAGE=1 CXX=clang++-14 CMD cd AQuery2 && python3 prompt.py diff --git a/README.md b/README.md index 27f86b6..6bb0f6c 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ There're multiple options to run AQuery on Windows. You can use the native toolc - Install monetdb, see [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) for instructions. - Install python3, C++ compiler and git. (For Ubuntu, run `apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git `) - Install required python packages by `python3 -m pip install -r requirements.txt` -- If you have multiple C++ compilers on the system. Specify C++ compiler by setting the **CXX** environment variable. e.g. `export CXX=clang-14` +- If you have multiple C++ compilers on the system. Specify C++ compiler by setting the **CXX** environment variable. e.g. `export CXX=clang++-14` ### Docker: - Alternatively, you can also use docker to run AQuery. - Type `make docker` to build the docker image from scratch. 
diff --git a/build.py b/build.py index aa10a7a..f6f2561 100644 --- a/build.py +++ b/build.py @@ -104,7 +104,8 @@ class build_manager: def __init__(self, mgr : 'build_manager') -> None: super().__init__(mgr) os.environ['PCH'] = f'{mgr.PCH}' - os.environ['CXX'] = mgr.cxx if mgr.cxx else 'c++' + if 'CXX' not in os.environ: + os.environ['CXX'] = mgr.cxx if mgr.cxx else 'c++' def libaquery_a(self): self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery.a']] diff --git a/server/hasher.h b/server/hasher.h index 526c168..70a97e8 100644 --- a/server/hasher.h +++ b/server/hasher.h @@ -2,6 +2,7 @@ #include #include +#include #include "types.h" // only works for 64 bit systems constexpr size_t _FNV_offset_basis = 14695981039346656037ULL; @@ -21,7 +22,31 @@ inline size_t append_bytes(const astring_view& view) noexcept { return append_bytes(view.str); } - +#ifdef __SIZEOF_INT128__ +union int128_struct +{ + struct { + uint64_t low, high; + }__struct; + __int128_t value = 0; + __uint128_t uvalue; + constexpr int128_struct() : value(0) {} + constexpr int128_struct(const __int128_t &value) noexcept : value(value) {} + constexpr int128_struct(const __uint128_t &value) noexcept : uvalue(value) {} + operator __int128_t () const { + return value; + } + operator __uint128_t () const { + return uvalue; + } + operator __int128_t& () { + return value; + } + operator __uint128_t& () { + return uvalue; + } +}; +#endif template struct hasher { template typename std::enable_if< i == sizeof...(Types), @@ -32,8 +57,15 @@ struct hasher { template typename std::enable_if < i < sizeof ...(Types), size_t>::type hashi(const std::tuple& record) const { using current_type = typename std::decay>::type>::type; - - return std::hash()(std::get(record)) ^ hashi(record); +#ifdef __SIZEOF_INT128__ + using _current_type = typename std::conditional_t< + std::is_same_v || + std::is_same_v, + int128_struct, current_type>; +#else + #define _current_type current_type +#endif + return 
std::hash<_current_type>()(std::get(record)) ^ hashi(record); } size_t operator()(const std::tuple& record) const { return hashi(record); @@ -75,7 +107,15 @@ namespace std{ std::hash()(_Keyval.time); } }; +#ifdef __SIZEOF_INT128__ + template<> + struct hash{ + size_t operator() (const int128_struct& _Keyval) const noexcept { + return std::hash()(_Keyval.__struct.low) ^ std::hash()(_Keyval.__struct.high); + } + }; +#endif template struct hash> : public hasher{ }; diff --git a/server/types.h b/server/types.h index f988d4d..80f9eac 100644 --- a/server/types.h +++ b/server/types.h @@ -3,6 +3,7 @@ #include #include #include +using std::size_t; #if defined(__SIZEOF_INT128__) and not defined(_WIN32) #define __AQ__HAS__INT128__ @@ -194,19 +195,19 @@ namespace types { } -struct astring_view { +union astring_view { const unsigned char* str = 0; + const signed char* sstr; + const char* rstr; + size_t ptr; + -#if defined(__clang__) || !defined(__GNUC__) constexpr -#endif astring_view(const char* str) noexcept : - str((const unsigned char*)(str)) {} -#if defined(__clang__) || !defined(__GNUC__) + rstr(str) {} constexpr -#endif astring_view(const signed char* str) noexcept : - str((const unsigned char*)(str)) {} + sstr(str) {} constexpr astring_view(const unsigned char* str) noexcept : @@ -225,16 +226,27 @@ struct astring_view { return !(*this_str || *other_str); } bool operator >(const astring_view&r) const{ + auto this_str = str; + auto other_str = r.str; + bool ret = true; + while (*this_str && *other_str) { + if (*this_str <= *other_str) + ret = false; + this_str++; + other_str++; + } + return (*this_str && !*other_str) || + (ret && !*this_str && *other_str); } operator const char* () const { - return reinterpret_cast(str); + return rstr; } operator const unsigned char* () const { - return reinterpret_cast(str); + return str; } operator const signed char* () const { - return reinterpret_cast(str); + return sstr; } }; From b2f009532cd2679045a31c59eca134b2024ec83e Mon Sep 17 
00:00:00 2001 From: Bill Date: Mon, 26 Sep 2022 02:01:14 +0800 Subject: [PATCH 07/12] Fixed compilation for clang on Linux --- README.md | 2 +- server/table.h | 106 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 6bb0f6c..98ae406 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ AQuery is tested on mainstream operating systems such as Windows, macOS and Linu ### Windows There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux. -- For WSL, Docker or linux virtual machines, see Linux, Docker sections below +- For WSL, Docker or Linux virtual machines, see Linux, Docker sections below - For Visual Studio: 1. Install python3.6 or above from [official website](https://www.python.org/downloads/windows/) or Microsoft Store. 2. Install Microsoft Visual Studio 2022 or later with **Desktop development with C++** selected. 
diff --git a/server/table.h b/server/table.h index 8c0e177..68cea77 100644 --- a/server/table.h +++ b/server/table.h @@ -622,86 +622,144 @@ inline void TableInfo::print(const char* __restrict sep, const char* _ std::cout << end; } } +template class VT, + class TRet> +using test_vt_support = typename std::enable_if_t, ColRef> || + std::is_same_v, ColView> || + std::is_same_v, vector_type>, TRet>; + +template class VT> +using get_autoext_type = test_vt_support::type>>; + +template class VT> +using get_long_type = test_vt_support::type>>>; + +template class VT> +using get_fp_type = test_vt_support::type>>>; + +template class VT, template class VT2, + class TRet> +using test_vt_support2 = typename std::enable_if_t<(std::is_same_v, ColRef> || + std::is_same_v, ColView> || + std::is_same_v, vector_type>) && + (std::is_same_v, ColRef> || + std::is_same_v, ColView> || + std::is_same_v, vector_type>), TRet >; +template class VT, template class VT2> +using get_autoext_type2 = test_vt_support2::type>>; + +template class VT, template class VT2> +using get_long_type2 = test_vt_support2::type>>>; + +template class VT, template class VT2> +using get_fp_type2 = test_vt_support2::type>>>; + template class VT, template class VT2> -decayed_t::type> operator -(const VT& lhs, const VT2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_autoext_type2 +operator -(const VT& lhs, const VT2& rhs) { + auto ret = get_autoext_type2(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] - rhs[i]; return ret; } template class VT> -decayed_t::type> operator -(const VT& lhs, const T2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_autoext_type +operator -(const VT& lhs, const T2& rhs) { + auto ret = get_autoext_type(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] - rhs; return ret; } template class VT> -decayed_t::type> operator -(const T2& lhs, const VT& rhs) { - auto ret = decayed_t::type>(rhs.size); +get_autoext_type +operator -(const T2& lhs, const VT& 
rhs) { + auto ret = get_autoext_type(rhs.size); for (uint32_t i = 0; i < rhs.size; ++i) ret[i] = lhs - rhs[i]; return ret; } template class VT, template class VT2> -decayed_t::type> operator +(const VT& lhs, const VT2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_autoext_type2 +operator +(const VT& lhs, const VT2& rhs) { + auto ret = get_autoext_type2(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] + rhs[i]; return ret; } template class VT> -decayed_t::type> operator +(const VT& lhs, const T2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_autoext_type +operator +(const VT& lhs, const T2& rhs) { + auto ret = get_autoext_type(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] + rhs; return ret; } template class VT> -decayed_t::type> operator +(const T2& lhs, const VT& rhs) { - auto ret = decayed_t::type>(rhs.size); +get_autoext_type +operator +(const T2& lhs, const VT& rhs) { + auto ret = get_autoext_type (rhs.size); for (uint32_t i = 0; i < rhs.size; ++i) ret[i] = lhs + rhs[i]; return ret; } template class VT, template class VT2> -decayed_t::type> operator *(const VT& lhs, const VT2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_long_type2 +operator *(const VT& lhs, const VT2& rhs) { + auto ret = get_long_type2(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] * rhs[i]; return ret; } template class VT> -decayed_t::type> operator *(const VT& lhs, const T2& rhs) { - auto ret = decayed_t::type>(lhs.size); +get_long_type +operator *(const VT& lhs, const T2& rhs) { + auto ret = get_long_type(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] * rhs; return ret; } template class VT> -decayed_t::type> operator *(const T2& lhs, const VT& rhs) { - auto ret = decayed_t::type>(rhs.size); +get_long_type +operator *(const T2& lhs, const VT& rhs) { + auto ret = get_long_type(rhs.size); for (uint32_t i = 0; i < rhs.size; ++i) ret[i] = lhs * rhs[i]; return ret; } template class VT, template 
class VT2> -decayed_t::type>> operator /(const VT& lhs, const VT2& rhs) { - auto ret = decayed_t::type>>(lhs.size); +get_fp_type2 +operator /(const VT& lhs, const VT2& rhs) { + auto ret = get_fp_type2(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] / rhs[i]; return ret; } template class VT> -decayed_t::type>> operator /(const VT& lhs, const T2& rhs) { - auto ret = decayed_t::type>>(lhs.size); +get_fp_type +operator /(const VT& lhs, const T2& rhs) { + auto ret = get_fp_type(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] / rhs; return ret; } template class VT> -decayed_t::type>> operator /(const T2& lhs, const VT& rhs) { - auto ret = decayed_t::type>>(rhs.size); +get_fp_type +operator /(const T2& lhs, const VT& rhs) { + auto ret = get_fp_type(rhs.size); for (uint32_t i = 0; i < rhs.size; ++i) ret[i] = lhs / rhs[i]; return ret; From d494c878088e98ee217d688e1d5e5da2b4d81547 Mon Sep 17 00:00:00 2001 From: Bill Date: Mon, 26 Sep 2022 02:11:04 +0800 Subject: [PATCH 08/12] try fix Linux clang compilation --- server/table.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/server/table.h b/server/table.h index 68cea77..48a01a1 100644 --- a/server/table.h +++ b/server/table.h @@ -623,29 +623,29 @@ inline void TableInfo::print(const char* __restrict sep, const char* _ } } template class VT, + template class VT, class TRet> using test_vt_support = typename std::enable_if_t, ColRef> || std::is_same_v, ColView> || std::is_same_v, vector_type>, TRet>; template class VT> + template class VT> using get_autoext_type = test_vt_support::type>>; template class VT> + template class VT> using get_long_type = test_vt_support::type>>>; template class VT> + template class VT> using get_fp_type = test_vt_support::type>>>; template class VT, template class VT2, + template class VT, template class VT2, class TRet> using test_vt_support2 = typename std::enable_if_t<(std::is_same_v, ColRef> || 
std::is_same_v, ColView> || @@ -654,21 +654,21 @@ using test_vt_support2 = typename std::enable_if_t<(std::is_same_v, ColRe std::is_same_v, ColView> || std::is_same_v, vector_type>), TRet >; template class VT, template class VT2> + template class VT, template class VT2> using get_autoext_type2 = test_vt_support2::type>>; template class VT, template class VT2> + template class VT, template class VT2> using get_long_type2 = test_vt_support2::type>>>; template class VT, template class VT2> + template class VT, template class VT2> using get_fp_type2 = test_vt_support2::type>>>; -template class VT, template class VT2> +template class VT, template class VT2> get_autoext_type2 operator -(const VT& lhs, const VT2& rhs) { auto ret = get_autoext_type2(lhs.size); @@ -676,7 +676,7 @@ operator -(const VT& lhs, const VT2& rhs) { ret[i] = lhs[i] - rhs[i]; return ret; } -template class VT> +template class VT> get_autoext_type operator -(const VT& lhs, const T2& rhs) { auto ret = get_autoext_type(lhs.size); @@ -684,7 +684,7 @@ operator -(const VT& lhs, const T2& rhs) { ret[i] = lhs[i] - rhs; return ret; } -template class VT> +template class VT> get_autoext_type operator -(const T2& lhs, const VT& rhs) { auto ret = get_autoext_type(rhs.size); @@ -692,7 +692,7 @@ operator -(const T2& lhs, const VT& rhs) { ret[i] = lhs - rhs[i]; return ret; } -template class VT, template class VT2> +template class VT, template class VT2> get_autoext_type2 operator +(const VT& lhs, const VT2& rhs) { auto ret = get_autoext_type2(lhs.size); @@ -700,7 +700,7 @@ operator +(const VT& lhs, const VT2& rhs) { ret[i] = lhs[i] + rhs[i]; return ret; } -template class VT> +template class VT> get_autoext_type operator +(const VT& lhs, const T2& rhs) { auto ret = get_autoext_type(lhs.size); @@ -708,7 +708,7 @@ operator +(const VT& lhs, const T2& rhs) { ret[i] = lhs[i] + rhs; return ret; } -template class VT> +template class VT> get_autoext_type operator +(const T2& lhs, const VT& rhs) { auto ret = get_autoext_type 
(rhs.size); @@ -716,7 +716,7 @@ operator +(const T2& lhs, const VT& rhs) { ret[i] = lhs + rhs[i]; return ret; } -template class VT, template class VT2> +template class VT, template class VT2> get_long_type2 operator *(const VT& lhs, const VT2& rhs) { auto ret = get_long_type2(lhs.size); @@ -724,7 +724,7 @@ operator *(const VT& lhs, const VT2& rhs) { ret[i] = lhs[i] * rhs[i]; return ret; } -template class VT> +template class VT> get_long_type operator *(const VT& lhs, const T2& rhs) { auto ret = get_long_type(lhs.size); @@ -732,7 +732,7 @@ operator *(const VT& lhs, const T2& rhs) { ret[i] = lhs[i] * rhs; return ret; } -template class VT> +template class VT> get_long_type operator *(const T2& lhs, const VT& rhs) { auto ret = get_long_type(rhs.size); @@ -740,7 +740,7 @@ operator *(const T2& lhs, const VT& rhs) { ret[i] = lhs * rhs[i]; return ret; } -template class VT, template class VT2> +template class VT, template class VT2> get_fp_type2 operator /(const VT& lhs, const VT2& rhs) { auto ret = get_fp_type2(lhs.size); @@ -748,7 +748,7 @@ operator /(const VT& lhs, const VT2& rhs) { ret[i] = lhs[i] / rhs[i]; return ret; } -template class VT> +template class VT> get_fp_type operator /(const VT& lhs, const T2& rhs) { auto ret = get_fp_type(lhs.size); @@ -756,7 +756,7 @@ operator /(const VT& lhs, const T2& rhs) { ret[i] = lhs[i] / rhs; return ret; } -template class VT> +template class VT> get_fp_type operator /(const T2& lhs, const VT& rhs) { auto ret = get_fp_type(rhs.size); @@ -765,21 +765,21 @@ operator /(const T2& lhs, const VT& rhs) { return ret; } -template class VT, template class VT2> +template class VT, template class VT2> VT operator >(const VT& lhs, const VT2& rhs) { auto ret = VT(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] > rhs[i]; return ret; } -template class VT> +template class VT> VT operator >(const VT& lhs, const T2& rhs) { auto ret = VT(lhs.size); for (uint32_t i = 0; i < lhs.size; ++i) ret[i] = lhs[i] > rhs; return ret; } 
-template class VT> +template class VT> VT operator >(const T2& lhs, const VT& rhs) { auto ret = VT(rhs.size); for (uint32_t i = 0; i < rhs.size; ++i) From 3229a54bd41e14e0e626664c6e7e9d02d32716af Mon Sep 17 00:00:00 2001 From: Bill Date: Mon, 26 Sep 2022 02:23:29 +0800 Subject: [PATCH 09/12] optimize build --- build.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/build.py b/build.py index f6f2561..79da774 100644 --- a/build.py +++ b/build.py @@ -16,14 +16,15 @@ class checksums: server : Optional[Union[bytes, bool]] = None sources : Union[Dict[str, bytes], bool] = None env : str = '' - def calc(self, libaquery_a = 'libaquery.a' , + def calc(self, compiler_name, libaquery_a = 'libaquery.a' , pch_hpp_gch = 'server/pch.hpp.gch', server = 'server.so' ): from platform import machine self.env = (aquery_config.os_platform + machine() + - aquery_config.build_driver + aquery_config.build_driver + + compiler_name ) for key in self.__dict__.keys(): try: @@ -106,6 +107,8 @@ class build_manager: os.environ['PCH'] = f'{mgr.PCH}' if 'CXX' not in os.environ: os.environ['CXX'] = mgr.cxx if mgr.cxx else 'c++' + else: + mgr.cxx = os.environ['CXX'] def libaquery_a(self): self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery.a']] @@ -130,6 +133,10 @@ class build_manager: class MSBuildDriver(DriverBase): platform_map = {'amd64':'x64', 'arm64':'arm64', 'x86':'win32'} opt_map = {'0':'Debug', '1':'RelWithDebugInfo', '2':'Release', '3':'Release', '4':'Release'} + def __init__(self, mgr : 'build_manager') -> None: + super().__init__(mgr) + mgr.cxx = aquery_config.msbuildroot + def get_flags(self): self.platform = self.platform_map[self.mgr.Platform] self.platform = f'/p:platform={self.platform}' @@ -143,7 +150,7 @@ class build_manager: return self.build() def pch(self): - pass + return True def server(self): loc = os.path.abspath('./msc-plugin/server.vcxproj') @@ -185,7 +192,7 @@ class build_manager: libaquery_a = 'libaquery.a' if 
aquery_config.os_platform == 'win': libaquery_a = 'libaquery.lib' - current.calc(libaquery_a) + current.calc(self.cxx, libaquery_a) try: with open('.cached', 'rb') as cache_sig: cached = pickle.loads(cache_sig.read()) @@ -194,7 +201,10 @@ class build_manager: self.cache_status = current != cached success = True - if force or self.cache_status.sources: + if (force or + self.cache_status.sources or + self.cache_status.env + ): self.driver.pch() self.driver.libaquery_a() self.driver.server() @@ -206,7 +216,7 @@ class build_manager: if self.cache_status.server: success = self.driver.server() and success if success: - current.calc(libaquery_a) + current.calc(self.cxx, libaquery_a) with open('.cached', 'wb') as cache_sig: cache_sig.write(pickle.dumps(current)) else: From 2ed0fd786386b29cb116d83d837d92eadf5035c7 Mon Sep 17 00:00:00 2001 From: bill sun Date: Mon, 26 Sep 2022 04:44:09 +0800 Subject: [PATCH 10/12] Fixed windows build --- README.md | 17 ++++++++++------- aquery_config.py | 21 ++++++++++++++------- build.py | 2 +- monetdb/msvc/monetdb_config.h | 2 +- sdk/aquery.h | 4 ++-- server/server.cpp | 4 ++-- server/table_ext_monetdb.hpp | 7 ++++++- server/winhelper.cpp | 4 ++++ server/winhelper.h | 2 ++ 9 files changed, 42 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 98ae406..f1e0cb1 100644 --- a/README.md +++ b/README.md @@ -23,22 +23,25 @@ AQuery is tested on mainstream operating systems such as Windows, macOS and Linu There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux. - For WSL, Docker or Linux virtual machines, see Linux, Docker sections below -- For Visual Studio: +- For Visual Studio (Recommended): 1. Install python3.6 or above from [official website](https://www.python.org/downloads/windows/) or Microsoft Store. 2. 
Install Microsoft Visual Studio 2022 or later with **Desktop development with C++** selected. 3. Clone AQuery repo from [Github](https://github.com/sunyinqi0508/AQuery2) 4. Install python requirements with pip `python3 -m pip install -r requirements.txt` 5. Change the build driver from aquery_config.py to "MSBuild" 6. The libraries and headers for Monetdb are already included in msc-plugins, however you can also choose to download them from [Monetdb Easy Setup](https://www.monetdb.org/easy-setup/) and put them in the same place. - -- For MinGW: - 1. Install gcc and python3 using the package manager. (For Msys2, `pacman -S gcc python3`) + +- For Winlibs (Recommended): + - Download latest winlibs toolchain from the [official website](https://winlibs.com/) + - Since winlibs is linked with native windows runtime libraries (UCRT or MSVCRT), it offers better interoperatibility with other libraries built with MSVC such as python and monetdb. + - Other steps can be either the same as Visual Studio or Cygwin/Mingw (below) without ABI break. + +- For CygWin/MinGW: + 1. Install gcc and python3 using its **builtin package manager** instead of the one from python.org or windows store. (For Msys2, `pacman -S gcc python3`) 2. Clone AQuery repo from Github 3. Install python requirements 4. The prebuilt binaries are included in ./lib directory. However, you could also rebuild them from [source](https://github.com/MonetDB/MonetDB). - -- Note that it might be possible to use python from python.org or Microsoft store with gcc from MinGW. However, it might not work because of ABI breakage. So the better way is to use gcc with MinGW python from pacman or use clang/MSVC instead. 
- + ### macOS - Install a package manager such as [homebrew](https://brew.sh) - Install python3 and monetdb using homebrew `brew install python3 monetdb` diff --git a/aquery_config.py b/aquery_config.py index 616cb2a..699ad70 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -44,16 +44,23 @@ def init_config(): if os_platform == 'win': add_dll_dir(cygroot) add_dll_dir(os.path.abspath('./msc-plugin')) - import vswhere - vsloc = vswhere.find(prerelease = True, latest = True, prop = 'installationPath') - if vsloc: - msbuildroot = vsloc[0] + '/MSBuild/Current/Bin/MSBuild.exe' - build_driver = 'MSBuild' - else: - print('Warning: No Visual Studio installation found.') + if build_driver == 'Auto': + try: + import vswhere + vsloc = vswhere.find(prerelease = True, latest = True, prop = 'installationPath') + if vsloc: + msbuildroot = vsloc[0] + '/MSBuild/Current/Bin/MSBuild.exe' + build_driver = 'MSBuild' + else: + print('Warning: No Visual Studio installation found.') + build_driver = 'Makefile' + except ModuleNotFoundError: + build_driver = 'Makefile' # print("adding path") else: import readline + if build_driver == 'Auto': + build_driver = 'Makefile' if os_platform == 'cygwin': add_dll_dir('./lib') __config_initialized__ = True diff --git a/build.py b/build.py index 79da774..edf09be 100644 --- a/build.py +++ b/build.py @@ -14,7 +14,7 @@ class checksums: libaquery_a : Optional[Union[bytes, bool]] = None pch_hpp_gch : Optional[Union[bytes, bool]] = None server : Optional[Union[bytes, bool]] = None - sources : Union[Dict[str, bytes], bool] = None + sources : Optional[Union[Dict[str, bytes], bool]] = None env : str = '' def calc(self, compiler_name, libaquery_a = 'libaquery.a' , pch_hpp_gch = 'server/pch.hpp.gch', diff --git a/monetdb/msvc/monetdb_config.h b/monetdb/msvc/monetdb_config.h index d47fef2..730c019 100644 --- a/monetdb/msvc/monetdb_config.h +++ b/monetdb/msvc/monetdb_config.h @@ -435,7 +435,7 @@ gmtime_r(const time_t *__restrict__ timep, struct tm 
*__restrict__ result) #define HAVE_SOCKLEN_T 1 #ifndef _MSC_VER -#define SOCKET int +// #define SOCKET int #define closesocket close #endif diff --git a/sdk/aquery.h b/sdk/aquery.h index 2fe0404..4c9c779 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -76,8 +76,8 @@ __AQEXPORT__(void) init_session(Context* cxt); #define __AQ_NO_SESSION__ __AQEXPORT__(void) init_session(Context*) {} -#ifdef _MSC_VER -void* _cdecl memcpy(void*, void*, size_t); +#ifdef _WIN32 +#include #else void* memcpy(void*, const void*, unsigned long long); #endif diff --git a/server/server.cpp b/server/server.cpp index c92d25a..c7b130d 100644 --- a/server/server.cpp +++ b/server/server.cpp @@ -176,10 +176,10 @@ int dll_main(int argc, char** argv, Context* cxt){ //getlasterror if (!user_module_handle) -#ifndef _MSC_VER +#ifndef _WIN32 puts(dlerror()); #else - printf("Fatal Error: Module %s failed to load with error code %d.\n", mname, GetLastError()); + printf("Fatal Error: Module %s failed to load with error code %d.\n", mname, dlerror()); #endif user_module_map[mname] = user_module_handle; initialize_module(mname, user_module_handle, cxt); diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp index 32c27f9..c128559 100644 --- a/server/table_ext_monetdb.hpp +++ b/server/table_ext_monetdb.hpp @@ -22,7 +22,12 @@ inline constexpr monetdbe_types AQType_2_monetdbe[] = { #else monetdbe_int64_t, #endif - monetdbe_int16_t, monetdbe_int8_t, monetdbe_bool, monetdbe_int128_t, + monetdbe_int16_t, monetdbe_int8_t, monetdbe_bool, +#ifdef HAVE_HGE + monetdbe_int128_t, +#else + monetdbe_int64_t, +#endif monetdbe_timestamp, monetdbe_int64_t, monetdbe_int64_t }; diff --git a/server/winhelper.cpp b/server/winhelper.cpp index 08fd1a2..ed418af 100644 --- a/server/winhelper.cpp +++ b/server/winhelper.cpp @@ -20,6 +20,10 @@ int dlclose(void* handle) return FreeLibrary(static_cast(handle)); } +int dlerror() { + return GetLastError(); +} + SharedMemory::SharedMemory(const char* fname) { this->hFileMap 
= CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, 2, fname); diff --git a/server/winhelper.h b/server/winhelper.h index 5993943..df9231e 100644 --- a/server/winhelper.h +++ b/server/winhelper.h @@ -5,6 +5,8 @@ static constexpr int RTLD_LAZY = 1; void* dlopen(const char*, int); void* dlsym(void*, const char*); int dlclose(void*); +int dlerror(); + struct SharedMemory { void* hFileMap; From d10a476a34b44221623abcacaef27fab7006e145 Mon Sep 17 00:00:00 2001 From: Bill Date: Tue, 27 Sep 2022 05:08:53 +0800 Subject: [PATCH 11/12] Updated instructions, bulid drivers, bug fixes --- AQuery2 | 2616 +++++++++++++++++++++++++++++++ MonetDB-release-epel.noarch.rpm | Bin 0 -> 6588 bytes README.md | 22 +- aquery_config.py | 2 +- arch-check.sh | 21 + build.py | 18 +- engine/utils.py | 5 +- prompt.py | 6 +- reconstruct/ast.py | 13 +- reconstruct/expr.py | 28 +- test.aquery | 3 + 11 files changed, 2707 insertions(+), 27 deletions(-) create mode 100644 AQuery2 create mode 100644 MonetDB-release-epel.noarch.rpm create mode 100644 arch-check.sh diff --git a/AQuery2 b/AQuery2 new file mode 100644 index 0000000..4478769 --- /dev/null +++ b/AQuery2 @@ -0,0 +1,2616 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GitHub - sunyinqi0508/AQuery2: An in-memory column-store time-series database that uses query compilation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + + + +
+ + + + + + + + + + + +
+
+
+ + + + + + + + + + + + +
+ +
+ +
+
+ + + + / + + AQuery2 + + + Public +
+ +
+ + + +
+ +
+
+

+ An in-memory column-store time-series database that uses query compilation +

+ +

License

+ + + + +
+
+ +
+ +
+
+ +
+ + + + +
+ + + + + +
+ + + + + + +

sunyinqi0508/AQuery2

+
+ + +
+ + +
+ +
+ + + + + +
+
+ +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + + +
+ + + + + + + + +
+ Code +
+ +
+
+ +
+ + +
+
+ + + + + + +
+
+

Latest commit

+
+ +
+
 
+
+

Git stats

+ +
+
+
+

Files

+ + + + + Permalink + +
+ + + Failed to load latest commit information. + + + +
+
+
+
Type
+
Name
+
Latest commit message
+
Commit time
+
+ +
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ data +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ docs +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ engine +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ lib +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ monetdb +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ msvs-py +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ sdk +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ server +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ tests +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ LICENSE +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ Makefile +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ README.md +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ build.py +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ csv.h +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ dbconn.py +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ mmw.cpp +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ +
+ prompt.py +
+ +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+
+ +
+ + + +
+
 
+
+ +
+
 
+
+ +
+
+ +
+ +
+ + +
+ + + + +
+ + + +
+

AQuery++ Database

+

Introduction

+

AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution.

+

Requirements

+
    +
  1. +

    A recent version of Linux, Windows or macOS, with a recent C++ compiler that supports C++17 (1z). (However, C++20 is recommended if available, for heterogeneous lookup on unordered containers.)

    +
      +
    • GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug)
    • +
    • Clang: 5.0 or above (Recommended)
    • +
    • MSVC: 2019 or later (2022 or above is recommended)
    • +
    +
  2. +
  3. +

    Monetdb for Hybrid Engine

    +
      +
    • On windows, the required libraries and headers are already included in the repo.
    • +
    • On Linux, see Monetdb Easy Setup for instructions.
    • +
    • On MacOS, Monetdb can be easily installed in homebrew brew install monetdb.
    • +
    +
  4. +
  5. +

    Python 3.6 or above; install the required packages listed in requirements.txt with python3 -m pip install -r requirements.txt

    +
  6. +
+

Installation

+

AQuery is tested on mainstream operating systems such as Windows, macOS and Linux

+

Windows

+

There are multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio, gcc from Cygwin/MinGW, or run it under Windows Subsystem for Linux.

+
    +
  • +

    For WSL, Docker or Linux virtual machines, see Linux, Docker sections below

    +
  • +
  • +

    For Visual Studio:

    +
      +
    1. Install python3.6 or above from official website or Microsoft Store.
    2. +
    3. Install Microsoft Visual Studio 2022 or later with Desktop development with C++ selected.
    4. +
    5. Clone AQuery repo from Github
    6. +
    7. Install python requirements with pip python3 -m pip install -r requirements.txt
    8. +
    9. Change the build driver from aquery_config.py to "MSBuild"
    10. +
    11. The libraries and headers for Monetdb are already included in msc-plugins, however you can also choose to download them from Monetdb Easy Setup and put them in the same place.
    12. +
    +
  • +
  • +

    For MinGW:

    +
      +
    1. Install gcc and python3 using the package manager. (For Msys2, pacman -S gcc python3)
    2. +
    3. Clone AQuery repo from Github
    4. +
    5. Install python requirements
    6. +
    7. The prebuilt binaries are included in ./lib directory. However, you could also rebuild them from source.
    8. +
    +
  • +
  • +

    Note that it might be possible to use python from python.org or the Microsoft Store with gcc from MinGW. However, it might not work because of ABI breakage. The safer approach is to use gcc with the MinGW python from pacman, or to use clang/MSVC instead.

    +
  • +
+

macOS

+
    +
  • Install a package manager such as homebrew
  • +
  • Install python3 and monetdb using homebrew brew install python3 monetdb
  • +
  • Install the C++ compiler that comes with the Xcode command-line tools by running xcode-select --install, or install one from homebrew
  • +
  • If you have multiple C++ compilers on the system, specify which one to use by setting the CXX environment variable, e.g. export CXX=clang
  • +
  • Install python packages from requirements.txt
  • +
+

for arm64 macOS users

+
    +
  • In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred.
  • +
  • However, they can't be mixed up, i.e. make sure every component (python binary, C++ compiler, monetdb library and system commandline utilities such as uname) has the same architecture.
  • +
  • Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems.
  • +
+

Linux

+
    +
  • Install monetdb, see Monetdb Easy Setup for instructions.
  • +
  • Install python3, C++ compiler and git. (For Ubuntu, run apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git )
  • +
  • Install required python packages by python3 -m pip install -r requirements.txt
  • +
  • If you have multiple C++ compilers on the system, specify which one to use by setting the CXX environment variable, e.g. export CXX=clang++-14
  • +
+

Docker:

+
    +
  • Alternatively, you can also use docker to run AQuery.
  • +
  • Type make docker to build the docker image from scratch.
  • +
  • For Arm-based Mac users, you would need to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux.
  • +
+

Usage

+

python3 prompt.py will launch the interactive command prompt. The server binary will be automatically rebuilt and started.

+

Commands:

+
    +
  • +

    <sql statement>: parse AQuery statement

    +
  • +
  • +

    f <filename>: parse all AQuery statements in file

    +
  • +
  • +

    dbg: start debugging session

    +
  • +
  • +

    print: printout parsed AQuery statements

    +
  • +
  • +

    xexec: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.

    +
  • +
  • +

    save <OPTIONAL: filename>: save current code snippet. A random filename is used if none is specified.

    +
  • +
  • +

    exit: quit the prompt

    +
  • +
  • +

    r: run the last generated code snippet

    +
  • +
+

Example:

+

f moving_avg.a
+xexec

+

See ./tests/ for more examples.

+

Architecture

+

Architecture

+

AQuery Compiler

+
    +
  • The query is first processed by the AQuery Compiler which is composed of a frontend that parses the query into AST and a backend that generates target code that delivers the query.
  • +
  • Front end of AQuery++ Compiler is built on top of mo-sql-parsing with modifications to handle AQuery dialect and extension.
  • +
  • Backend of AQuery++ Compiler generates target code dependent on the Execution Engine. It can either be the C++ code for AQuery Execution Engine or sql and C++ post-processor for Hybrid Engine or k9 for the k9 Engine.
  • +
+

Execution Engines

+
    +
  • AQuery++ supports different execution engines thanks to the decoupled compiler structure.
  • +
  • AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins and udf functions.
  • +
  • Hybrid Execution Engine: decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
  • +
  • K9 Execution Engine: (discontinued).
  • +
+

Roadmap

+
    +
  • SQL Parser -> AQuery Parser (Front End)
  • +
  • AQuery-C++ Compiler (Back End) +
      +
    • Schema and Data Model
    • +
    • Data acquisition/output from/to csv file
    • +
    +
  • +
  • Execution Engine +
      +
    • Projections and single-group Aggregations
    • +
    • Group by Aggregations
    • +
    • Filters
    • +
    • Order by
    • +
    • Assumption
    • +
    • Flatten
    • +
    • UDFs (Hybrid Engine only)
    • +
    • User Module
    • +
    • Triggers
    • +
    • Join (Hybrid Engine only)
    • +
    • Subqueries
    • +
    +
  • +
  • Query Optimization +
      +
    • Selection/Order by push-down
    • +
    • Join Optimization (Only in Hybrid Engine)
    • +
    +
  • +
+

Known Issues:

+
    +
  • Interval based triggers
  • +
  • Hot reloading server binary
  • +
  • Bug fixes: type deduction misaligned in Hybrid Engine
  • +
  • Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
  • +
  • C++ Meta-Programming: Eliminate template recursions as much as possible.
  • +
  • Functionality: Basic helper functions in aquery
  • +
  • Improvement: More DDLs, e.g. drop table, update table, etc.
  • +
  • Bug: Join-Aware Column management
  • +
  • Bug: Order By after Group By
  • +
+
+
+
+ +
+ + +
+
+ +
+
+
+

About

+ +

+ An in-memory column-store time-series database that uses query compilation +

+ +

Topics

+ + +

Resources

+ + +

License

+ + + + + + + +

Stars

+ + +

Watchers

+ + +

Forks

+ + +
+
+ + + + + + + + + + + +
+
+

Languages

+
+ + + + + +
+ + +
+
+
+
+ +
+ +
+ + +
+ +
+ + +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + diff --git a/MonetDB-release-epel.noarch.rpm b/MonetDB-release-epel.noarch.rpm new file mode 100644 index 0000000000000000000000000000000000000000..3edafcff9245dc48aa9a7c196bb2052f04843bd7 GIT binary patch literal 6588 zcma)92UHZv)^1?PNrIxNI0lq7!@vLok|cxVBtg=I9vGSA1d;>=BpFdebU{#1LBfh6 z2q-ElA}YqkfJzim5kbU+%3lr5zTNlld1w2a>RVskP`B=_y4C%@apjiyj6P*c1BzD3BMe0#qbh zgouGeq@bk>mSa-0k53A(6??7tI*R@5NA2%X@S6}9-Ppic0?A0vn;TJ6mx6|u4ryj$ z^T@@YtnRtre?FADb+|x6z2;iATJrVe*KvN8g%?C^lofZmYD{^Qm^*2wTNYQr{;hi@ z*jk|`q!->%Ow4F-!eZ_E^!^^|{S2GTk=^bCTkOVn!Oh=u<@(0CmwCS5A_ukmDniZ9 ziBWI-vS=>cYu?BUO&?BqaA1f1zTC!6@w-=k6J(UpOEc4oxehPwzJ2I$%81*(_`Kbv zq25nvaU*G(rOIDi_E<{e$2YpA;!S&6%*t8Dkx4xlL)u+(75B`)>NdaT)KIXBvu)VK z8MzA1$bhYLb=ZrKn>8eU=mtwFGc2YDJ_u!XR*bxCyHNJf>{4{bA0#=NjZ!$#$bdv6 zP|QqWI*mvo(8*-jh;Cv^Gcz_ahK)!DW-x(hYDNYbfS?6S{#A@u{WZpSx+Q-|8Du0# zYwXk-SJCP!@9~_t+pQhL%}~uvS(|feL(^I11B@wwa>(hMCXWdjhobB(@?JRhE^}kT z20?mPny#%*{`x&`P^)Hq6~vQK=jL2C#Fp$x_pA8GR2Rm-efp*B_6EZ05goUEP^~cM zZ1J|My+>UX--Ju8(ck2;s(Q)JGaZ6&4J$|1cgZ)X3g!DMC9y(x@dd+TsUJ7K%~NQ@ zoxd_5yi~Wc+ppzd_mS&c-h1H*m%Yd6`*%bKSGB(w9JnwUF_eri?o_K1@39CI{k*c( zmJ}(Qh|^6u|Jdh<-&(b(lH7}1bXJVwlfV6ZcG~hs06SFY%bx4)B*&ZtD$O~s_K(JV zc2)M?W^q>!(#wl43XoVTm4MgG`Iw%b-V6Hk{&IfSf^A<1#;idD`(LsQ;e&=`K?*eF znL97w{6L~8egi0i)liI_9|SK(F>-#SLDNL>dlWmMcoa}@oXxQ{wEZFp2ecdDE@+CbrgRE6wEI=H{TDGAA<6KqBsS`zfin=7GuENGv|FLpkU1y zWNo0NiRPggSsQ|p2qSzMK!J{EA&O-I1@RLtL$TZ}7F&&Cq#glzF)b7;qI?31ksJVh zF_51b`>KEfAH?hd1^K|t2NcogqI@I|hN^*$GD*w@dJ_D z3@BpX6i{FvgV;l;8K8)K7K#zS02W95MfkR}SYj2Rh`!V8xP%3uNPL`8Y>#3WK!Lsl zGC#r(MEPDQ4nq0JIKmG``2s)@`x{X{;va&MyaGQYwgQUChXRV&&q8q+iYrh|0TirP z;trsQK5Z6DqWc8{qj){ar=!>%#Y_}?q8RZD%r6-QC^Ekg#dGihq#Fq#B*WbGaHzK$vkS544Fy= zpA$SXFwED(`g*(JX1IawpaXnm10ys(UPi_*3M_+5rbZ*vb9tN?1`QYzM8Q}#OojP; zG7=sJ6B*;oEC6BsC5$q>|AdjlM&rZBA}a$6pfJ&;`+!9{gP1vhiFsTe14P1&!4}40 zwHSC9uP5WncMF|94 z{#t!~knXvygiN7N6I|Kx_^YeBhfbsJWOo+sd|;A)W%?E0IQ|nVAv^ 
zBr=^~02&Na$t0R7!Hi;RWMoF96HJK)Bm%|M&r5KX8AW1^Xn0fj_1GBYuQDKHTxQp|{CBAI4FBGC!P1TqX8nwU`y=tKgI zNH8#^BL{l!;c++u=)YcQJpad+D4x%w;(1&a1c7^U=J_Wblew7x&C=}5ppeM?cjLh{ zaG>UdAz+`$plI2FJ;VcB5T-eT4d_8;!F=sG9x`Ijh=loq|BQK&94j{1WdBdug&&xp4M9Xa0dIsefQ;}ac)|=t-pDBUDu;~m zhBITPU~@r1Y4g@0&G#byvago$I^Rtkm`B69gCma2!oyndXGYeMsjQiOiN#@OKJKA^ zCy7k;?<5WWwIs3_94LS+P7FAg944q5j7TSrMF2~}%V4ou ze=AE=G8@b0#3FNn5=z4g+0-a9I}-Fj#i4LOWu4J9nK6eN(FUtn5S7c|Kye9>AzI16k(@c1Gt~_j$wqGMzds_7 zjjx@H3swN`C5Xo0&CDAAcbPea#B=5wdV2at1nAHENlFL=A1yL{Q*uW|q2(e9U=ZWw zY|^V#EE66IEtG$Op%{J>%5FV&QPEps%QaWEJbqvzhPpI}Hnyfzxn|o~rC4Yx>14zn zIhNM+kGs-e^*iwpsV6!PLF|qDX4WF~{6vn1dzX&#lpzANq*zzc;@m zA#uyPMYyT*+u041(i#Utnx`D3Qr@aAt;kKfry-Xgxnso@<){&_9}(rTip%VBu3IE; zHB7R9saEbkKhS%#$n)^vZB@?%1{>O4eP8q`_>Su4yR3y}_@~Ok~{e(1}_(XkUAK#OX$OCSIylB;}|^KgOhT@s7;IiG9_&1EN9m zMr}OA16$;qMH_bqSQC`)ea;(lY+bneeA06<`aX#_vW27XxhoqD63Zh`OX&Ipr0E4W zJm_taQA@X8JO7-y$pv-R6tr;7(s^xX<9|xZzgl1SJ%L!GR4S-klCnrvS=z3Xxm9OP zcgeYnvYETgP4AYxv@+h2c+fh$Uu-zMif*+~;?06vUsf8+W?lM@BPurc-nQ5^f1jVW zvZz#+#4&S=B`#-_y$^a4Yd*Eb=cuPT9^6#bB^4I`J&h)IRdz$svR{=wPX|RGxtxuv zXA+OQJ52QOcI_-Ul^^m$(!FX5wYws1z`1(rK!A9K%J)7J3&Vh&aFIfk( zZ`N-Q{4uWg=7z`T16p|@#~Nc6(MMbj{L@zLx#bt0MJkkqriy9O*(N6zy6o7gWrPV7 z8}~F#sjy59T@u8zjICRpPl`L$=CUQCYt-a7h4+^yKbQPGE;TXLN%wFvXbNAhn0#fp z_mqT+QKX>%BB#$k=>0&9_MM&B5T2BNc~Lv$ymLLvYR^ zY0cu*U4hBM1=_9HeY+Otw~nn%T^Ch+Li13(nQFm-7LwJjweQ1k(%g>Sj_bX4MQ5eo z-k9yeMOxf*-V0jPnkG&=wPf!-zPm!{Wrfx5Bg->>$Qq9rkErbKabbHH9fS<@K7yJ7b!gCd8FfZ z=GVyWij=~JtKsKOT`NU;{N0LaWQDh*J&oON^pKKQ8r`7}yQ!9s*N?JUw#RLs^R`VU z&2wnKQ8X++y!p__!^ZII^{Pjbe?R$?>9C+9$Xn=jLDNxqBDF0n0r#neb$^3A#z(3O$%l6)Wtc|&(P^~jmge#P>|uGqiudW+YL;D27c9No z-&y3AV?!_4gwt=^hz@pM5fAJeY|mO+Z+1ze&|GQM>)rX0?hHTE&uX>iis^C@$JKvH zbkoL;cfLKSD!<}ac89!f=%Tj5MSZ%v5}JmsuP?YCAuGP#;jX;NQSE}fi_Z@oSknHG z5dD;tTyW;>o=tZ?pEW0y<$Sf_+$6S=u6Z~H>ctS7Z@x`_OXRs0wA*A{9PfPA7I!#h zjaHcTyOjCY<7oJ0vFTWi$rS7gZCk_s{xLtofa&Pbt%Csr-#*o?9FS98sUxvszhPpA 
z{I%Y~H8!Cw6SM}$d98=!mOqtY?6-Spw@*S&BhSUPvhLZ_a|DCQR)NmuMpK!>ONFR%VkfxNL2<`{hXu3}gEIjL=NszDq%Dnms zE4=e~FZhpngSpr0s*Tq*P8?l-qG#99@vhvQn}X?+^pWF7T%Ht|jMo=^xF>VDQ~syH zRC+UeSrGoddO*03>7J3A{ak;aZt({v*E7z1TlN~WCb@k0h5o+xMHch*Dxyx@=_q@D zw#p~bC)Rsx1LfWoN?h7U^)=deht6ev({UJSaK`m}hZHu+1@Z^y>(IYRIPTNuHIkD~ z{yvnZPid_b5b@J0mySPm`cy-IS$v?`bsuHF*X6Tf%cnoDveuAnZQIfsa4EoAdP|G> z8$zkBro!LAzw$Ax5-BL07lj%gbcXf`KSB&S* zIwzG>j~=HnSzVnqAq#Wb6`Uy6>&ibcHSfrJPIp{95WQ87cR1VK_o`|lCp9cq@H5F? z&O**MKcBlr?pezInt-0cye2GWL6Os&qBO%*n{+E1x*8i}b4nFop2tP1?{Lic<2 zf#T&~OK-qc+j4L8M{eKs_H|a^)+dKAPsN;CCQpwR*0gp;`*)#&KN&H#Qz^9NZO1S9Ed^EO)oMx)?T;z!F|>>n$#Ymz z<+XgrnN^E5+^myh?gg(j=nAf)=RR}`Tv}(lH{Ekg^N#cDg6|Ou&*RQJ`;xBwdZuz} zEK?;<*DBFirBJyK-|@iL=z{iuonmst#87zg#5G6VSD(~L*rnN|$G=WLi`RdwA!YO| zysMRY@>?jaWwFZHq7`(&jtrt?&;Md_2@a+))s$>R$=v|inD zjd4!6-mta_pQzHDbAjeivgv7Q_Y+l#F6GnYbHPeaFKrvKmaM;16r0}jqNC!0r1Qn{ z$>1N)FKDN173xlg25o$9Y(ain7H3y;vZv_N&9s9LA8GeLu_W)KPpN{6z)|6r&gxrDr=9FR4+fhLZ^1==>;&UIsaJv#*Oqw)2oYzzG)H9*R`yDNLH`+ zHSc@7{@eDwUs+wo6QTv_eiok5@kzwtyPod@OsD l?Y+|4+>60kDjy+{{qsv!LQt~Hjm-KtJxgYO3}k|W{|}$//liblto-plugin.dll` to `mingw64/lib/bfd-plugins/` For Link time optimization support on gcc-ar and gcc-ranlib - For CygWin/MinGW: - 1. Install gcc and python3 using its **builtin package manager** instead of the one from python.org or windows store. (For Msys2, `pacman -S gcc python3`) + 1. Install gcc and python3 using its **builtin package manager** instead of the one from python.org or windows store. (For Msys2, `pacman -S gcc python3`). Otherwise, ABI breakage may happen. 2. Clone AQuery repo from Github 3. Install python requirements 4. The prebuilt binaries are included in ./lib directory. However, you could also rebuild them from [source](https://github.com/MonetDB/MonetDB). ### macOS +- If you're using an arm-based mac (e.g. M1, M2 processors). Please go to the Application folder and right-click on the Terminal app, select 'Get Info' and ensure that the 'Open using Rosetta' option is unchecked. 
See the section below for more notes for arm-based macs. - Install a package manager such as [homebrew](https://brew.sh) - Install python3 and monetdb using homebrew `brew install python3 monetdb` - Install C++ compiler come with Xcode commandline tool by `xcode-select --install` or from homebrew @@ -50,8 +52,10 @@ There're multiple options to run AQuery on Windows. You can use the native toolc - Install python packages from **requirements.txt** **for arm64 macOS users** -- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. -- However, they can't be mixed up, i.e. make sure every component, `python` binary, `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. +- In theory, AQuery++ can work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. +- However, they can't be mixed up, i.e. make sure every component, `python` , `C++ compiler`, `monetdb` library and system commandline utilities such as `uname` should have the same architecture. +- Use the script `./arch-check.sh` to check if relevant binaries all have the same architecture. +- In the case where binaries have different architectures, install the software with desired architecture and make an alias or link to ensure the newly installed binary is referred to. - Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. ### Linux @@ -59,19 +63,23 @@ There're multiple options to run AQuery on Windows. You can use the native toolc - Install python3, C++ compiler and git. (For Ubuntu, run `apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git `) - Install required python packages by `python3 -m pip install -r requirements.txt` - If you have multiple C++ compilers on the system. 
Specify C++ compiler by setting the **CXX** environment variable. e.g. `export CXX=clang++-14` +- Note for anaconda users: the system libraries included in anaconda might differ from the ones your compiler is using. In this case, you might get errors similar to: + >ImportError: libstdc++.so.6: version `GLIBCXX_3.4.26' not found + + In this case, upgrade anaconda or your compiler or use the python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) to anaconda's library directory (e.g. ~/Anaconda3/lib/). + ### Docker: - Alternatively, you can also use docker to run AQuery. - Type `make docker` to build the docker image from scratch. - - For Arm-based Mac users, you would need to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux. + - For Arm-based Mac users, you would have to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux. ## Usage `python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started. #### Commands: - ``: parse AQuery statement - `f `: parse all AQuery statements in file +- `exec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - `dbg` start debugging session - `print`: printout parsed AQuery statements - -- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. 
The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - `save `: save current code snippet. will use random filename if not specified. - `exit`: quit the prompt - `r`: run the last generated code snippet diff --git a/aquery_config.py b/aquery_config.py index 699ad70..c2ede14 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -2,7 +2,7 @@ ## GLOBAL CONFIGURATION FLAGS -version_string = '0.4.6a' +version_string = '0.4.7a' add_path_to_ldpath = True rebuild_backend = False run_backend = True diff --git a/arch-check.sh b/arch-check.sh new file mode 100644 index 0000000..a472028 --- /dev/null +++ b/arch-check.sh @@ -0,0 +1,21 @@ +ARCH=`uname -m` +ARCH2=`arch` +echo Current architechure: $ARCH $ARCH2 +echo Current shell: $SHELL +PASSED=1 +for i in python3 c++ make ranlib libtool $SHELL +do + FILEPATH=`which $i` + FILEINFO=`file $FILEPATH` + if [[ $FILEINFO =~ $ARCH ]]; then + echo $i@$FILEPATH: passed + else + echo "\033[1;31mERROR\033[0m: Architecture of $i is not $ARCH: $FILEINFO" + PASSED=0 + fi +done + +if [[ PASSED -eq 1 ]]; then + echo "\033[1;32mBinary archtechure check passed\033[0m" +fi + diff --git a/build.py b/build.py index edf09be..172b30c 100644 --- a/build.py +++ b/build.py @@ -94,8 +94,8 @@ class build_manager: ret = True for c in self.build_cmd: if c: - try: - ret = subprocess.call(c, stdout = stdout, stderr = stderr) and ret + try: # only last success matters + ret = not subprocess.call(c, stdout = stdout, stderr = stderr) # and ret except (FileNotFoundError): ret = False pass @@ -205,21 +205,23 @@ class build_manager: self.cache_status.sources or self.cache_status.env ): - self.driver.pch() - self.driver.libaquery_a() - self.driver.server() + success &= self.driver.pch() + success &= self.driver.libaquery_a() + success &= self.driver.server() else: if self.cache_status.libaquery_a: - 
success = self.driver.libaquery_a() and success + success &= self.driver.libaquery_a() if self.cache_status.pch_hpp_gch: - success = self.driver.pch() and success + success &= self.driver.pch() if self.cache_status.server: - success = self.driver.server() and success + success &= self.driver.server() if success: current.calc(self.cxx, libaquery_a) with open('.cached', 'wb') as cache_sig: cache_sig.write(pickle.dumps(current)) else: + if aquery_config.os_platform == 'mac': + os.system('./arch-check.sh') try: os.remove('./.cached') except: diff --git a/engine/utils.py b/engine/utils.py index 995d354..065f8c8 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -140,4 +140,7 @@ nullstream = open(os.devnull, 'w') def clamp(val, minval, maxval): - return min(max(val, minval), maxval) \ No newline at end of file + return min(max(val, minval), maxval) + +def escape_qoutes(string : str): + return re.sub(r'^\'', r'\'',re.sub(r'([^\\])\'', r'\1\'', string)) diff --git a/prompt.py b/prompt.py index 91fbbe2..3598c62 100644 --- a/prompt.py +++ b/prompt.py @@ -43,7 +43,7 @@ dbg: print: printout parsed sql statements exec: - execute last parsed statement(s) with AQuery Execution Engine + execute last parsed statement(s) with AQuery Execution Engine (disabled) xexec: execute last parsed statement(s) with Hybrid Execution Engine r: @@ -336,7 +336,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): time.sleep(.00001) og_q : str = next() q = og_q.lower().strip() - if q == 'exec': # generate build and run (AQuery Engine) + if False and q == 'exec': # generate build and run (AQuery Engine) state.cfg.backend_type = Backend_Type.BACKEND_AQuery.value cxt = engine.exec(state.stmts, cxt, keep) if state.buildmgr.build_dll() == 0: @@ -352,7 +352,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): else: print(prompt_help) continue - elif q.startswith('xexec'): # generate build and run (MonetDB Engine) + elif q.startswith('xexec') or 
q.startswith('exec'): # generate build and run (MonetDB Engine) state.cfg.backend_type = Backend_Type.BACKEND_MonetDB.value cxt = xengine.exec(state.stmts, cxt, keep) diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 52dc8ef..44fb969 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -617,6 +617,7 @@ class join(ast_node): self.join_conditions = [] # self.tmp_name = 'join_' + base62uuid(4) # self.datasource = TableInfo(self.tmp_name, [], self.context) + def append(self, tbls, __alias = ''): alias = lambda t : t + ' ' + __alias if len(__alias) else t if type(tbls) is join: @@ -661,8 +662,11 @@ class join(ast_node): self.have_sep = True j = join(self, node[keys[0]]) tablename = f' {keys[0]} {j}' - if len(keys) > 1 and keys[1].lower() == 'on': - tablename += f' on {expr(self, node[keys[1]])}' + if len(keys) > 1 : + if keys[1].lower() == 'on': + tablename += f' ON {expr(self, node[keys[1]])}' + elif keys[1].lower() == 'using': + tablename += f' USING {expr(self, node[keys[1]])}' self.joins.append((tablename, self.have_sep)) self.tables += j.tables self.tables_dir = {**self.tables_dir, **j.tables_dir} @@ -731,8 +735,9 @@ class join(ast_node): class filter(ast_node): name = 'where' def produce(self, node): - self.add(expr(self, node).sql) - + filter_expr = expr(self, node) + self.add(filter_expr.sql) + self.datasource.join_conditions += filter_expr.join_conditions class create_table(ast_node): name = 'create_table' diff --git a/reconstruct/expr.py b/reconstruct/expr.py index e5980d4..0faf9a5 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -12,6 +12,11 @@ from engine.types import * class expr(ast_node): name='expr' + valid_joincond = { + 0 : ('and', 'eq', 'not'), + 1 : ('or', 'neq', 'not'), + 2 : ('', '', '') + } @property def udf_decltypecall(self): return self._udf_decltypecall if self._udf_decltypecall else self.sql @@ -46,6 +51,7 @@ class expr(ast_node): self.node = node self.supress_undefined = supress_undefined if(type(parent) is 
expr): + self.next_valid = parent.next_valid self.inside_agg = parent.inside_agg self.is_udfexpr = parent.is_udfexpr self.is_agg_func = parent.is_agg_func @@ -53,6 +59,8 @@ class expr(ast_node): self.c_code = parent.c_code self.builtin_vars = parent.builtin_vars else: + self.join_conditions = [] + self.next_valid = 0 self.is_agg_func = False self.is_udfexpr = type(parent) is udf self.root : expr = self @@ -92,9 +100,18 @@ class expr(ast_node): else: if len(node) > 1: print(f'Parser Error: {node} has more than 1 dict entry.') - + + is_joincond = False for key, val in node.items(): key = key.lower() + if key not in self.valid_joincond[self.next_valid]: + self.next_valid = 2 + else: + if key == self.valid_joincond[self.next_valid][2]: + self.next_valid = not self.next_valid + elif key == self.valid_joincond[self.next_valid][1]: + self.next_valid = 2 + is_joincond = True if key in self.operators: if key in builtin_func: if self.is_agg_func: @@ -200,6 +217,9 @@ class expr(ast_node): else: print(f'Undefined expr: {key}{val}') + if (is_joincond and len(self.children) == 2 + and all([c.is_ColExpr for c in self.children])) : + self.root.join_conditions.append((c.raw_col for c in self.children)) if type(node) is str: if self.is_udfexpr: @@ -342,9 +362,11 @@ class expr(ast_node): exec(f'loc["{b}"] = lambda : "{b}"') x = self.c_code if c_code is None else c_code + from engine.utils import escape_qoutes if decltypestr: - return eval('f\'' + self.udf_decltypecall + '\'') - return eval('f\'' + self.sql + '\'') + return eval('f\'' + escape_qoutes(self.udf_decltypecall) + '\'') + self.sql.replace("'", "\\'") + return eval('f\'' + escape_qoutes(self.sql) + '\'') if self.is_recursive_call_inudf or (self.need_decltypestr and self.is_udfexpr) or gettype: return call else: diff --git a/test.aquery b/test.aquery index 624e03b..7756ff4 100644 --- a/test.aquery +++ b/test.aquery @@ -1,5 +1,8 @@ #!aquery +select "hello world" +xexec + echo Testing Insert, Filters and Nested Aggregation f 
stock.a xexec From 15f11f1eab25e950ccafc3d6b762a05aa7a02cc9 Mon Sep 17 00:00:00 2001 From: Bill Date: Tue, 27 Sep 2022 05:25:25 +0800 Subject: [PATCH 12/12] Updated instructions, bulid drivers, bug fixes --- AQuery2 | 2616 ------------------------------- MonetDB-release-epel.noarch.rpm | Bin 6588 -> 0 bytes 2 files changed, 2616 deletions(-) delete mode 100644 AQuery2 delete mode 100644 MonetDB-release-epel.noarch.rpm diff --git a/AQuery2 b/AQuery2 deleted file mode 100644 index 4478769..0000000 --- a/AQuery2 +++ /dev/null @@ -1,2616 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - GitHub - sunyinqi0508/AQuery2: An in-memory column-store time-series database that uses query compilation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Skip to content - - - - - - - - - - - - -
- -
- - - - - - - -
- - - - - -
- - - - - - - - - - - -
-
-
- - - - - - - - - - - - -
- -
- -
-
- - - - / - - AQuery2 - - - Public -
- -
- - - -
- -
-
-

- An in-memory column-store time-series database that uses query compilation -

- -

License

- - - - -
-
- -
- -
-
- -
- - - - -
- - - - - -
- - - - - - -

sunyinqi0508/AQuery2

-
- - -
- - -
- -
- - - - - -
-
- -
- -
-
- - - master - - - - -
-
-
- Switch branches/tags - -
- - - -
- -
- -
- - -
- -
- - - - - - - - - - - - - - - -
- - -
-
-
-
- -
- -
- - - - -
- - - - - - - - -
- Code -
- -
-
- -
- - -
-
- - - - - - -
-
-

Latest commit

-
- -
-
 
-
-

Git stats

- -
-
-
-

Files

- - - - - Permalink - -
- - - Failed to load latest commit information. - - - -
-
-
-
Type
-
Name
-
Latest commit message
-
Commit time
-
- -
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- data -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- docs -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- engine -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- lib -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- monetdb -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- msvs-py -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- sdk -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- server -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- tests -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- LICENSE -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- Makefile -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- README.md -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- build.py -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- csv.h -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- dbconn.py -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- mmw.cpp -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- -
- prompt.py -
- -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
-
- -
- - - -
-
 
-
- -
-
 
-
- -
-
- -
- -
- - -
- - - - -
- - - -
-

AQuery++ Database

-

Introduction

-

AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution.

-

Requirements

-
    -
  1. -

    Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. (however c++20 is recommended if available for heterogeneous lookup on unordered containers)

    -
      -
    • GCC: 9.0 or above (g++ 7.x, 8.x fail to handle fold-expressions due to a compiler bug)
    • -
    • Clang: 5.0 or above (Recommended)
    • -
    • MSVC: 2019 or later (2022 or above is recommended)
    • -
    -
  2. -
  3. -

    Monetdb for Hybrid Engine

    -
      -
    • On windows, the required libraries and headers are already included in the repo.
    • -
    • On Linux, see Monetdb Easy Setup for instructions.
    • -
    • On MacOS, Monetdb can be easily installed in homebrew brew install monetdb.
    • -
    -
  4. -
  5. -

    Python 3.6 or above and install required packages in requirements.txt by python3 -m pip install -r requirements.txt

    -
  6. -
-

Installation

-

AQuery is tested on mainstream operating systems such as Windows, macOS and Linux

-

Windows

-

There're multiple options to run AQuery on Windows. You can use the native toolchain from Microsoft Visual Studio or gcc from Cygwin/MinGW or run it under Windows Subsystem for Linux.

-
    -
  • -

    For WSL, Docker or Linux virtual machines, see Linux, Docker sections below

    -
  • -
  • -

    For Visual Studio:

    -
      -
    1. Install python3.6 or above from official website or Microsoft Store.
    2. -
    3. Install Microsoft Visual Studio 2022 or later with Desktop development with C++ selected.
    4. -
    5. Clone AQuery repo from Github
    6. -
    7. Install python requirements with pip python3 -m pip install -r requirements.txt
    8. -
    9. Change the build driver from aquery_config.py to "MSBuild"
    10. -
    11. The libraries and headers for Monetdb are already included in msc-plugins, however you can also choose to download them from Monetdb Easy Setup and put them in the same place.
    12. -
    -
  • -
  • -

    For MinGW:

    -
      -
    1. Install gcc and python3 using the package manager. (For Msys2, pacman -S gcc python3)
    2. -
    3. Clone AQuery repo from Github
    4. -
    5. Install python requirements
    6. -
    7. The prebuilt binaries are included in ./lib directory. However, you could also rebuild them from source.
    8. -
    -
  • -
  • -

    Note that it might be possible to use python from python.org or Microsoft store with gcc from MinGW. However, it might not work because of ABI breakage. So the better way is to use gcc with MinGW python from pacman or use clang/MSVC instead.

    -
  • -
-

macOS

-
    -
  • Install a package manager such as homebrew
  • -
  • Install python3 and monetdb using homebrew brew install python3 monetdb
  • -
  • Install C++ compiler come with Xcode commandline tool by xcode-select --install or from homebrew
  • -
  • If you have multiple C++ compilers on the system. Specify C++ compiler by setting the CXX environment variable. e.g. export CXX=clang
  • -
  • Install python packages from requirements.txt
  • -
-

for arm64 macOS users

-
    -
  • In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred.
  • -
  • However, they can't be mixed up, i.e. make sure every component, python binary, C++ compiler, monetdb library and system commandline utilities such as uname should have the same architecture.
  • -
  • Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems.
  • -
-

Linux

-
    -
  • Install monetdb, see Monetdb Easy Setup for instructions.
  • -
  • Install python3, C++ compiler and git. (For Ubuntu, run apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git )
  • -
  • Install required python packages by python3 -m pip install -r requirements.txt
  • -
  • If you have multiple C++ compilers on the system. Specify C++ compiler by setting the CXX environment variable. e.g. export CXX=clang++-14
  • -
-

Docker:

-
    -
  • Alternatively, you can also use docker to run AQuery.
  • -
  • Type make docker to build the docker image from scratch.
  • -
  • For Arm-based Mac users, you would need to build and run the x86_64 docker image because MonetDB doesn't offer official binaries for arm64 Linux.
  • -
-

Usage

-

python3 prompt.py will launch the interactive command prompt. The server binary will be automatically rebuilt and started.

-

Commands:

-
    -
  • -

    <sql statement>: parse AQuery statement

    -
  • -
  • -

    f <filename>: parse all AQuery statements in file

    -
  • -
  • -

    dbg start debugging session

    -
  • -
  • -

    print: printout parsed AQuery statements

    -
  • -
  • -

    xexec: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.

    -
  • -
  • -

    save <OPTIONAL: filename>: save current code snippet. will use random filename if not specified.

    -
  • -
  • -

    exit: quit the prompt

    -
  • -
  • -

    r: run the last generated code snippet

    -
  • -
-

Example:

-

f moving_avg.a
-xexec

-

See ./tests/ for more examples.

-

Architecture

-

Architecture

-

AQuery Compiler

-
    -
  • The query is first processed by the AQuery Compiler which is composed of a frontend that parses the query into AST and a backend that generates target code that delivers the query.
  • -
  • Front end of AQuery++ Compiler is built on top of mo-sql-parsing with modifications to handle AQuery dialect and extension.
  • -
  • Backend of AQuery++ Compiler generates target code dependent on the Execution Engine. It can either be the C++ code for AQuery Execution Engine or sql and C++ post-processor for Hybrid Engine or k9 for the k9 Engine.
  • -
-

Execution Engines

-
    -
  • AQuery++ supports different execution engines thanks to the decoupled compiler structure.
  • -
  • AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins and udf functions.
  • -
  • Hybrid Execution Engine: decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
  • -
  • K9 Execution Engine: (discontinued).
  • -
-

Roadmap

-
    -
  • SQL Parser -> AQuery Parser (Front End)
  • -
  • AQuery-C++ Compiler (Back End) -
      -
    • Schema and Data Model
    • -
    • Data acquisition/output from/to csv file
    • -
    -
  • -
  • Execution Engine -
      -
    • Projections and single-group Aggregations
    • -
    • Group by Aggregations
    • -
    • Filters
    • -
    • Order by
    • -
    • Assumption
    • -
    • Flatten
    • -
    • UDFs (Hybrid Engine only)
    • -
    • User Module
    • -
    • Triggers
    • -
    • Join (Hybrid Engine only)
    • -
    • Subqueries
    • -
    -
  • -
  • Query Optimization -
      -
    • Selection/Order by push-down
    • -
    • Join Optimization (Only in Hybrid Engine)
    • -
    -
  • -
-

Known Issues:

-
    -
  • Interval based triggers
  • -
  • Hot reloading server binary
  • -
  • Bug fixes: type deduction misaligned in Hybrid Engine
  • -
  • Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
  • -
  • C++ Meta-Programming: Eliminate template recursions as much as possible.
  • -
  • Functionality: Basic helper functions in aquery
  • -
  • Improvement: More DDLs, e.g. drop table, update table, etc.
  • -
  • Bug: Join-Aware Column management
  • -
  • Bug: Order By after Group By
  • -
-
-
-
- -
- - -
-
- -
-
-
-

About

- -

- An in-memory column-store time-series database that uses query compilation -

- -

Topics

- - -

Resources

- - -

License

- - - - - - - -

Stars

- - -

Watchers

- - -

Forks

- - -
-
- - - - - - - - - - - -
-
-

Languages

-
- - - - - -
- - -
-
-
-
- -
- -
- - -
- -
- - -
-
- -
- - - - - - - - - - - - - - - - - - - - - diff --git a/MonetDB-release-epel.noarch.rpm b/MonetDB-release-epel.noarch.rpm deleted file mode 100644 index 3edafcff9245dc48aa9a7c196bb2052f04843bd7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6588 zcma)92UHZv)^1?PNrIxNI0lq7!@vLok|cxVBtg=I9vGSA1d;>=BpFdebU{#1LBfh6 z2q-ElA}YqkfJzim5kbU+%3lr5zTNlld1w2a>RVskP`B=_y4C%@apjiyj6P*c1BzD3BMe0#qbh zgouGeq@bk>mSa-0k53A(6??7tI*R@5NA2%X@S6}9-Ppic0?A0vn;TJ6mx6|u4ryj$ z^T@@YtnRtre?FADb+|x6z2;iATJrVe*KvN8g%?C^lofZmYD{^Qm^*2wTNYQr{;hi@ z*jk|`q!->%Ow4F-!eZ_E^!^^|{S2GTk=^bCTkOVn!Oh=u<@(0CmwCS5A_ukmDniZ9 ziBWI-vS=>cYu?BUO&?BqaA1f1zTC!6@w-=k6J(UpOEc4oxehPwzJ2I$%81*(_`Kbv zq25nvaU*G(rOIDi_E<{e$2YpA;!S&6%*t8Dkx4xlL)u+(75B`)>NdaT)KIXBvu)VK z8MzA1$bhYLb=ZrKn>8eU=mtwFGc2YDJ_u!XR*bxCyHNJf>{4{bA0#=NjZ!$#$bdv6 zP|QqWI*mvo(8*-jh;Cv^Gcz_ahK)!DW-x(hYDNYbfS?6S{#A@u{WZpSx+Q-|8Du0# zYwXk-SJCP!@9~_t+pQhL%}~uvS(|feL(^I11B@wwa>(hMCXWdjhobB(@?JRhE^}kT z20?mPny#%*{`x&`P^)Hq6~vQK=jL2C#Fp$x_pA8GR2Rm-efp*B_6EZ05goUEP^~cM zZ1J|My+>UX--Ju8(ck2;s(Q)JGaZ6&4J$|1cgZ)X3g!DMC9y(x@dd+TsUJ7K%~NQ@ zoxd_5yi~Wc+ppzd_mS&c-h1H*m%Yd6`*%bKSGB(w9JnwUF_eri?o_K1@39CI{k*c( zmJ}(Qh|^6u|Jdh<-&(b(lH7}1bXJVwlfV6ZcG~hs06SFY%bx4)B*&ZtD$O~s_K(JV zc2)M?W^q>!(#wl43XoVTm4MgG`Iw%b-V6Hk{&IfSf^A<1#;idD`(LsQ;e&=`K?*eF znL97w{6L~8egi0i)liI_9|SK(F>-#SLDNL>dlWmMcoa}@oXxQ{wEZFp2ecdDE@+CbrgRE6wEI=H{TDGAA<6KqBsS`zfin=7GuENGv|FLpkU1y zWNo0NiRPggSsQ|p2qSzMK!J{EA&O-I1@RLtL$TZ}7F&&Cq#glzF)b7;qI?31ksJVh zF_51b`>KEfAH?hd1^K|t2NcogqI@I|hN^*$GD*w@dJ_D z3@BpX6i{FvgV;l;8K8)K7K#zS02W95MfkR}SYj2Rh`!V8xP%3uNPL`8Y>#3WK!Lsl zGC#r(MEPDQ4nq0JIKmG``2s)@`x{X{;va&MyaGQYwgQUChXRV&&q8q+iYrh|0TirP z;trsQK5Z6DqWc8{qj){ar=!>%#Y_}?q8RZD%r6-QC^Ekg#dGihq#Fq#B*WbGaHzK$vkS544Fy= zpA$SXFwED(`g*(JX1IawpaXnm10ys(UPi_*3M_+5rbZ*vb9tN?1`QYzM8Q}#OojP; zG7=sJ6B*;oEC6BsC5$q>|AdjlM&rZBA}a$6pfJ&;`+!9{gP1vhiFsTe14P1&!4}40 zwHSC9uP5WncMF|94 z{#t!~knXvygiN7N6I|Kx_^YeBhfbsJWOo+sd|;A)W%?E0IQ|nVAv^ 
zBr=^~02&Na$t0R7!Hi;RWMoF96HJK)Bm%|M&r5KX8AW1^Xn0fj_1GBYuQDKHTxQp|{CBAI4FBGC!P1TqX8nwU`y=tKgI zNH8#^BL{l!;c++u=)YcQJpad+D4x%w;(1&a1c7^U=J_Wblew7x&C=}5ppeM?cjLh{ zaG>UdAz+`$plI2FJ;VcB5T-eT4d_8;!F=sG9x`Ijh=loq|BQK&94j{1WdBdug&&xp4M9Xa0dIsefQ;}ac)|=t-pDBUDu;~m zhBITPU~@r1Y4g@0&G#byvago$I^Rtkm`B69gCma2!oyndXGYeMsjQiOiN#@OKJKA^ zCy7k;?<5WWwIs3_94LS+P7FAg944q5j7TSrMF2~}%V4ou ze=AE=G8@b0#3FNn5=z4g+0-a9I}-Fj#i4LOWu4J9nK6eN(FUtn5S7c|Kye9>AzI16k(@c1Gt~_j$wqGMzds_7 zjjx@H3swN`C5Xo0&CDAAcbPea#B=5wdV2at1nAHENlFL=A1yL{Q*uW|q2(e9U=ZWw zY|^V#EE66IEtG$Op%{J>%5FV&QPEps%QaWEJbqvzhPpI}Hnyfzxn|o~rC4Yx>14zn zIhNM+kGs-e^*iwpsV6!PLF|qDX4WF~{6vn1dzX&#lpzANq*zzc;@m zA#uyPMYyT*+u041(i#Utnx`D3Qr@aAt;kKfry-Xgxnso@<){&_9}(rTip%VBu3IE; zHB7R9saEbkKhS%#$n)^vZB@?%1{>O4eP8q`_>Su4yR3y}_@~Ok~{e(1}_(XkUAK#OX$OCSIylB;}|^KgOhT@s7;IiG9_&1EN9m zMr}OA16$;qMH_bqSQC`)ea;(lY+bneeA06<`aX#_vW27XxhoqD63Zh`OX&Ipr0E4W zJm_taQA@X8JO7-y$pv-R6tr;7(s^xX<9|xZzgl1SJ%L!GR4S-klCnrvS=z3Xxm9OP zcgeYnvYETgP4AYxv@+h2c+fh$Uu-zMif*+~;?06vUsf8+W?lM@BPurc-nQ5^f1jVW zvZz#+#4&S=B`#-_y$^a4Yd*Eb=cuPT9^6#bB^4I`J&h)IRdz$svR{=wPX|RGxtxuv zXA+OQJ52QOcI_-Ul^^m$(!FX5wYws1z`1(rK!A9K%J)7J3&Vh&aFIfk( zZ`N-Q{4uWg=7z`T16p|@#~Nc6(MMbj{L@zLx#bt0MJkkqriy9O*(N6zy6o7gWrPV7 z8}~F#sjy59T@u8zjICRpPl`L$=CUQCYt-a7h4+^yKbQPGE;TXLN%wFvXbNAhn0#fp z_mqT+QKX>%BB#$k=>0&9_MM&B5T2BNc~Lv$ymLLvYR^ zY0cu*U4hBM1=_9HeY+Otw~nn%T^Ch+Li13(nQFm-7LwJjweQ1k(%g>Sj_bX4MQ5eo z-k9yeMOxf*-V0jPnkG&=wPf!-zPm!{Wrfx5Bg->>$Qq9rkErbKabbHH9fS<@K7yJ7b!gCd8FfZ z=GVyWij=~JtKsKOT`NU;{N0LaWQDh*J&oON^pKKQ8r`7}yQ!9s*N?JUw#RLs^R`VU z&2wnKQ8X++y!p__!^ZII^{Pjbe?R$?>9C+9$Xn=jLDNxqBDF0n0r#neb$^3A#z(3O$%l6)Wtc|&(P^~jmge#P>|uGqiudW+YL;D27c9No z-&y3AV?!_4gwt=^hz@pM5fAJeY|mO+Z+1ze&|GQM>)rX0?hHTE&uX>iis^C@$JKvH zbkoL;cfLKSD!<}ac89!f=%Tj5MSZ%v5}JmsuP?YCAuGP#;jX;NQSE}fi_Z@oSknHG z5dD;tTyW;>o=tZ?pEW0y<$Sf_+$6S=u6Z~H>ctS7Z@x`_OXRs0wA*A{9PfPA7I!#h zjaHcTyOjCY<7oJ0vFTWi$rS7gZCk_s{xLtofa&Pbt%Csr-#*o?9FS98sUxvszhPpA 
z{I%Y~H8!Cw6SM}$d98=!mOqtY?6-Spw@*S&BhSUPvhLZ_a|DCQR)NmuMpK!>ONFR%VkfxNL2<`{hXu3}gEIjL=NszDq%Dnms zE4=e~FZhpngSpr0s*Tq*P8?l-qG#99@vhvQn}X?+^pWF7T%Ht|jMo=^xF>VDQ~syH zRC+UeSrGoddO*03>7J3A{ak;aZt({v*E7z1TlN~WCb@k0h5o+xMHch*Dxyx@=_q@D zw#p~bC)Rsx1LfWoN?h7U^)=deht6ev({UJSaK`m}hZHu+1@Z^y>(IYRIPTNuHIkD~ z{yvnZPid_b5b@J0mySPm`cy-IS$v?`bsuHF*X6Tf%cnoDveuAnZQIfsa4EoAdP|G> z8$zkBro!LAzw$Ax5-BL07lj%gbcXf`KSB&S* zIwzG>j~=HnSzVnqAq#Wb6`Uy6>&ibcHSfrJPIp{95WQ87cR1VK_o`|lCp9cq@H5F? z&O**MKcBlr?pezInt-0cye2GWL6Os&qBO%*n{+E1x*8i}b4nFop2tP1?{Lic<2 zf#T&~OK-qc+j4L8M{eKs_H|a^)+dKAPsN;CCQpwR*0gp;`*)#&KN&H#Qz^9NZO1S9Ed^EO)oMx)?T;z!F|>>n$#Ymz z<+XgrnN^E5+^myh?gg(j=nAf)=RR}`Tv}(lH{Ekg^N#cDg6|Ou&*RQJ`;xBwdZuz} zEK?;<*DBFirBJyK-|@iL=z{iuonmst#87zg#5G6VSD(~L*rnN|$G=WLi`RdwA!YO| zysMRY@>?jaWwFZHq7`(&jtrt?&;Md_2@a+))s$>R$=v|inD zjd4!6-mta_pQzHDbAjeivgv7Q_Y+l#F6GnYbHPeaFKrvKmaM;16r0}jqNC!0r1Qn{ z$>1N)FKDN173xlg25o$9Y(ain7H3y;vZv_N&9s9LA8GeLu_W)KPpN{6z)|6r&gxrDr=9FR4+fhLZ^1==>;&UIsaJv#*Oqw)2oYzzG)H9*R`yDNLH`+ zHSc@7{@eDwUs+wo6QTv_eiok5@kzwtyPod@OsD l?Y+|4+>60kDjy+{{qsv!LQt~Hjm-KtJxgYO3}k|W{|}$