diff --git a/README.md b/README.md
index 7328873..20400d0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# AQuery++
+# AQuery++ DB
 
-AQuery++ Compiler that compiles AQuery into C++17.
-Frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
+AQuery++ Database is an In-Memory Column-Store Database that incorporates compiled query execution.
+Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
 
 ## Roadmap
 - [x] SQL Parser -> AQuery Parser (Front End)
diff --git a/engine/ddl.py b/engine/ddl.py
index 0c946b6..4a1e344 100644
--- a/engine/ddl.py
+++ b/engine/ddl.py
@@ -6,8 +6,8 @@ from engine.utils import base62uuid
 
 class create_table(ast_node):
     name = 'create_table'
-    def __init__(self, parent: "ast_node", node, context: Context = None, cexpr = None):
-        self.cexpr = cexpr
+    def __init__(self, parent: "ast_node", node, context: Context = None, cexprs = None):
+        self.cexprs = cexprs
         super().__init__(parent, node, context)
     def produce(self, node):
         if type(node) is not TableInfo:
@@ -23,19 +23,21 @@ class create_table(ast_node):
             self.context.tables_in_context[tbl] = tbl.table_name
             tbl.cxt_name = tbl.table_name
         tbl.refer_all()
-        if self.cexpr is None:
+        # create an empty new table
+        if self.cexprs is None:
             for c in tbl.columns:
                 self.emit(f"{c.cxt_name}.init();")
+        # create an output table
        else:
            if len(self.context.scans) == 0:
                for i, c in enumerate(tbl.columns):
                    self.emit(f"{c.cxt_name}.init();")
-                    self.emit(f"{c.cxt_name} = {self.cexpr[i]()};")
+                    self.emit(f"{c.cxt_name} = {self.cexprs[i]()};")
            else:
                scanner:scan = self.context.scans[-1]
                for i, c in enumerate(tbl.columns):
                    scanner.add(f"{c.cxt_name}.init();", "init")
-                    scanner.add(f"{c.cxt_name} = {self.cexpr[i](scanner.it_ver)};")
+                    scanner.add(f"{c.cxt_name} = {self.cexprs[i](scanner.it_ver)};")
 
 class insert(ast_node):
     name = 'insert'
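The `cexprs` parameter is a list of callables, one per output column, each returning the compiled C++ expression string for that column (optionally parameterized by the enclosing scan's iterator, `scanner.it_ver`). A minimal sketch of the two statement shapes `create_table` emits, using `std::vector` as a stand-in for the engine's column type; the column names and expressions below are made up for illustration:

```cpp
// Sketch of the emitted code shapes only, not the engine's actual ColRef API.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> src_a{1, 2, 3};    // hypothetical source column
    std::vector<int> out_a;             // output column, whole-column path
    std::vector<int> out_b;             // output column, per-row (scan) path

    // No scan in scope: "col.init(); col = cexprs[i]();" per output column.
    out_a.clear();                      // stands in for out_a.init();
    out_a = src_a;                      // out_a = <compiled expression>;

    // Enclosing scan: init goes to the scan preamble, the expression is
    // evaluated once per row with the scan's iterator.
    out_b.clear();                      // added via scanner.add(..., "init")
    for (uint32_t it = 0; it < src_a.size(); ++it)
        out_b.push_back(src_a[it] * 2); // out_b gets cexprs[i](it) row by row
    std::printf("%u %u\n", (unsigned)out_a.size(), (unsigned)out_b.size());
}
```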
diff --git a/engine/projection.py b/engine/projection.py
index 22cd5c6..180d197 100644
--- a/engine/projection.py
+++ b/engine/projection.py
@@ -95,6 +95,7 @@ class projection(ast_node):
                     sname = expr(self, e)._expr
                     fname = expr.toCExpr(sname) # fastest access method at innermost context
                     absname = expr(self, e, abs_col=True)._expr # absolute name at function scope
+                    # TODO: Make it single pass here.
                     compound = True # compound column
                     cexprs.append(fname)
                     cname = e if type(e) is str else ''.join([a if a in base62alp else '' for a in expr.toCExpr(absname)()])
@@ -109,21 +110,21 @@ class projection(ast_node):
             compound = compound and has_groupby and has_other(self.datasource.rec, self.group_node.referenced)
             self.datasource.rec = None
-            typename = ''
+            typename = f'decays'
             if not compound:
-                typename = f'value_type>'
-            else :
-                typename = f'decays'
+                typename = f'value_type<{typename}>'
             cols.append(ColRef(cname, expr.toCExpr(typename)(), self.out_table, 0, None, cname, i, compound=compound))
         self.out_table.add_cols(cols, False)
 
         if has_groupby:
-            create_table(self, self.out_table) # only initializes out_table.
+            create_table(self, self.out_table) # creates empty out_table.
             self.group_node.finalize(cexprs, self.out_table)
         else:
-            create_table(self, self.out_table, cexpr = cexprs)
+            create_table(self, self.out_table, cexprs = cexprs) # create and populate out_table.
+
+
         self.datasource.group_node = None
 
         if self.where is not None:
@@ -134,7 +135,7 @@ class projection(ast_node):
             self.datasource = self.out_table
             self.context.datasource = self.out_table # discard current ds
             orderby_node = orderby(self, node['orderby'])
-            self.emit(f'auto {disp_varname} ={self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
+            self.emit(f'auto {disp_varname} = {self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
         else:
             disp_varname = f'*{self.out_table.cxt_name}'
         if self.disp:
diff --git a/msc-plugin/lineage.hpp b/msc-plugin/lineage.hpp
new file mode 100644
index 0000000..cb20820
--- /dev/null
+++ b/msc-plugin/lineage.hpp
@@ -0,0 +1,5 @@
+#pragma once
+template
+struct lineage {
+
+};
\ No newline at end of file
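`msc-plugin/lineage.hpp` lands here as an empty stub; the version this commit actually exercises is the `lineage_t` member added to `TableInfo` in `server/table.h` further down, which records, for every row of an output table, the row id it came from in the source table. A minimal sketch of that idea with plain `std::vector` and made-up data; the engine's `TableInfo`/`ColRef` types are deliberately left out:

```cpp
// Lineage as a vector of source row ids, modeled on lineage_t in server/table.h.
#include <cstdint>
#include <cstdio>
#include <vector>

struct lineage_sketch {
    std::vector<uint32_t> rid;              // source row id per output row
    void emplace_back(uint32_t v) { rid.push_back(v); }
};

int main() {
    std::vector<int> src_c{10, 20, 30, 40}; // hypothetical source column
    lineage_sketch lin;
    lin.emplace_back(0);                    // output row 0 came from source row 0
    lin.emplace_back(2);                    // output row 1 came from source row 2
    // Resolve an output row back to the source value through the recorded row id.
    std::printf("row 1 -> src_c[%u] = %d\n", lin.rid[1], src_c[lin.rid[1]]);
}
```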
diff --git a/msvs-py/engine/projection.py b/msvs-py/engine/projection.py
deleted file mode 100644
index 0eafd1f..0000000
--- a/msvs-py/engine/projection.py
+++ /dev/null
@@ -1,145 +0,0 @@
-from engine.ast import ColRef, TableInfo, ast_node, Context, include
-from engine.groupby import groupby
-from engine.join import join
-from engine.expr import expr
-from engine.orderby import orderby
-from engine.scan import filter
-from engine.utils import base62uuid, enlist, base62alp
-from engine.ddl import create_table, outfile
-import copy
-
-class projection(ast_node):
-    name='select'
-    def __init__(self, parent:ast_node, node, context:Context = None, outname = None, disp = True):
-        self.disp = disp
-        self.outname = outname
-        self.group_node = None
-        self.assumption = None
-        self.where = None
-        ast_node.__init__(self, parent, node, context)
-    def init(self, _):
-        if self.outname is None:
-            self.outname = self.context.gen_tmptable()
-
-    def produce(self, node):
-        p = node['select']
-        self.projections = p if type(p) is list else [p]
-        print(node)
-
-    def spawn(self, node):
-        self.datasource = None
-        if 'from' in node:
-            from_clause = node['from']
-            if type(from_clause) is list:
-                # from joins
-                join(self, from_clause)
-            elif type(from_clause) is dict:
-                if 'value' in from_clause:
-                    value = from_clause['value']
-                    if type(value) is dict:
-                        if 'select' in value:
-                            # from subquery
-                            projection(self, from_clause, disp = False)
-                        else:
-                            # TODO: from func over table
-                            print(f'from func over table{node}')
-                    elif type(value) is str:
-                        self.datasource = self.context.tables_byname[value]
-                if 'assumptions' in from_clause:
-                    self.assumption = orderby(self, enlist(from_clause['assumptions']))
-
-        elif type(from_clause) is str:
-            self.datasource = self.context.tables_byname[from_clause]
-
-        if self.datasource is None:
-            raise ValueError('spawn error: from clause')
-
-        if self.datasource is not None:
-            self.datasource_changed = True
-            self.prev_datasource = self.context.datasource
-            self.context.datasource = self.datasource
-        if 'where' in node:
-            self.where = filter(self, node['where'], True)
-            # self.datasource = filter(self, node['where'], True).output
-            #self.context.datasource = self.datasource
-
-        if 'groupby' in node:
-            self.group_node = groupby(self, node['groupby'])
-            self.datasource = copy.copy(self.datasource) # shallow copy
-            self.datasource.groupinfo = self.group_node
-        else:
-            self.group_node = None
-
-    def consume(self, node):
-        self.inv = True
-        disp_varname = 'd'+base62uuid(7)
-        has_groupby = False
-        if self.group_node is not None:
-            # There is group by;
-            has_groupby = True
-        cexprs = []
-        flatten = False
-        cols = []
-        self.out_table = TableInfo('out_'+base62uuid(4), [], self.context)
-        if 'outfile' in node:
-            flatten = True
-
-        new_names = []
-        for i, proj in enumerate(self.projections):
-            cname = ''
-            compound = False
-            self.datasource.rec = set()
-            if type(proj) is dict:
-                if 'value' in proj:
-                    e = proj['value']
-                    sname = expr(self, e)._expr
-                    fname = expr.toCExpr(sname) # fastest access method at innermost context
-                    absname = expr(self, e, abs_col=True)._expr # absolute name at function scope
-                    compound = True
-                    cexprs.append(fname)
-                    cname = e if type(e) is str else ''.join([a if a in base62alp else '' for a in expr.toCExpr(absname)()])
-                if 'name' in proj: # renaming column by AS keyword
-                    cname = proj['name']
-                    new_names.append(cname)
-            elif type(proj) is str:
-                col = self.datasource.get_col_d(proj)
-                if type(col) is ColRef:
-                    col.reference()
-            compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced
-            self.datasource.rec = None
-            cols.append(ColRef(cname, expr.toCExpr(f'decays')(0), self.out_table, 0, None, cname, i, compound=compound))
-        self.out_table.add_cols(cols, False)
-
-        if has_groupby:
-            create_table(self, self.out_table)
-            self.group_node.finalize(cexprs, self.out_table)
-        else:
-            create_table(self, self.out_table, cexpr = cexprs)
-        self.datasource.group_node = None
-
-        if self.where is not None:
-            self.where.finalize()
-
-        has_orderby = 'orderby' in node
-        if has_orderby:
-            self.datasource = self.out_table
-            self.context.datasource = self.out_table # discard current ds
-            orderby_node = orderby(self, node['orderby'])
-            self.emit(f'auto {disp_varname} ={self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
-        else:
-            disp_varname = f'*{self.out_table.cxt_name}'
-        if self.disp:
-            self.emit(f'print({disp_varname});')
-
-
-        if flatten:
-            if len(self.projections) > 1 and not self.inv:
-                self.emit(f"{disp_varname}:+{disp_varname}")
-            outfile(self, node['outfile'])
-
-        if self.datasource_changed:
-            self.context.datasource = self.prev_datasource
-
-
-import sys
-include(sys.modules[__name__])
\ No newline at end of file
diff --git a/msvs-py/msvs-py.pyproj b/msvs-py/msvs-py.pyproj
index 67fab2a..b2fdde2 100644
--- a/msvs-py/msvs-py.pyproj
+++ b/msvs-py/msvs-py.pyproj
@@ -1,90 +1,93 @@
-  Debug
-  2.0
-  ccc243f5-663e-45b7-a6de-b2468c58b3a7
-  .
-  ..\msvs-py
-  .
-  .
-  msvs-py
-  msvs-py
-  true
-  false
-  true
-  false
+  Debug
+  2.0
+  ccc243f5-663e-45b7-a6de-b2468c58b3a7
+  .
+  ..\msvs-py
+  .
+  .
+  msvs-py
+  msvs-py
+  true
+  false
+  true
+  false
\ No newline at end of file
diff --git a/out.cpp b/out.cpp
index 6d056ce..9a2e44f 100644
--- a/out.cpp
+++ b/out.cpp
@@ -41,6 +41,7 @@ cxt->tables.insert({"out_4DCN", out_4DCN});
 auto& out_4DCN_sumtestc = *(ColRef> *)(&out_4DCN->colrefs[0]);
 auto& out_4DCN_b = *(ColRef>> *)(&out_4DCN->colrefs[1]);
 auto& out_4DCN_d = *(ColRef>> *)(&out_4DCN->colrefs[2]);
+auto lineage = test->bind(out_4DCN);
 out_4DCN_sumtestc.init();
 out_4DCN_b.init();
 out_4DCN_d.init();
@@ -50,8 +51,10 @@ auto &val_7BUMR6d = i1s.second;
 out_4DCN_sumtestc.emplace_back(sum(test_c[val_7BUMR6d]));
 out_4DCN_b.emplace_back(get<1>(key_4Q0aEyH));
 out_4DCN_d.emplace_back(get<2>(key_4Q0aEyH));
+lineage.emplace_back(val_7BUMR6d[0]);
 }
-auto d6X0PMzl =out_4DCN->order_by_view<-3,1>();
+print(lineage.rid);
+auto d6X0PMzl = out_4DCN->order_by_view<-3,1>();
 print(d6X0PMzl);
 return 0;
 }
\ No newline at end of file
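The regenerated `out.cpp` shows the shape of the group-by code the compiler now emits: walk the hash-grouped index, push one aggregate per group into the output columns, and record the first source row id of each group in the bound lineage. A rough, self-contained rendering of that loop with `std::unordered_map` and `std::vector` standing in for the engine's group index and `ColRef` columns; the table and column names are hypothetical:

```cpp
// Approximate shape of the generated group-by loop, not the engine's real types.
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
    std::vector<int> test_b{1, 1, 2};
    std::vector<int> test_c{10, 20, 30};

    // key -> row ids of that group (the role of i1s.second in out.cpp)
    std::unordered_map<int, std::vector<uint32_t>> groups;
    for (uint32_t i = 0; i < test_b.size(); ++i) groups[test_b[i]].push_back(i);

    std::vector<int> out_b, out_sum;
    std::vector<uint32_t> lineage_rid;           // first source row of each group
    for (auto& [key, rows] : groups) {
        int s = 0;
        for (uint32_t r : rows) s += test_c[r]; // sum(test_c[val]) for this group
        out_b.push_back(key);
        out_sum.push_back(s);
        lineage_rid.push_back(rows[0]);          // lineage.emplace_back(val[0]);
    }
    for (size_t i = 0; i < out_b.size(); ++i)
        std::printf("b=%d sum=%d rid=%u\n", out_b[i], out_sum[i], lineage_rid[i]);
}
```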
diff --git a/server/aggregations.h b/server/aggregations.h
index 8bd79b5..7a5accc 100644
--- a/server/aggregations.h
+++ b/server/aggregations.h
@@ -91,14 +91,36 @@ decayed_t maxw(uint32_t w, const VT& arr) {
 	}
 	return ret;
 }
+
+template class VT>
+decayed_t> sums(const VT& arr) {
+	const uint32_t& len = arr.size;
+	decayed_t> ret(len);
+	uint32_t i = 0;
+	if(len) ret[i++] = arr[0];
+	for (; i < len; ++i)
+		ret[i] = ret[i-1] + arr[i];
+	return ret;
+}
+template class VT>
+decayed_t> avgs(const VT& arr) {
+	const uint32_t& len = arr.size;
+	typedef types::GetFPType FPType;
+	decayed_t ret(len);
+	uint32_t i = 0;
+	types::GetLongType s;
+	if(len) s = ret[i++] = arr[0];
+	for (; i < len; ++i)
+		ret[i] = (s+=arr[i])/(FPType)(i+1);
+	return ret;
+}
 template class VT>
 decayed_t> sumw(uint32_t w, const VT& arr) {
 	const uint32_t& len = arr.size;
 	decayed_t> ret(len);
 	uint32_t i = 0;
 	w = w > len ? len : w;
-	if(arr.size)
-		ret[i++] = arr[0];
+	if(len) ret[i++] = arr[0];
 	for (; i < w; ++i)
 		ret[i] = ret[i-1] + arr[i];
 	for (; i < len; ++i)
@@ -113,14 +135,15 @@ decayed_t> avgw(uint32_t w, const VT& arr) {
 	uint32_t i = 0;
 	types::GetLongType s;
 	w = w > len ? len : w;
-	if(arr.size)
-		s = ret[i++] = arr[0];
+	if(len) s = ret[i++] = arr[0];
 	for (; i < w; ++i)
 		ret[i] = (s += arr[i])/(FPType)(i+1);
 	for (; i < len; ++i)
 		ret[i] = ret[i-1] + (arr[i] - arr[i-w])/(FPType)w;
 	return ret;
 }
+
+template constexpr inline T count(const T& v) { return 1; }
 template constexpr inline T max(const T& v) { return v; }
 template constexpr inline T min(const T& v) { return v; }
 template constexpr inline T avg(const T& v) { return v; }
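`sums` and `avgs` are the unwindowed counterparts of `sumw`/`avgw`: `ret[i]` is the running sum, respectively the running mean, of `arr[0..i]`. The same recurrences restated over `std::vector`, with the `decayed_t`/`VT` template machinery and the engine's `GetLongType`/`GetFPType` promotions replaced by `long long` and `double` so the example stands alone:

```cpp
// Running (prefix) sum and running mean, mirroring the loops in sums()/avgs().
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<long long> sums_sketch(const std::vector<int>& arr) {
    std::vector<long long> ret(arr.size());
    uint32_t i = 0;
    if (!arr.empty()) ret[i++] = arr[0];
    for (; i < arr.size(); ++i) ret[i] = ret[i - 1] + arr[i];            // prefix sum
    return ret;
}

std::vector<double> avgs_sketch(const std::vector<int>& arr) {
    std::vector<double> ret(arr.size());
    long long s = 0;
    uint32_t i = 0;
    if (!arr.empty()) s = ret[i++] = arr[0];
    for (; i < arr.size(); ++i) ret[i] = (s += arr[i]) / (double)(i + 1); // running mean
    return ret;
}

int main() {
    std::vector<int> a{1, 2, 3, 4};
    auto s = sums_sketch(a);   // 1 3 6 10
    auto v = avgs_sketch(a);   // 1 1.5 2 2.5
    for (size_t i = 0; i < a.size(); ++i) std::printf("%lld %.2f\n", s[i], v[i]);
}
```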
diff --git a/server/table.h b/server/table.h
index 49ce40c..655d11e 100644
--- a/server/table.h
+++ b/server/table.h
@@ -130,16 +130,13 @@ template constexpr inline ColRef>>& get(const TableView<_Types...>& table) noexcept {
 	return *(ColRef>> *) & (table.info.colrefs[_Index]);
 }
-template
-struct is_vector_impl : std::false_type {};
+
 template
 struct is_vector_impl> : std::true_type {};
 template
 struct is_vector_impl> : std::true_type {};
 template
 struct is_vector_impl> : std::true_type {};
-template
-constexpr static bool is_vector_type = is_vector_impl::value;
 
 template
 struct TableView;
@@ -150,6 +147,29 @@ struct TableInfo {
 	uint32_t n_cols;
 	typedef std::tuple tuple_type;
 	void print(const char* __restrict sep, const char* __restrict end) const;
+
+	template
+	struct lineage_t {
+		TableInfo* this_table;
+		TableInfo* table;
+		vector_type rid;
+		constexpr lineage_t(TableInfo*this_table, TableInfo *table)
+			: this_table(this_table), table(table), rid(0) {}
+		constexpr lineage_t() : this_table(0), table(0), rid(0) {}
+
+		template
+		inline auto& get(uint32_t idx) {
+			return get(*table)[rid[idx]];
+		}
+		void emplace_back(const uint32_t& v) {
+			rid.emplace_back(v);
+		}
+	};
+	template
+	auto bind(TableInfo* table2) {
+		return lineage_t(this, table2);
+	}
+
 	template
 	typename std::enable_if::type print_impl(const uint32_t& i, const char* __restrict sep = " ") const;
 	template
@@ -196,6 +216,8 @@ struct TableInfo {
 	auto order_by_view () {
 		return TableView(order_by(), *this);
 	}
+
+	// Print 2 -- generate printf string first, supports flattening, supports sprintf/printf/fprintf
 	template
 	inline void print2_impl(Fn func, const uint32_t& i, const __Types& ... args) const {
 		using this_type = typename std::tuple_element::type;
@@ -257,6 +279,7 @@ struct TableView {
 		delete idxs;
 	}
 };
+
 template
 constexpr static inline bool is_vector(const ColRef&) {
 	return true;
 }
@@ -265,12 +288,6 @@ template
 constexpr static inline bool is_vector(const vector_type&) {
 	return true;
 }
-template
-constexpr static inline bool is_vector(const T&) {
-	return false;
-}
-
-
 template
 TableInfo::TableInfo(const char* name, uint32_t n_cols) : name(name), n_cols(n_cols) {
diff --git a/server/types.h b/server/types.h
index 08e52ef..5ce967d 100644
--- a/server/types.h
+++ b/server/types.h
@@ -9,25 +9,33 @@
 #ifdef _MSC_VER
 #define __restrict__ __restrict
 #endif
+
+template
+constexpr static inline bool is_vector(const T&) {
+	return false;
+}
+template
+struct is_vector_impl : std::false_type {};
+template
+constexpr static bool is_vector_type = is_vector_impl::value;
+
 namespace types {
 	enum Type_t {
 		AINT, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, ALONG, ASHORT, ADATE, ATIME, ACHAR,
-		AUINT, AULONG, AUSHORT, AUCHAR, NONE, ERROR
+		AUINT, AULONG, AUSHORT, AUCHAR, VECTOR, NONE, ERROR
 	};
 	static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%llf", "%ld", "%hi", "%s", "%s", "%c",
-		"%u", "%lu", "%hu", "%hhu", "NULL" };
+		"%u", "%lu", "%hu", "%hhu", "Vector<%s>", "NULL", "ERROR" };
 	// TODO: deal with data/time <=> str/uint conversion
 	struct date_t {
 		uint32_t val;
 		date_t(const char* d) {
-
 		}
 		std::string toString() const;
 	};
 	struct time_t {
 		uint32_t val;
 		time_t(const char* d) {
-
 		}
 		std::string toString() const;
 	};
@@ -51,12 +59,14 @@ namespace types {
 		f(unsigned short, AUSHORT) \
 		f(unsigned char, AUCHAR)
 
-	constexpr static Type_t getType() {
-#define TypeConnect(x, y) if(typeid(T) == typeid(x)) return y; else
+	inline constexpr static Type_t getType() {
+#define TypeConnect(x, y) if constexpr(std::is_same::value) return y; else
 		ConnectTypes(TypeConnect)
+		if constexpr (is_vector_type)
+			return VECTOR;
+		else
 		return NONE;
 	}
-	//static constexpr inline void print(T& v);
 };
 #define ATypeSize(t, at) sizeof(t),
 	static constexpr size_t AType_sizes[] = { ConnectTypes(ATypeSize) 1 };
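Moving `is_vector`, `is_vector_impl`, and `is_vector_type` from `table.h` into `types.h` lets `getType()` classify container-valued columns as the new `VECTOR` tag through `if constexpr`, alongside the `std::is_same` dispatch that replaces the old `typeid` comparison. A compact sketch of that trait-plus-`if constexpr` pattern, using `std::vector` in place of the engine's `vector_type`/`ColRef` specializations and a trimmed-down enum:

```cpp
// Trait + if-constexpr dispatch in the style of types.h; names are simplified.
#include <cstdio>
#include <type_traits>
#include <vector>

enum Type_t { AINT, AFLOAT, VECTOR, NONE };

template <class T> struct is_vector_impl : std::false_type {};
template <class T> struct is_vector_impl<std::vector<T>> : std::true_type {};
template <class T> constexpr bool is_vector_type = is_vector_impl<T>::value;

template <class T>
constexpr Type_t getType() {
    if constexpr (std::is_same_v<T, int>)        return AINT;
    else if constexpr (std::is_same_v<T, float>) return AFLOAT;
    else if constexpr (is_vector_type<T>)        return VECTOR;   // container-valued column
    else                                         return NONE;
}

int main() {
    static_assert(getType<int>() == AINT);
    static_assert(getType<std::vector<int>>() == VECTOR);
    std::printf("ok\n");
}
```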