Bill Sun 3 years ago
parent 70d7167c1e
commit 588ca0ba0b

@ -1,7 +1,7 @@
# AQuery++
# AQuery++ DB
AQuery++ Compiler that compiles AQuery into C++17.
Frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
AQuery++ Database is an In-Memory Column-Store Database that incorporates compiled query execution.
Compiler frontend built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing).
## Roadmap
- [x] SQL Parser -> AQuery Parser (Front End)

@ -6,8 +6,8 @@ from engine.utils import base62uuid
class create_table(ast_node):
name = 'create_table'
def __init__(self, parent: "ast_node", node, context: Context = None, cexpr = None):
self.cexpr = cexpr
def __init__(self, parent: "ast_node", node, context: Context = None, cexprs = None):
self.cexprs = cexprs
super().__init__(parent, node, context)
def produce(self, node):
if type(node) is not TableInfo:
@ -23,19 +23,21 @@ class create_table(ast_node):
self.context.tables_in_context[tbl] = tbl.table_name
tbl.cxt_name = tbl.table_name
tbl.refer_all()
if self.cexpr is None:
# create an empty new table
if self.cexprs is None:
for c in tbl.columns:
self.emit(f"{c.cxt_name}.init();")
# create an output table
else:
if len(self.context.scans) == 0:
for i, c in enumerate(tbl.columns):
self.emit(f"{c.cxt_name}.init();")
self.emit(f"{c.cxt_name} = {self.cexpr[i]()};")
self.emit(f"{c.cxt_name} = {self.cexprs[i]()};")
else:
scanner:scan = self.context.scans[-1]
for i, c in enumerate(tbl.columns):
scanner.add(f"{c.cxt_name}.init();", "init")
scanner.add(f"{c.cxt_name} = {self.cexpr[i](scanner.it_ver)};")
scanner.add(f"{c.cxt_name} = {self.cexprs[i](scanner.it_ver)};")
class insert(ast_node):
name = 'insert'

@ -95,6 +95,7 @@ class projection(ast_node):
sname = expr(self, e)._expr
fname = expr.toCExpr(sname) # fastest access method at innermost context
absname = expr(self, e, abs_col=True)._expr # absolute name at function scope
# TODO: Make it single pass here.
compound = True # compound column
cexprs.append(fname)
cname = e if type(e) is str else ''.join([a if a in base62alp else '' for a in expr.toCExpr(absname)()])
@ -109,21 +110,21 @@ class projection(ast_node):
compound = compound and has_groupby and has_other(self.datasource.rec, self.group_node.referenced)
self.datasource.rec = None
typename = ''
if not compound:
typename = f'value_type<decays<decltype({absname})>>'
else :
typename = f'decays<decltype({absname})>'
if not compound:
typename = f'value_type<{typename}>'
cols.append(ColRef(cname, expr.toCExpr(typename)(), self.out_table, 0, None, cname, i, compound=compound))
self.out_table.add_cols(cols, False)
if has_groupby:
create_table(self, self.out_table) # only initializes out_table.
create_table(self, self.out_table) # creates empty out_table.
self.group_node.finalize(cexprs, self.out_table)
else:
create_table(self, self.out_table, cexpr = cexprs)
create_table(self, self.out_table, cexprs = cexprs) # create and populate out_table.
self.datasource.group_node = None
if self.where is not None:
@ -134,7 +135,7 @@ class projection(ast_node):
self.datasource = self.out_table
self.context.datasource = self.out_table # discard current ds
orderby_node = orderby(self, node['orderby'])
self.emit(f'auto {disp_varname} ={self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
self.emit(f'auto {disp_varname} = {self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
else:
disp_varname = f'*{self.out_table.cxt_name}'
if self.disp:

@ -0,0 +1,5 @@
#pragma once
// Placeholder type for lineage tracking; it currently carries no members.
//
// A primary class template may declare only one template parameter pack, and
// that pack must be the last parameter. The previous two-pack form
// `<class...T1, class...T2>` is therefore ill-formed and stopped every
// translation unit that included this header from compiling.
// If two independent type lists are needed later, pass each list wrapped in
// its own type (e.g. a tuple-like holder) and add a partial specialization.
template <class... Ts>
struct lineage {
};

@ -1,145 +0,0 @@
from engine.ast import ColRef, TableInfo, ast_node, Context, include
from engine.groupby import groupby
from engine.join import join
from engine.expr import expr
from engine.orderby import orderby
from engine.scan import filter
from engine.utils import base62uuid, enlist, base62alp
from engine.ddl import create_table, outfile
import copy
# AST node for a SELECT statement (registered under name 'select').
# It resolves the data source in spawn() and emits the code that builds,
# optionally orders, prints and writes out the result table in consume().
# NOTE(review): indentation is not visible in this view, so the comments below
# describe statement order only; confirm nesting against the real file.
class projection(ast_node):
name='select'
# Stores the display flag and the requested output name, clears the per-query
# state (group_node, assumption, where) and then runs the ast_node constructor.
def __init__(self, parent:ast_node, node, context:Context = None, outname = None, disp = True):
self.disp = disp
self.outname = outname
self.group_node = None
self.assumption = None
self.where = None
ast_node.__init__(self, parent, node, context)
# Falls back to a context-generated temporary table name when no outname was given.
def init(self, _):
if self.outname is None:
self.outname = self.context.gen_tmptable()
# Normalizes node['select'] into the list self.projections.
def produce(self, node):
p = node['select']
self.projections = p if type(p) is list else [p]
# NOTE(review): prints the raw node on every call; looks like leftover debug output.
print(node)
# Resolves self.datasource from the 'from' clause, then builds the optional
# where-filter and group-by nodes.
# Raises ValueError('spawn error: from clause') when no data source was resolved.
def spawn(self, node):
self.datasource = None
if 'from' in node:
from_clause = node['from']
if type(from_clause) is list:
# from joins
join(self, from_clause)
elif type(from_clause) is dict:
if 'value' in from_clause:
value = from_clause['value']
if type(value) is dict:
if 'select' in value:
# from subquery
projection(self, from_clause, disp = False)
else:
# TODO: from func over table
print(f'from func over table{node}')
elif type(value) is str:
# table referenced by name inside a dict-style from clause
self.datasource = self.context.tables_byname[value]
if 'assumptions' in from_clause:
# ordering assumptions on the source are handled by an orderby node
self.assumption = orderby(self, enlist(from_clause['assumptions']))
elif type(from_clause) is str:
self.datasource = self.context.tables_byname[from_clause]
if self.datasource is None:
raise ValueError('spawn error: from clause')
if self.datasource is not None:
# remember the previous context data source so consume() can restore it
self.datasource_changed = True
self.prev_datasource = self.context.datasource
self.context.datasource = self.datasource
if 'where' in node:
self.where = filter(self, node['where'], True)
# self.datasource = filter(self, node['where'], True).output
#self.context.datasource = self.datasource
if 'groupby' in node:
self.group_node = groupby(self, node['groupby'])
self.datasource = copy.copy(self.datasource) # shallow copy
self.datasource.groupinfo = self.group_node
else:
self.group_node = None
# Builds the output table from the projections, fills it (directly or through
# the group-by node), then emits the optional order-by view, print and outfile code.
def consume(self, node):
self.inv = True
# name of the emitted variable that holds the ordered view (used only with 'orderby')
disp_varname = 'd'+base62uuid(7)
has_groupby = False
if self.group_node is not None:
# There is group by;
has_groupby = True
cexprs = []
flatten = False
cols = []
self.out_table = TableInfo('out_'+base62uuid(4), [], self.context)
if 'outfile' in node:
flatten = True
new_names = []
for i, proj in enumerate(self.projections):
cname = ''
compound = False
# fresh record set for this projection; it is compared against group_node.referenced below
self.datasource.rec = set()
if type(proj) is dict:
if 'value' in proj:
e = proj['value']
sname = expr(self, e)._expr
fname = expr.toCExpr(sname) # fastest access method at innermost context
absname = expr(self, e, abs_col=True)._expr # absolute name at function scope
compound = True
cexprs.append(fname)
# default column name: the expression itself if it is a plain string, otherwise
# its C expression reduced to the characters contained in base62alp
cname = e if type(e) is str else ''.join([a if a in base62alp else '' for a in expr.toCExpr(absname)()])
if 'name' in proj: # renaming column by AS keyword
cname = proj['name']
new_names.append(cname)
elif type(proj) is str:
col = self.datasource.get_col_d(proj)
if type(col) is ColRef:
col.reference()
compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced
self.datasource.rec = None
# NOTE(review): absname is only assigned in the dict/'value' branch above; for a plain
# string projection it appears to be unbound (or stale from an earlier iteration) here.
cols.append(ColRef(cname, expr.toCExpr(f'decays<decltype({absname})>')(0), self.out_table, 0, None, cname, i, compound=compound))
self.out_table.add_cols(cols, False)
if has_groupby:
# no cexpr passed here; the group-by node's finalize() receives the expressions instead
create_table(self, self.out_table)
self.group_node.finalize(cexprs, self.out_table)
else:
# the projection expressions are handed to create_table directly
create_table(self, self.out_table, cexpr = cexprs)
self.datasource.group_node = None
if self.where is not None:
self.where.finalize()
has_orderby = 'orderby' in node
if has_orderby:
self.datasource = self.out_table
self.context.datasource = self.out_table # discard current ds
orderby_node = orderby(self, node['orderby'])
self.emit(f'auto {disp_varname} ={self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();')
else:
# without ordering, print the dereferenced output table itself
disp_varname = f'*{self.out_table.cxt_name}'
if self.disp:
self.emit(f'print({disp_varname});')
if flatten:
# NOTE(review): self.inv is set to True at the top of consume(), so this condition
# looks always false unless a call in between resets it; confirm intent.
if len(self.projections) > 1 and not self.inv:
self.emit(f"{disp_varname}:+{disp_varname}")
outfile(self, node['outfile'])
if self.datasource_changed:
# restore the data source that was active before spawn() replaced it
self.context.datasource = self.prev_datasource
import sys
include(sys.modules[__name__])

@ -21,62 +21,65 @@
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
</PropertyGroup>
<ItemGroup>
<Folder Include="aquery_parser\" />
<Folder Include="aquery_parser\__pycache__\" />
<Folder Include="engine\" />
<Folder Include="engine\__pycache__\" />
<Folder Include="..\aquery_parser\" />
<Folder Include="..\aquery_parser\__pycache__\" />
<Folder Include="..\engine\" />
<Folder Include="..\engine\__pycache__\" />
<Folder Include="parser\" />
<Folder Include="parser\__pycache__\" />
<Folder Include="__pycache__\" />
</ItemGroup>
<ItemGroup>
<Compile Include="aquery_parser\keywords.py" />
<Compile Include="aquery_parser\sql_parser.py" />
<Compile Include="aquery_parser\types.py" />
<Compile Include="aquery_parser\utils.py" />
<Compile Include="aquery_parser\windows.py" />
<Compile Include="aquery_parser\__init__.py" />
<Compile Include="engine\ast.py" />
<Compile Include="engine\ddl.py" />
<Compile Include="engine\expr.py" />
<Compile Include="engine\groupby.py" />
<Compile Include="engine\join.py" />
<Compile Include="engine\orderby.py" />
<Compile Include="engine\projection.py" />
<Compile Include="engine\scan.py" />
<Compile Include="engine\types.py" />
<Compile Include="engine\utils.py" />
<Compile Include="engine\__init__.py" />
<Compile Include="prompt.py" />
<Compile Include="..\aquery_parser\keywords.py" />
<Compile Include="..\aquery_parser\sql_parser.py" />
<Compile Include="..\aquery_parser\types.py" />
<Compile Include="..\aquery_parser\utils.py" />
<Compile Include="..\aquery_parser\windows.py" />
<Compile Include="..\aquery_parser\__init__.py" />
<Compile Include="..\engine\ast.py" />
<Compile Include="..\engine\ddl.py" />
<Compile Include="..\engine\expr.py" />
<Compile Include="..\engine\groupby.py" />
<Compile Include="..\engine\join.py" />
<Compile Include="..\engine\orderby.py" />
<Compile Include="..\engine\projection.py" />
<Compile Include="..\engine\scan.py" />
<Compile Include="..\engine\types.py" />
<Compile Include="..\engine\utils.py" />
<Compile Include="..\engine\__init__.py" />
<Compile Include="..\prompt.py" />
</ItemGroup>
<ItemGroup>
<Content Include="aquery_parser\__pycache__\keywords.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\keywords.cpython-39.pyc" />
<Content Include="aquery_parser\__pycache__\sql_parser.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\sql_parser.cpython-39.pyc" />
<Content Include="aquery_parser\__pycache__\types.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\types.cpython-39.pyc" />
<Content Include="aquery_parser\__pycache__\utils.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\utils.cpython-39.pyc" />
<Content Include="aquery_parser\__pycache__\windows.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\windows.cpython-39.pyc" />
<Content Include="aquery_parser\__pycache__\__init__.cpython-310.pyc" />
<Content Include="aquery_parser\__pycache__\__init__.cpython-39.pyc" />
<Content Include="engine\__pycache__\ast.cpython-310.pyc" />
<Content Include="engine\__pycache__\ast.cpython-39.pyc" />
<Content Include="engine\__pycache__\ddl.cpython-310.pyc" />
<Content Include="engine\__pycache__\ddl.cpython-39.pyc" />
<Content Include="engine\__pycache__\expr.cpython-310.pyc" />
<Content Include="engine\__pycache__\expr.cpython-39.pyc" />
<Content Include="engine\__pycache__\groupby.cpython-310.pyc" />
<Content Include="engine\__pycache__\join.cpython-310.pyc" />
<Content Include="engine\__pycache__\join.cpython-39.pyc" />
<Content Include="engine\__pycache__\orderby.cpython-310.pyc" />
<Content Include="engine\__pycache__\projection.cpython-310.pyc" />
<Content Include="engine\__pycache__\projection.cpython-39.pyc" />
<Content Include="engine\__pycache__\scan.cpython-310.pyc" />
<Content Include="engine\__pycache__\types.cpython-310.pyc" />
<Content Include="engine\__pycache__\utils.cpython-310.pyc" />
<Content Include="engine\__pycache__\utils.cpython-39.pyc" />
<Content Include="engine\__pycache__\__init__.cpython-310.pyc" />
<Content Include="engine\__pycache__\__init__.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\keywords.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\keywords.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\sql_parser.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\sql_parser.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\types.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\types.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\utils.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\utils.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\windows.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\windows.cpython-39.pyc" />
<Content Include="..\aquery_parser\__pycache__\__init__.cpython-310.pyc" />
<Content Include="..\aquery_parser\__pycache__\__init__.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\ast.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\ast.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\ddl.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\ddl.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\expr.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\expr.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\groupby.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\join.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\join.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\orderby.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\projection.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\projection.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\scan.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\types.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\utils.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\utils.cpython-39.pyc" />
<Content Include="..\engine\__pycache__\__init__.cpython-310.pyc" />
<Content Include="..\engine\__pycache__\__init__.cpython-39.pyc" />
</ItemGroup>
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
<!-- Uncomment the CoreCompile target to enable the Build command in

@ -41,6 +41,7 @@ cxt->tables.insert({"out_4DCN", out_4DCN});
auto& out_4DCN_sumtestc = *(ColRef<decays<decltype(sum(test_c))>> *)(&out_4DCN->colrefs[0]);
auto& out_4DCN_b = *(ColRef<value_type<decays<decltype(test_b)>>> *)(&out_4DCN->colrefs[1]);
auto& out_4DCN_d = *(ColRef<value_type<decays<decltype(test_d)>>> *)(&out_4DCN->colrefs[2]);
auto lineage = test->bind(out_4DCN);
out_4DCN_sumtestc.init();
out_4DCN_b.init();
out_4DCN_d.init();
@ -50,8 +51,10 @@ auto &val_7BUMR6d = i1s.second;
out_4DCN_sumtestc.emplace_back(sum(test_c[val_7BUMR6d]));
out_4DCN_b.emplace_back(get<1>(key_4Q0aEyH));
out_4DCN_d.emplace_back(get<2>(key_4Q0aEyH));
lineage.emplace_back(val_7BUMR6d[0]);
}
auto d6X0PMzl =out_4DCN->order_by_view<-3,1>();
print(lineage.rid);
auto d6X0PMzl = out_4DCN->order_by_view<-3,1>();
print(d6X0PMzl);
return 0;
}

@ -91,14 +91,36 @@ decayed_t<VT,T> maxw(uint32_t w, const VT<T>& arr) {
}
return ret;
}
// Running (prefix) sum: element k of the result is arr[0] + ... + arr[k].
// The result has the same length as `arr`; an empty input yields an empty result.
template<class T, template<typename ...> class VT>
decayed_t<VT, types::GetLongType<T>> sums(const VT<T>& arr) {
    using result_t = decayed_t<VT, types::GetLongType<T>>;
    const uint32_t& n = arr.size;
    result_t prefix(n);
    if (n) {
        // seed with the first element, then extend the previous partial sum
        prefix[0] = arr[0];
        for (uint32_t k = 1; k < n; ++k)
            prefix[k] = prefix[k - 1] + arr[k];
    }
    return prefix;
}
// Running average: element k of the result is the mean of arr[0..k].
// The result has the same length as `arr`; an empty input yields an empty result.
template<class T, template<typename ...> class VT>
decayed_t<VT, types::GetFPType<T>> avgs(const VT<T>& arr) {
    using fp_t = types::GetFPType<T>;
    const uint32_t& n = arr.size;
    decayed_t<VT, fp_t> means(n);
    if (n) {
        // `running` accumulates the sum of the elements seen so far
        types::GetLongType<T> running;
        running = means[0] = arr[0];
        for (uint32_t k = 1; k < n; ++k) {
            running += arr[k];
            means[k] = running / (fp_t)(k + 1);
        }
    }
    return means;
}
template<class T, template<typename ...> class VT>
decayed_t<VT, types::GetLongType<T>> sumw(uint32_t w, const VT<T>& arr) {
const uint32_t& len = arr.size;
decayed_t<VT, types::GetLongType<T>> ret(len);
uint32_t i = 0;
w = w > len ? len : w;
if(arr.size)
ret[i++] = arr[0];
if(len) ret[i++] = arr[0];
for (; i < w; ++i)
ret[i] = ret[i-1] + arr[i];
for (; i < len; ++i)
@ -113,14 +135,15 @@ decayed_t<VT, types::GetFPType<T>> avgw(uint32_t w, const VT<T>& arr) {
uint32_t i = 0;
types::GetLongType<T> s;
w = w > len ? len : w;
if(arr.size)
s = ret[i++] = arr[0];
if(len) s = ret[i++] = arr[0];
for (; i < w; ++i)
ret[i] = (s += arr[i])/(FPType)(i+1);
for (; i < len; ++i)
ret[i] = ret[i-1] + (arr[i] - arr[i-w])/(FPType)w;
return ret;
}
// Aggregate functions applied to a single (non-vector) value:
// count is always 1, and max / min / avg of one value are the value itself.
template <class T>
inline constexpr T count(const T&) {
    return 1;
}
template <class T>
inline constexpr T max(const T& scalar) {
    return scalar;
}
template <class T>
inline constexpr T min(const T& scalar) {
    return scalar;
}
template <class T>
inline constexpr T avg(const T& scalar) {
    return scalar;
}

@ -130,16 +130,13 @@ template <long long _Index, class... _Types>
constexpr inline ColRef<std::tuple_element_t<_Index, std::tuple<_Types...>>>& get(const TableView<_Types...>& table) noexcept {
return *(ColRef<std::tuple_element_t<_Index, std::tuple<_Types...>>> *) & (table.info.colrefs[_Index]);
}
template <class T>
struct is_vector_impl : std::false_type {};
template <class V>
struct is_vector_impl<ColRef<V>> : std::true_type {};
template <class V>
struct is_vector_impl<ColView<V>> : std::true_type {};
template <class V>
struct is_vector_impl<vector_type<V>> : std::true_type {};
template <class T>
constexpr static bool is_vector_type = is_vector_impl<T>::value;
template<class ...Types>
struct TableView;
@ -150,6 +147,29 @@ struct TableInfo {
uint32_t n_cols;
typedef std::tuple<Types...> tuple_type;
void print(const char* __restrict sep, const char* __restrict end) const;
template <class ...Types2>
struct lineage_t {
TableInfo<Types...>* this_table;
TableInfo<Types2...>* table;
vector_type<uint32_t> rid;
constexpr lineage_t(TableInfo<Types...>*this_table, TableInfo<Types2...> *table)
: this_table(this_table), table(table), rid(0) {}
constexpr lineage_t() : this_table(0), table(0), rid(0) {}
template <int col>
inline auto& get(uint32_t idx) {
return get<col>(*table)[rid[idx]];
}
void emplace_back(const uint32_t& v) {
rid.emplace_back(v);
}
};
// Creates a lineage_t that pairs this table with `table2`; no row ids are recorded yet.
template<class ...Types2>
auto bind(TableInfo<Types2...>* table2) {
    using link_t = lineage_t<Types2...>;
    return link_t(this, table2);
}
template <size_t j = 0>
typename std::enable_if<j == sizeof...(Types) - 1, void>::type print_impl(const uint32_t& i, const char* __restrict sep = " ") const;
template <size_t j = 0>
@ -196,6 +216,8 @@ struct TableInfo {
auto order_by_view () {
return TableView<Types...>(order_by<cols...>(), *this);
}
// Print 2 -- generate printf string first, supports flattening, supports sprintf/printf/fprintf
template <int col, int ...rem_cols, class Fn, class ...__Types>
inline void print2_impl(Fn func, const uint32_t& i, const __Types& ... args) const {
using this_type = typename std::tuple_element<col, tuple_type>::type;
@ -257,6 +279,7 @@ struct TableView {
delete idxs;
}
};
template <class T>
constexpr static inline bool is_vector(const ColRef<T>&) {
return true;
@ -265,12 +288,6 @@ template <class T>
constexpr static inline bool is_vector(const vector_type<T>&) {
return true;
}
template <class T>
constexpr static inline bool is_vector(const T&) {
return false;
}
template<class ...Types>
TableInfo<Types...>::TableInfo(const char* name, uint32_t n_cols) : name(name), n_cols(n_cols) {

@ -9,25 +9,33 @@
#ifdef _MSC_VER
#define __restrict__ __restrict
#endif
// Catch-all overload of is_vector(): any argument type without a more specific
// overload is reported as "not a vector".
template <class Other>
static inline constexpr bool is_vector(const Other& /*unused*/) {
    return false;
}
// Type-level "is this a vector?" trait. The primary template answers false;
// types that should count as vectors specialize it to derive from std::true_type.
template <class Candidate>
struct is_vector_impl : std::integral_constant<bool, false> {};
// Shorthand for is_vector_impl<T>::value.
template <class Candidate>
static constexpr bool is_vector_type = is_vector_impl<Candidate>::value;
namespace types {
enum Type_t {
AINT, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, ALONG, ASHORT, ADATE, ATIME, ACHAR,
AUINT, AULONG, AUSHORT, AUCHAR, NONE, ERROR
AUINT, AULONG, AUSHORT, AUCHAR, VECTOR, NONE, ERROR
};
static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%llf", "%ld", "%hi", "%s", "%s", "%c",
"%u", "%lu", "%hu", "%hhu", "NULL" };
"%u", "%lu", "%hu", "%hhu", "Vector<%s>", "NULL", "ERROR" };
// TODO: deal with date/time <=> str/uint conversion
// Date value stored as a 32-bit integer.
// Parsing from text is not implemented yet, so the constructor ignores its
// argument; `val` is zero-initialized so that reading it is well-defined
// (previously it was left uninitialized).
struct date_t {
    uint32_t val = 0;
    // d: textual date; currently unused.
    date_t(const char* d) {
        (void)d;
    }
    // Declared here; the definition is not part of this block.
    std::string toString() const;
};
// Time value stored as a 32-bit integer.
// Parsing from text is not implemented yet, so the constructor ignores its
// argument; `val` is zero-initialized so that reading it is well-defined
// (previously it was left uninitialized).
struct time_t {
    uint32_t val = 0;
    // d: textual time; currently unused.
    time_t(const char* d) {
        (void)d;
    }
    // Declared here; the definition is not part of this block.
    std::string toString() const;
};
@ -51,12 +59,14 @@ namespace types {
f(unsigned short, AUSHORT) \
f(unsigned char, AUCHAR)
constexpr static Type_t getType() {
#define TypeConnect(x, y) if(typeid(T) == typeid(x)) return y; else
inline constexpr static Type_t getType() {
#define TypeConnect(x, y) if constexpr(std::is_same<x, T>::value) return y; else
ConnectTypes(TypeConnect)
if constexpr (is_vector_type<T>)
return VECTOR;
else
return NONE;
}
//static constexpr inline void print(T& v);
};
// Maps one ConnectTypes entry (C++ type `t`, Type_t tag `at`) to `sizeof(t),`;
// the tag is ignored.
#define ATypeSize(t, at) sizeof(t),
// sizeof() of every type listed in ConnectTypes, in list order, followed by a trailing 1.
// NOTE(review): presumably indexed by Type_t; the enum has more enumerators after the
// ConnectTypes entries than this table has trailing slots -- confirm before indexing
// with those values.
static constexpr size_t AType_sizes[] = { ConnectTypes(ATypeSize) 1 };

Loading…
Cancel
Save