From 2462bc6711971f0d15583a955906442208be7920 Mon Sep 17 00:00:00 2001 From: Bill Sun Date: Sun, 24 Apr 2022 02:56:19 +0800 Subject: [PATCH] update --- engine/ast.py | 4 + engine/ddl.py | 15 +++- engine/expr.py | 2 +- engine/orderby.py | 35 +++------ engine/projection.py | 35 +++++---- out.cpp | 71 +++++++++--------- q1.sql | 2 +- server/io.h | 15 ++++ server/priority_vector.hpp | 19 +++++ server/server.cpp | 5 +- server/server.vcxproj | 7 +- server/table.h | 148 +++++++++++++++++++++++++++---------- server/types.h | 12 ++- server/utils.h | 28 ++++--- server/vector_type.hpp | 8 +- 15 files changed, 259 insertions(+), 147 deletions(-) create mode 100644 server/io.h create mode 100644 server/priority_vector.hpp diff --git a/engine/ast.py b/engine/ast.py index 7c02e2e..a4aaf70 100644 --- a/engine/ast.py +++ b/engine/ast.py @@ -15,6 +15,9 @@ class ColRef: self.order_pending = None # order_pending self.compound = compound # compound field (list as a field) self.views = [] + self.aux_columns = [] # columns for temperary calculations + # e.g. order by, group by, filter by expressions + self.__arr__ = (cname, _ty, cobj, cnt, table, name, id) def reference(self): @@ -90,6 +93,7 @@ class TableInfo: type_tags += '>' self.cxt.emit(f'auto& {base_name} = *(TableInfo{type_tags} *)(cxt->tables[{self.table_name}]);') + return self.cxt_name def refer_all(self): self.reference() for c in self.columns: diff --git a/engine/ddl.py b/engine/ddl.py index 130d54f..a13ba2c 100644 --- a/engine/ddl.py +++ b/engine/ddl.py @@ -88,6 +88,15 @@ class outfile(ast_node): def produce(self, node): out_table:TableInfo = self.parent.out_table filename = node['loc']['literal'] if 'loc' in node else node['literal'] + sep = ',' if 'term' not in node else node['term']['literal'] + self.context.headers.add('fstream') + cout_backup_buffer = 'stdout_' + base62uuid(4) + ofstream = 'ofstream_' + base62uuid(6) + + self.emit(f'auto {cout_backup_buffer} = cout.rdbuf();') + self.emit(f'auto {ofstream} = ofstream("{filename}");') + self.emit(f'cout.rdbuf({ofstream}.rdbuf());') + self.emit_no_ln(f"\"{filename}\"1:`csv@(+(") l_compound = False l_cols = '' @@ -114,6 +123,10 @@ class outfile(ast_node): else: self.emit_no_ln(f'{l_keys}!+,/({ending(l_cols)})') self.emit('))') - + + self.emit(f'cout.rdbuf({cout_backup_buffer});') + self.emit(f'{ofstream}.close();') + + import sys include(sys.modules[__name__]) \ No newline at end of file diff --git a/engine/expr.py b/engine/expr.py index 0c8d380..91440e9 100644 --- a/engine/expr.py +++ b/engine/expr.py @@ -40,7 +40,7 @@ class expr(ast_node): 'not' : '!' } - coumpound_generating_ops = ['mod', 'mins', 'maxs', 'sums'] + \ + coumpound_generating_ops = ['avgs', 'mins', 'maxs', 'sums'] + \ list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() ) def __init__(self, parent, node, materialize_cols = True, abs_col = False): diff --git a/engine/orderby.py b/engine/orderby.py index ae410d4..7993fcd 100644 --- a/engine/orderby.py +++ b/engine/orderby.py @@ -16,43 +16,26 @@ class order_item: return ('' if self.order else '-') + f'({self.name})' def __str__(self): - return self.materialize() + return self.name def __repr__(self): return self.__str__() -class orders: - def __init__(self, node, datasource): - self.order_items = [] - self.materialized = False - self.view = None - self.node = node - self.datasource = datasource - self.n_attrs = -1 - - def materialize(self): - if not self.materialized: - self.view = View(self.node.context, self.datasource, False) - keys = ';'.join([f'{o}' for o in self.order_items]) - self.n_attrs = len(self.order_items) - self.node.emit(f"{self.view.name}: > +`j (({',' if self.n_attrs == 1 else ''}{keys}))") - self.materialized = True - - def append(self, o): - self.order_items.append(o) - class orderby(ast_node): name = '_orderby' - + def __init__(self, parent: "ast_node", node, context: Context = None): + self.col_list = [] + super().__init__(parent, node, context) def init(self, _): self.datasource = self.parent.datasource - self.order = orders(self, self.datasource) + self.order = [] self.view = '' def produce(self, node): if type(node) is not list: node = [node] for n in node: order = not ('sort' in n and n['sort'] == 'desc') + col_id = self.datasource.columns_byname[n['value']].id + self.col_list.append(col_id if order else -col_id-1) self.order.append(order_item(n['value'], self, order)) - - def consume(self, _): - self.datasource.order.append(self.order) \ No newline at end of file + def finialize(self, references): + self.order = [ o for o in self.order if o.name in references ] \ No newline at end of file diff --git a/engine/projection.py b/engine/projection.py index 644deb4..204632e 100644 --- a/engine/projection.py +++ b/engine/projection.py @@ -14,6 +14,7 @@ class projection(ast_node): self.disp = disp self.outname = outname self.group_node = None + self.assumption = None self.where = None ast_node.__init__(self, parent, node, context) def init(self, _): @@ -45,9 +46,8 @@ class projection(ast_node): elif type(value) is str: self.datasource = self.context.tables_byname[value] if 'assumptions' in from_clause: - for assumption in enlist(from_clause['assumptions']): - orderby(self, assumption) - + self.assumption = orderby(self, enlist(from_clause['assumptions'])) + elif type(from_clause) is str: self.datasource = self.context.tables_byname[from_clause] @@ -83,7 +83,8 @@ class projection(ast_node): self.out_table = TableInfo('out_'+base62uuid(4), [], self.context) if 'outfile' in node: flatten = True - + + new_names = [] for i, proj in enumerate(self.projections): cname = '' compound = False @@ -92,12 +93,14 @@ class projection(ast_node): if 'value' in proj: e = proj['value'] sname = expr(self, e)._expr - fname = expr.toCExpr(sname) - absname = expr(self, e, abs_col=True)._expr + fname = expr.toCExpr(sname) # fastest access method at innermost context + absname = expr(self, e, abs_col=True)._expr # absolute name at function scope compound = True cexprs.append(fname) - cname = ''.join([a if a in base62alp else '' for a in fname()]) - + cname = e if type(e) is str else ''.join([a if a in base62alp else '' for a in expr.toCExpr(absname)()]) + if 'name' in proj: # renaming column by AS keyword + cname = proj['name'] + new_names.append(cname) compound = compound and has_groupby and self.datasource.rec not in self.group_node.referenced cols.append(ColRef(cname, expr.toCExpr(f'decays')(0), self.out_table, 0, None, cname, i, compound=compound)) @@ -114,21 +117,17 @@ class projection(ast_node): self.where.finalize() has_orderby = 'orderby' in node - if has_orderby: self.datasource = self.out_table self.context.datasource = self.out_table # discard current ds orderby_node = orderby(self, node['orderby']) - self.context.datasource.materialize_orderbys() - self.emit_no_ln(f"{f'{disp_varname}:+' if flatten else ''}(") - - if self.disp or has_orderby: - self.emit(f'print(*{self.out_table.cxt_name});') - - if has_orderby: - self.emit(f')[{orderby_node.view}]') + self.emit(f'auto {disp_varname} ={self.out_table.reference()}->order_by_view<{",".join([f"{c}" for c in orderby_node.col_list])}>();') else: - self.context.emit_flush() + disp_varname = f'*{self.out_table.cxt_name}' + if self.disp: + self.emit(f'print({disp_varname});') + + if flatten: if len(self.projections) > 1 and not self.inv: self.emit(f"{disp_varname}:+{disp_varname}") diff --git a/out.cpp b/out.cpp index 6867019..d91c898 100644 --- a/out.cpp +++ b/out.cpp @@ -1,8 +1,8 @@ #include "csv.h" -#include -#include "./server/libaquery.h" #include "./server/hasher.h" +#include #include "./server/aggregations.h" +#include "./server/libaquery.h" extern "C" int __DLLEXPORT__ dllmain(Context* cxt) { using namespace std; @@ -17,40 +17,45 @@ test_a.init(); test_b.init(); test_c.init(); test_d.init(); -io::CSVReader<4> csv_reader_6qlGpe("test.csv"); -csv_reader_6qlGpe.read_header(io::ignore_extra_column, "a","b","c","d"); -int tmp_39gHMkie; -int tmp_190h2sZs; -int tmp_4a8dDzSN; -int tmp_3LAKxSmM; -while(csv_reader_6qlGpe.read_row(tmp_39gHMkie,tmp_190h2sZs,tmp_4a8dDzSN,tmp_3LAKxSmM)) { +io::CSVReader<4> csv_reader_53LkPG("test.csv"); +csv_reader_53LkPG.read_header(io::ignore_extra_column, "a","b","c","d"); +int tmp_43xeYChp; +int tmp_3Vnt4fLK; +int tmp_1HKZwQBO; +int tmp_6IwJuIpg; +while(csv_reader_53LkPG.read_row(tmp_43xeYChp,tmp_3Vnt4fLK,tmp_1HKZwQBO,tmp_6IwJuIpg)) { -test_a.emplace_back(tmp_39gHMkie); -test_b.emplace_back(tmp_190h2sZs); -test_c.emplace_back(tmp_4a8dDzSN); -test_d.emplace_back(tmp_3LAKxSmM); +test_a.emplace_back(tmp_43xeYChp); +test_b.emplace_back(tmp_3Vnt4fLK); +test_c.emplace_back(tmp_1HKZwQBO); +test_d.emplace_back(tmp_6IwJuIpg); } -typedef record record_type2Te4GFo; -unordered_map, transTypes> g79JNXM8; -for (uint32_t i5x = 0; i5x < test_a.size; ++i5x){ -g79JNXM8[forward_as_tuple(test_a[i5x],test_b[i5x],test_d[i5x])].emplace_back(i5x); +typedef record record_type1CmZCvh; +unordered_map, transTypes> g6nov6MR; +for (uint32_t i4I = 0; i4I < test_a.size; ++i4I){ +g6nov6MR[forward_as_tuple(test_a[i4I],test_b[i4I],test_d[i4I])].emplace_back(i4I); } -auto out_5NL7 = new TableInfo,decays,decays>("out_5NL7", 3); -cxt->tables.insert({"out_5NL7", out_5NL7}); -auto& out_5NL7_sumtestc = *(ColRef> *)(&out_5NL7->colrefs[0]); -auto& out_5NL7_get1None = *(ColRef> *)(&out_5NL7->colrefs[1]); -auto& out_5NL7_get2None = *(ColRef> *)(&out_5NL7->colrefs[2]); -out_5NL7_sumtestc.init(); -out_5NL7_get1None.init(); -out_5NL7_get2None.init(); -for(auto& i4l : g79JNXM8) { -auto &key_ADPihOU = i4l.first; -auto &val_7LsrkDP = i4l.second; -out_5NL7_sumtestc.emplace_back(sum(test_c[val_7LsrkDP])); -out_5NL7_get1None.emplace_back(get<1>(key_ADPihOU)); -out_5NL7_get2None.emplace_back(get<2>(key_ADPihOU)); +auto out_684r = new TableInfo,decays,decays>("out_684r", 3); +cxt->tables.insert({"out_684r", out_684r}); +auto& out_684r_sumtestc = *(ColRef> *)(&out_684r->colrefs[0]); +auto& out_684r_b = *(ColRef> *)(&out_684r->colrefs[1]); +auto& out_684r_d = *(ColRef> *)(&out_684r->colrefs[2]); +out_684r_sumtestc.init(); +out_684r_b.init(); +out_684r_d.init(); +for(auto& i3d : g6nov6MR) { +auto &key_1TaM8D7 = i3d.first; +auto &val_129np3x = i3d.second; +out_684r_sumtestc.emplace_back(sum(test_c[val_129np3x])); +out_684r_b.emplace_back(get<1>(key_1TaM8D7)); +out_684r_d.emplace_back(get<2>(key_1TaM8D7)); } -print(*out_5NL7); - +auto d2X3bP6l =out_684r->order_by_view<-3,1>(); +puts("a"); +print(*(out_684r->order_by<-3,1>())); +puts("b"); +print(out_684r->order_by_view<-3,1>()); +puts("e"); +print(*out_684r); return 0; } \ No newline at end of file diff --git a/q1.sql b/q1.sql index bd6d5fd..360dc2f 100644 --- a/q1.sql +++ b/q1.sql @@ -7,4 +7,4 @@ FIELDS TERMINATED BY "," SELECT sum(c), b, d FROM test group by a,b,d --- order by d DESC, b ASC +order by d DESC, b ASC diff --git a/server/io.h b/server/io.h new file mode 100644 index 0000000..ad7835b --- /dev/null +++ b/server/io.h @@ -0,0 +1,15 @@ +#pragma once +#include "types.h" +#include +#include +#include +#include +template +std::string generate_printf_string(const char* sep = " ", const char* end = "\n") { + std::string str; + (void)std::initializer_list{ + (str += types::printf_str[types::Types::getType()], str += sep, 0)... + }; + str += end; + return str; +} diff --git a/server/priority_vector.hpp b/server/priority_vector.hpp new file mode 100644 index 0000000..f1c7d6f --- /dev/null +++ b/server/priority_vector.hpp @@ -0,0 +1,19 @@ +#pragma once +#include "vector_type.hpp" +#include +#include +template +class priority_vector : public vector_type { + const Comparator comp; +public: + priority_vector(Comparator comp = std::less{}) : + comp(comp), vector_type(0) {} + void emplace_back(T val) { + vector_type::emplace_back(val); + std::push_heap(container, container + size, comp); + } + void pop_back() { + std::pop_heap(container, container + size, comp); + --size; + } +}; \ No newline at end of file diff --git a/server/server.cpp b/server/server.cpp index 5ff0738..fe06af1 100644 --- a/server/server.cpp +++ b/server/server.cpp @@ -75,6 +75,7 @@ int main(int argc, char** argv) { shm.FreeMemoryMap(); return 0; } +#include "utils.h" int _main() { @@ -83,6 +84,7 @@ int _main() //t.emplace_back(2); //print(t); //return 0; + puts(cpp_17 ?"true":"false"); void* handle = dlopen("dll.so", RTLD_LAZY); printf("handle: %x\n", handle); Context* cxt = new Context(); @@ -96,7 +98,8 @@ int _main() } dlclose(handle); } - + static_assert(std::is_same_v()), std::integer_sequence>, ""); return 0; + } diff --git a/server/server.vcxproj b/server/server.vcxproj index 221f1dd..03f1f79 100644 --- a/server/server.vcxproj +++ b/server/server.vcxproj @@ -168,6 +168,7 @@ + @@ -175,10 +176,4 @@ - - - - This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - \ No newline at end of file diff --git a/server/table.h b/server/table.h index abdeb06..ffbe96b 100644 --- a/server/table.h +++ b/server/table.h @@ -1,3 +1,5 @@ +// TODO: Replace `cout, printf` with sprintf&fputs and custom buffers + #ifndef _TABLE_H #define _TABLE_H @@ -85,13 +87,31 @@ inline ColRef ColRef<_Ty>::scast() return *(ColRef *)this; } using uColRef = ColRef; + +template struct TableInfo; +template struct TableView; + +template +constexpr inline auto& get(const TableInfo<_Types...>& table) noexcept { + if constexpr (order) + return *(ColRef>> *) & (table.colrefs[_Index]); + else + return *(ColRef>> *) & (table.colrefs[-1-_Index]); +} + +template +constexpr inline ColRef>>& get(const TableView<_Types...>& table) noexcept { + return *(ColRef>> *) & (table.info.colrefs[_Index]); +} +template +struct TableView; template struct TableInfo { const char* name; ColRef* colrefs; uint32_t n_cols; - void print(const char* __restrict sep, const char* __restrict end) const; typedef std::tuple tuple_type; + void print(const char* __restrict sep, const char* __restrict end) const; template typename std::enable_if::type print_impl(const uint32_t& i, const char* __restrict sep = " ") const; template @@ -103,46 +123,60 @@ struct TableInfo { template using getRecordType = typename GetTypes::type; TableInfo(const char* name, uint32_t n_cols); - //template - //struct Iterator_t { - // uint32_t val; - // const TableInfo* info; - // constexpr Iterator_t(const uint32_t* val, const TableInfo* info) noexcept : val(val), info(info) {} - // getRecordType operator*() { - // return getRecordType(info->colrefs[Idxs].operator[](*val)...); - // } - // bool operator != (const Iterator_t& rhs) { return rhs.val != val; } - // Iterator_t& operator++ () { - // ++val; - // return *this; - // } - // Iterator_t operator++ (int) { - // Iterator_t tmp = *this; - // ++val; - // return tmp; - // } - //}; - //template - //Iterator_t begin() const { - // - //} - //template - //Iterator_t end() const { - // - //} - // - //template - //order_by() { - // vector_type order(colrefs[0].size); - // std::sort(this->begin) - //} + template + inline void materialize(const vector_type& idxs, TableInfo* tbl = nullptr) { // inplace materialize + if constexpr(prog == 0) tbl = 0 ? this : tbl; + if constexpr (prog == sizeof...(Types)) return; + else { + auto& col = get(*this); + auto new_col = decays{idxs.size}; + for(uint32_t i = 0; i < idxs.size; ++i) + new_col[i] = col[idxs[i]]; + get(*tbl) = new_col; + materialize(); + } + } + inline TableInfo* materialize_copy(const vector_type& idxs) { + auto tbl = new TableInfo(this->name, sizeof...(Types)); + materialize<0>(idxs, tbl); + return tbl; + } + template + inline vector_type* order_by() { + vector_type* ord = new vector_type(colrefs[0].size); + for (uint32_t i = 0; i < colrefs[0].size; ++i) + (*ord)[i] = i; + std::sort(ord->begin(), ord->end(), [this](const uint32_t& lhs, const uint32_t& rhs) { + return + std::forward_as_tuple((cols >= 0 ? get= 0)>(*this)[lhs] : -get= 0)>(*this)[lhs]) ...) + < + std::forward_as_tuple((cols >= 0 ? get= 0)>(*this)[rhs] : -get= 0)>(*this)[rhs]) ...); + }); + return ord; + } + template + auto order_by_view () { + return TableView(order_by(), *this); + } + }; -template -constexpr inline ColRef>>& get(const TableInfo<_Types...>& table) noexcept { - return *(ColRef>> *) & (table.colrefs[_Index]); -} +template +struct TableView { + const vector_type* idxs; + const TableInfo& info; + constexpr TableView(const vector_type* idxs, const TableInfo& info) noexcept : idxs(idxs), info(info) {} + void print(const char* __restrict sep, const char* __restrict end) const; + template + typename std::enable_if::type print_impl(const uint32_t& i, const char* __restrict sep = " ") const; + template + typename std::enable_if < j < sizeof...(Types) - 1, void>::type print_impl(const uint32_t& i, const char* __restrict sep = " ") const; + + ~TableView() { + delete idxs; + } +}; template constexpr static inline bool is_vector(const ColRef&) { return true; @@ -159,7 +193,32 @@ template TableInfo::TableInfo(const char* name, uint32_t n_cols) : name(name), n_cols(n_cols) { this->colrefs = (ColRef*)malloc(sizeof(ColRef) * n_cols); } +template +template +inline typename std::enable_if::type +TableView::print_impl(const uint32_t& i, const char* __restrict sep) const { + std::cout << (get(*this))[(*idxs)[i]]; +} + +template +template +inline typename std::enable_if < j < sizeof...(Types) - 1, void>::type + TableView::print_impl(const uint32_t& i, const char* __restrict sep) const +{ + std::cout << (get(*this))[(*idxs)[i]] << sep; + print_impl(i, sep); +} +template +inline void TableView::print(const char* __restrict sep, const char* __restrict end) const { + int n_rows = 0; + if (info.colrefs[0].size > 0) + n_rows = info.colrefs[0].size; + for (int i = 0; i < n_rows; ++i) { + print_impl(i); + std::cout << end; + } +} template template inline typename std::enable_if::type @@ -247,17 +306,24 @@ template void print(const TableInfo& v, const char* delimiter = " ", const char* endline = "\n") { v.print(delimiter, endline); } +template +void print(const TableView& v, const char* delimiter = " ", const char* endline = "\n") { + v.print(delimiter, endline); +} template void print(const T& v, const char* delimiter = " ") { - printf(types::printf_str[types::Types::getType()], v); + std::cout<< v; + // printf(types::printf_str[types::Types::getType()], v); } template void inline print_impl(const T& v, const char* delimiter, const char* endline) { for (const auto& vi : v) { print(vi); - printf("%s", delimiter); + std::cout << delimiter; + // printf("%s", delimiter); } - printf("%s", endline); + std::cout << endline; + //printf("%s", endline); } template class VT> diff --git a/server/types.h b/server/types.h index 6d6c4b9..11c397b 100644 --- a/server/types.h +++ b/server/types.h @@ -118,6 +118,16 @@ struct decayS >{ using type = T::type ...>; }; template -using decays = typename decayS::type; +using decays = typename decayS::type>::type; +template +using decay_inner = typename decayS::type; + +template +auto fill_integer_array() { + if constexpr (n == 0) + return std::integer_sequence{}; + else + return fill_integer_array(); +}; #endif // !_TYPES_H diff --git a/server/utils.h b/server/utils.h index 0e1a4ef..cb58974 100644 --- a/server/utils.h +++ b/server/utils.h @@ -1,18 +1,16 @@ #pragma once #include #include - -template -struct const_range { - int arr[cnt]; - constexpr const_range() { - for (int i = begin, n = 0; n < cnt; ++n, i += interval) - arr[n] = i; - } - const int* begin() const { - return arr; - } - const int* end() const { - return arr + cnt; - } -}; \ No newline at end of file +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +constexpr static bool cpp_17 = true; +#else +constexpr static bool cpp_17 = false; +#endif +template +inline const char* str(const T& v) { + return ""; +} +template <> +inline const char* str(const bool& v) { + return v ? "true" : "false"; +} \ No newline at end of file diff --git a/server/vector_type.hpp b/server/vector_type.hpp index 05fe03c..2ec4cd6 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -22,7 +22,7 @@ template class vector_type { public: - void inline _copy(vector_type<_Ty>& vt) { + void inline _copy(const vector_type<_Ty>& vt) { this->size = vt.size; this->capacity = vt.capacity; this->container = (_Ty*)malloc(size * sizeof(_Ty)); @@ -30,6 +30,8 @@ public: memcpy(container, vt.container, sizeof(_Ty) * size); } void inline _move(vector_type<_Ty>&& vt) { + if (capacity > 0) free(container); + this->size = vt.size; this->capacity = vt.capacity; this->container = vt.container; @@ -52,7 +54,7 @@ public: } } constexpr vector_type() noexcept : size(0), capacity(0), container(0) {}; - constexpr vector_type(vector_type<_Ty>& vt) noexcept { + constexpr vector_type(const vector_type<_Ty>& vt) noexcept { _copy(vt); } constexpr vector_type(vector_type<_Ty>&& vt) noexcept { @@ -67,7 +69,7 @@ public: container[0] = vt; return *this; } - vector_type<_Ty> operator =(vector_type<_Ty>& vt) { + vector_type<_Ty> operator =(const vector_type<_Ty>& vt) { _copy(vt); return *this; }