branch: dev
author: Bill, 2 years ago
parent: 3dfecaebb1
commit: 613941ce06

.gitignore

@@ -49,6 +49,7 @@ test*.c*
 !test2.csv
 !moving_avg.csv
 !nyctx100.csv
+!network.csv
 *.out
 *.asm
 !mmw.so

network.csv

@@ -0,0 +1,7 @@
+src, dst, len, time
+s1, s2, 250, 1
+s1, s2, 270, 20
+s1, s2, 235, 141
+s2, s1, 330, 47
+s2, s1, 280, 150
+s2, s1, 305, 155
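
The new fixture is a small packet trace (source, destination, packet length, timestamp), whitelisted in the .gitignore hunk above. Given the `last` and group-by changes later in this commit, it presumably backs per-flow aggregate queries; a hypothetical example of what such a query computes, sketched in Python:

# Sketch: per-flow "last packet length", i.e. something like the hypothetical
# query SELECT src, dst, last(len) FROM network GROUP BY src, dst.
import csv, io

data = """src, dst, len, time
s1, s2, 250, 1
s1, s2, 270, 20
s1, s2, 235, 141
s2, s1, 330, 47
s2, s1, 280, 150
s2, s1, 305, 155"""

last_len = {}
for row in csv.DictReader(io.StringIO(data), skipinitialspace=True):
    last_len[(row['src'], row['dst'])] = int(row['len'])  # later rows overwrite: "last" wins

print(last_len)  # {('s1', 's2'): 235, ('s2', 's1'): 305}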

@@ -88,9 +88,9 @@ class TypeCollection:
 type_table = dict()
 AnyT = Types(-1)
 LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '')
-LazyT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')
-LazyT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')
-LazyT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
+DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')
+TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')
+TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
 DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True)
 LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True)
 FloatT = Types(16, name = 'float', cname = 'float', sqlname = 'REAL',
@@ -137,7 +137,8 @@ def _ty_make_dict(fn : str, *ty : Types):
 int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, ShortT, IntT)
 uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT)
 fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT)
-builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types}
+temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
+builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types}

 def get_int128_support():
     for t in int_types.values():
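
These two hunks fix a copy-paste bug: DATE, TIME, and TIMESTAMP were all being assigned to LazyT, so each assignment clobbered the previous one and none was registered under its own name. With distinct DateT/TimeT/TimeStampT bindings, temporal_types can be merged into builtin_types keyed by lowercased sqlname. A minimal sketch of that registry, with Types and _ty_make_dict replaced by stand-ins:

# Stand-ins for the diff's Types and _ty_make_dict, just to show the keying.
class Types:
    def __init__(self, type_id, name, sqlname, cname=''):
        self.type_id, self.name, self.sqlname, self.cname = type_id, name, sqlname, cname

def _ty_make_dict(fn, *ty):
    # the real fn is an expression string ('t.sqlname.lower()'), presumably
    # evaluated once per type; here we just key by lowercased SQL name directly
    return {t.sqlname.lower(): t for t in ty}

DateT = Types(200, 'DATE', 'DATE', 'types::date_t')
TimeT = Types(201, 'TIME', 'TIME', 'types::time_t')
TimeStampT = Types(202, 'TIMESTAMP', 'TIMESTAMP', 'types::timestamp_t')
temporal_types = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
builtin_types = {**temporal_types}  # the real dict also merges int, fp, and str types

assert builtin_types['timestamp'].cname == 'types::timestamp_t'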

@@ -357,7 +357,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
     cxt = xengine.exec(state.stmts, cxt, keep)
     this_udf = cxt.finalize_udf()
-    if False and this_udf:
+    if this_udf:
         with open('udf.hpp', 'wb') as outfile:
             outfile.write(this_udf.encode('utf-8'))
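
This re-enables the generated-UDF dump: finalize_udf() produces the generated source (or nothing), and the `if False and this_udf:` debug guard had been short-circuiting the write. The pattern, with finalize_udf stubbed:

# Sketch: write generated UDF source to udf.hpp only when there is any.
def finalize_udf():  # stub; the real method returns accumulated source text or None
    return '// generated UDF code\n'

this_udf = finalize_udf()
if this_udf:  # was `if False and this_udf:`, which never wrote the file
    with open('udf.hpp', 'wb') as outfile:
        outfile.write(this_udf.encode('utf-8'))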

@@ -233,12 +233,16 @@ class projection(ast_node):
                 out_typenames[key] = decltypestring
             else:
                 out_typenames[key] = val[0].cname
-            if (type(val[2].udf_called) is udf and
+            if (type(val[2].udf_called) is udf and # should bulkret also be colref?
                 val[2].udf_called.return_pattern == udf.ReturnPattern.elemental_return
                 or
-                self.group_node and self.group_node.use_sp_gb and
+                self.group_node and
+                (self.group_node.use_sp_gb and
                 val[2].cols_mentioned.intersection(
                     self.datasource.all_cols.difference(self.group_node.refs))
+                ) and val[2].is_compound # compound val not in key
+                # or
+                # (not self.group_node and val[2].is_compound)
             ):
                 out_typenames[key] = f'ColRef<{out_typenames[key]}>'
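
The reworked predicate wraps the emitted type in ColRef<> when a special group-by is in play and the projected value is compound and mentions columns outside the group-by key (the commented-out alternative for the ungrouped case is left for later). Boiled down, with the surrounding objects faked:

# Sketch of the new ColRef<> decision for the special-group-by branch.
def needs_colref(use_sp_gb, cols_mentioned, all_cols, group_refs, is_compound):
    outside_key = cols_mentioned & (all_cols - group_refs)  # cols not in the group-by key
    return use_sp_gb and bool(outside_key) and is_compound

# a compound `len` projection under GROUP BY src, dst stays a column:
print(needs_colref(True, {'len'}, {'src', 'dst', 'len', 'time'}, {'src', 'dst'}, True))  # True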

@@ -66,7 +66,6 @@ class expr(ast_node):
     def init(self, _):
         from reconstruct.ast import projection
         parent = self.parent
-        self.isvector = parent.isvector if type(parent) is expr else False
         self.is_compound = parent.is_compound if type(parent) is expr else False
         if type(parent) in [projection, expr]:
             self.datasource = parent.datasource
@@ -75,13 +74,16 @@ class expr(ast_node):
         self.udf_map = parent.context.udf_map
         self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
         self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
-        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max']
+        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last']

     def produce(self, node):
         from engine.utils import enlist
         from reconstruct.ast import udf
         if type(node) is dict:
+            if len(node) > 1:
+                print(f'Parser Error: {node} has more than 1 dict entry.')
             for key, val in node.items():
                 if key in self.operators:
                     if key in builtin_func:
@@ -96,6 +98,11 @@ class expr(ast_node):
                     exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
                     str_vals = [e.sql for e in exp_vals]
                     type_vals = [e.type for e in exp_vals]
+                    is_compound = any([e.is_compound for e in exp_vals])
+                    if key in self.ext_aggfuncs:
+                        self.is_compound = False
+                    else:
+                        self.is_compound = is_compound
                     try:
                         self.type = op.return_type(*type_vals)
                     except AttributeError as e:
@@ -107,7 +114,7 @@ class expr(ast_node):
                     self.sql = op(self.c_code, *str_vals)
             special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
-                            "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                            "maxs", "mins", "avgs", "sums", "deltas"]
             if self.context.special_gb:
                 special_func = [*special_func, *self.ext_aggfuncs]
@@ -203,10 +210,6 @@ class expr(ast_node):
             # get the column from the datasource in SQL context
             else:
-                p = self.parent
-                while type(p) is expr and not p.isvector:
-                    p.isvector = True
-                    p = p.parent
                 if self.datasource is not None:
                     self.raw_col = self.datasource.parse_col_names(node)
                     self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
@@ -214,6 +217,7 @@ class expr(ast_node):
                     self.is_ColExpr = True
                     self.sql = self.raw_col.name
                     self.type = self.raw_col.type
+                    self.is_compound = True
                 else:
                     self.sql = node
                     self.type = StrT
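
Together these hunks retire the old isvector mechanism, which back-propagated vectorness by mutating every ancestor expr, in favor of a forward-flowing is_compound flag: a resolved column reference marks itself compound, operator nodes inherit compoundness from any child, and the scalar aggregates in ext_aggfuncs (now including last) reset it. The propagation rule, condensed into a stand-in class:

# Condensed sketch of is_compound propagation through expr nodes.
EXT_AGGFUNCS = {'sum', 'avg', 'count', 'min', 'max', 'last'}

class Expr:
    def __init__(self, op=None, children=(), is_colref=False):
        if is_colref:
            self.is_compound = True        # a bare column is a vector of values
        elif op in EXT_AGGFUNCS:
            self.is_compound = False       # aggregates collapse the vector to a scalar
        else:
            self.is_compound = any(c.is_compound for c in children)

col = Expr(is_colref=True)
assert Expr('last', [col]).is_compound is False                   # last(len): scalar per group
assert Expr('+', [col, Expr('avg', [col])]).is_compound is True   # len + avg(len): still a vector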

@@ -172,13 +172,7 @@ decayed_t<VT, T> deltas(const VT<T>& arr) {
 template<class T, template<typename ...> class VT>
 T last(const VT<T>& arr) {
-    const uint32_t& len = arr.size;
-    decayed_t<VT, T> ret(len);
-    uint32_t i = 0;
-    if (len)
-        ret[i++] = arr[0];
-    for (; i < len; ++i)
-        ret[i] = arr[i-1];
-    return ret;
+    return arr[arr.size - 1];
 }
 // wrong behavior with count(0)
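
The removed body did not compute "last" at all: it built a one-step-lagged copy of the input (ret[i] = arr[i-1]) in a decayed_t<VT, T> buffer that did not even match the declared scalar return type T. Returning arr[arr.size - 1] makes last a genuine scalar aggregate, matching its move from special_func into ext_aggfuncs on the Python side; note the new body indexes arr.size - 1 unconditionally, so an empty column is now undefined behavior. In Python terms:

# Old vs. new `last` semantics on a plain list.
def last_old(arr):  # what the removed loop computed: a 1-step lag, not "last"
    return [arr[max(i - 1, 0)] for i in range(len(arr))]

def last_new(arr):  # the fix: the final element, as a scalar
    return arr[-1]

vals = [250, 270, 235]
print(last_old(vals))  # [250, 250, 270]
print(last_new(vals))  # 235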

@@ -17,6 +17,39 @@ inline size_t append_bytes(const unsigned char* _First) noexcept {
     return _Val;
 }

+namespace std{
+    template<>
+    struct hash<astring_view> {
+        size_t operator()(const astring_view& _Keyval) const noexcept {
+            return append_bytes(_Keyval.str);
+        }
+    };
+    template<>
+    struct hash<types::date_t> {
+        size_t operator() (const types::date_t& _Keyval) const noexcept {
+            return std::hash<unsigned int>()(*(unsigned int*)(&_Keyval));
+        }
+    };
+    template<>
+    struct hash<types::time_t> {
+        size_t operator() (const types::time_t& _Keyval) const noexcept {
+            return std::hash<unsigned int>()(_Keyval.ms) ^
+                std::hash<unsigned char>()(_Keyval.seconds) ^
+                std::hash<unsigned char>()(_Keyval.minutes) ^
+                std::hash<unsigned char>()(_Keyval.hours);
+        }
+    };
+    template<>
+    struct hash<types::timestamp_t>{
+        size_t operator() (const types::timestamp_t& _Keyval) const noexcept {
+            return std::hash<types::date_t>()(_Keyval.date) ^
+                std::hash<types::time_t>()(_Keyval.time);
+        }
+    };
+}
+
 inline size_t append_bytes(const astring_view& view) noexcept {
     return append_bytes(view.str);
 }
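
These specializations make astring_view and the date/time types usable wherever the engine's hashers expect std::hash, e.g. as group-by or join keys. Field hashes are combined with plain XOR, which is cheap and commutative; the commutativity also means values whose equal-typed fields are swapped collide by construction. The same scheme, sketched in Python:

# XOR-combining of field hashes, mirroring the std::hash<types::time_t> specialization.
class Time:
    def __init__(self, ms, seconds, minutes, hours):
        self.ms, self.seconds, self.minutes, self.hours = ms, seconds, minutes, hours
    def __hash__(self):
        return hash(self.ms) ^ hash(self.seconds) ^ hash(self.minutes) ^ hash(self.hours)

# swapping equal-typed fields yields the same hash -- a built-in collision of XOR combining
print(hash(Time(0, 1, 2, 3)) == hash(Time(0, 2, 1, 3)))  # True
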
@@ -32,9 +65,7 @@ struct hasher {
     template <size_t i = 0> typename std::enable_if< i < sizeof ...(Types),
         size_t>::type hashi(const std::tuple<Types...>& record) const {
         using current_type = typename std::decay<typename std::tuple_element<i, std::tuple<Types...>>::type>::type;
-        if constexpr (is_cstr<current_type>())
-            return append_bytes((const unsigned char*)std::get<i>(record)) ^ hashi<i + 1>(record);
-        else
         return std::hash<current_type>()(std::get<i>(record)) ^ hashi<i+1>(record);
     }
     size_t operator()(const std::tuple<Types...>& record) const {
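
With std::hash<astring_view> now specialized above, hashi no longer needs the is_cstr special case: every tuple field funnels through std::hash, XORed with the hash of the remaining fields. The recursion, sketched in Python (the base-case overload is not shown in this hunk, so a 0 seed is assumed):

# Sketch of hasher::hashi's fold over a record's fields.
def hashi(record, i=0):
    if i >= len(record):
        return 0  # assumed seed; the real base-case overload is outside this hunk
    return hash(record[i]) ^ hashi(record, i + 1)

print(hashi(('s1', 's2', 250, 1)))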

@@ -6,4 +6,4 @@ FIELDS TERMINATED BY ","
 select names, val * 10000 + id from types_test

 create table date_time(id int, _date date, _time time, _timestamp timestamp);
