commit 613941ce06 (branch dev, parent 3dfecaebb1)
Bill, 2 years ago

.gitignore (vendored)

@@ -49,6 +49,7 @@ test*.c*
 !test2.csv
 !moving_avg.csv
 !nyctx100.csv
+!network.csv
 *.out
 *.asm
 !mmw.so

data/network.csv (new file)

@@ -0,0 +1,7 @@
+src, dst, len, time
+s1, s2, 250, 1
+s1, s2, 270, 20
+s1, s2, 235, 141
+s2, s1, 330, 47
+s2, s1, 280, 150
+s2, s1, 305, 155

@@ -88,9 +88,9 @@ class TypeCollection:
     type_table = dict()
     AnyT = Types(-1)
     LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '')
-    LazyT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')
-    LazyT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')
-    LazyT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
+    DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')
+    TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')
+    TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
     DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True)
     LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True)
     FloatT = Types(16, name = 'float', cname = 'float', sqlname = 'REAL',
@@ -137,7 +137,8 @@ def _ty_make_dict(fn : str, *ty : Types):
 int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, ShortT, IntT)
 uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT)
 fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT)
-builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types}
+temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
+builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types}

 def get_int128_support():
     for t in int_types.values():
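For context, `_ty_make_dict` appears to key each `Types` entry by the expression string it receives (here `t.sqlname.lower()`), so the newly named temporal types become addressable by their lowercase SQL names. A minimal sketch of the resulting lookup, with a stand-in `Types` and a hypothetical `_ty_make_dict` that evaluates the key expression (not the project's actual implementation):

    from typing import Dict, NamedTuple

    class Types(NamedTuple):          # stand-in for the project's Types class
        priority: int
        name: str
        sqlname: str

    def _ty_make_dict(fn: str, *ty: Types) -> Dict[str, Types]:
        # hypothetical reconstruction: evaluate the key expression per type
        return {eval(fn, None, {'t': t}): t for t in ty}

    DateT      = Types(200, 'DATE', 'DATE')
    TimeT      = Types(201, 'TIME', 'TIME')
    TimeStampT = Types(202, 'TIMESTAMP', 'TIMESTAMP')

    temporal_types = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
    assert temporal_types['timestamp'] is TimeStampT   # lookup by lowercase SQL name

Before this commit all three temporal entries were bound to the same name `LazyT`, so only the last assignment survived and none of them could be merged into `builtin_types`.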

@@ -357,7 +357,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
     cxt = xengine.exec(state.stmts, cxt, keep)
     this_udf = cxt.finalize_udf()
-    if False and this_udf:
+    if this_udf:
         with open('udf.hpp', 'wb') as outfile:
             outfile.write(this_udf.encode('utf-8'))

@@ -233,12 +233,16 @@ class projection(ast_node):
                 out_typenames[key] = decltypestring
             else:
                 out_typenames[key] = val[0].cname
-                if (type(val[2].udf_called) is udf and
+                if (type(val[2].udf_called) is udf and # should bulkret also be colref?
                     val[2].udf_called.return_pattern == udf.ReturnPattern.elemental_return
                     or
-                    self.group_node and self.group_node.use_sp_gb and
+                    self.group_node and
+                    (self.group_node.use_sp_gb and
                     val[2].cols_mentioned.intersection(
                         self.datasource.all_cols.difference(self.group_node.refs))
+                    ) and val[2].is_compound # compound val not in key
+                    # or
+                    # (not self.group_node and val[2].is_compound)
                 ):
                     out_typenames[key] = f'ColRef<{out_typenames[key]}>'
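The condition above decides when a projected output keeps a vector type in the generated C++: either an elemental-return UDF was called, or, on the special group-by path, the value mentions columns outside the group keys and is itself compound. A standalone restatement of that decision with simplified stand-in arguments (the names here are illustrative, not the project's API):

    def needs_colref(udf_is_elemental: bool, use_sp_gb: bool,
                     cols_mentioned: set, group_refs: set, all_cols: set,
                     is_compound: bool) -> bool:
        # wrap as ColRef<T> when the value stays vector-shaped per group
        return (udf_is_elemental
                or (use_sp_gb
                    and bool(cols_mentioned & (all_cols - group_refs))
                    and is_compound))

    # e.g. grouping by (src, dst): a compound value over _time keeps vector type
    print(needs_colref(False, True, {'_time'}, {'src', 'dst'},
                       {'src', 'dst', 'len', '_time'}, True))   # True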

@@ -66,7 +66,6 @@ class expr(ast_node):
     def init(self, _):
         from reconstruct.ast import projection
         parent = self.parent
-        self.isvector = parent.isvector if type(parent) is expr else False
         self.is_compound = parent.is_compound if type(parent) is expr else False
         if type(parent) in [projection, expr]:
             self.datasource = parent.datasource
@@ -75,13 +74,16 @@ class expr(ast_node):
         self.udf_map = parent.context.udf_map
         self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
         self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
-        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max']
+        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last']

     def produce(self, node):
         from engine.utils import enlist
         from reconstruct.ast import udf
         if type(node) is dict:
+            if len(node) > 1:
+                print(f'Parser Error: {node} has more than 1 dict entry.')
             for key, val in node.items():
                 if key in self.operators:
                     if key in builtin_func:
@@ -96,6 +98,11 @@ class expr(ast_node):
                     exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
                     str_vals = [e.sql for e in exp_vals]
                     type_vals = [e.type for e in exp_vals]
+                    is_compound = any([e.is_compound for e in exp_vals])
+                    if key in self.ext_aggfuncs:
+                        self.is_compound = False
+                    else:
+                        self.is_compound = is_compound
                     try:
                         self.type = op.return_type(*type_vals)
                     except AttributeError as e:
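These additions propagate compoundness (vector-shapedness) bottom-up through expressions: a node is compound if any operand is, except that the extended aggregates (`sum`, `avg`, `count`, `min`, `max`, and now `last`) collapse a vector to a scalar. A minimal sketch of the rule detached from the AST machinery (illustrative names only):

    AGGREGATES = {'sum', 'avg', 'count', 'min', 'max', 'last'}

    def propagate_compound(op: str, operand_compound: list) -> bool:
        # aggregates collapse vector operands to a scalar result;
        # any other operator is compound iff any operand is
        return False if op in AGGREGATES else any(operand_compound)

    print(propagate_compound('add', [True, False]))   # True: colref + literal stays a vector
    print(propagate_compound('sum', [True]))          # False: aggregation yields a scalar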
@@ -107,7 +114,7 @@ class expr(ast_node):
                     self.sql = op(self.c_code, *str_vals)
             special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
-                            "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                            "maxs", "mins", "avgs", "sums", "deltas"]
             if self.context.special_gb:
                 special_func = [*special_func, *self.ext_aggfuncs]
@@ -203,10 +210,6 @@ class expr(ast_node):
             # get the column from the datasource in SQL context
             else:
-                p = self.parent
-                while type(p) is expr and not p.isvector:
-                    p.isvector = True
-                    p = p.parent
                 if self.datasource is not None:
                     self.raw_col = self.datasource.parse_col_names(node)
                     self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
@@ -214,6 +217,7 @@ class expr(ast_node):
                     self.is_ColExpr = True
                     self.sql = self.raw_col.name
                     self.type = self.raw_col.type
+                    self.is_compound = True
                 else:
                     self.sql = node
                     self.type = StrT
@@ -234,7 +238,7 @@ class expr(ast_node):
             self.type = IntT
         elif type(node) is float:
             self.type = DoubleT

     def finalize(self, override = False):
         from reconstruct.ast import udf
         if self.codebuf is None or override:

@@ -172,13 +172,7 @@ decayed_t<VT, T> deltas(const VT<T>& arr) {
 template<class T, template<typename ...> class VT>
 T last(const VT<T>& arr) {
     const uint32_t& len = arr.size;
-    decayed_t<VT, T> ret(len);
-    uint32_t i = 0;
-    if (len)
-        ret[i++] = arr[0];
-    for (; i < len; ++i)
-        ret[i] = arr[i-1];
-    return ret;
+    return arr[arr.size - 1];
 }
 // wrong behavior with count(0)
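Before this change, `last` built a full-length copy shifted by one slot (each output holding the previous element), which is a lag/previous-value vector rather than a last-element aggregate; it now simply returns the final element. A small illustration of the two behaviors in plain Python (not the server's C++):

    def last_old(arr):
        # previous behavior: a 'previous value' vector the same length as the input
        return [arr[0], *arr[:-1]] if arr else []

    def last_new(arr):
        # new behavior: a scalar aggregate, like arr[arr.size - 1];
        # as written it assumes arr is non-empty
        return arr[-1]

    print(last_old([250, 270, 235]))   # [250, 250, 270]
    print(last_new([250, 270, 235]))   # 235

This matches moving `last` out of the vector-valued `special_func` list and into `ext_aggfuncs` in the Python front end above.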

@@ -17,6 +17,39 @@ inline size_t append_bytes(const unsigned char* _First) noexcept {
     return _Val;
 }
+
+namespace std{
+    template<>
+    struct hash<astring_view> {
+        size_t operator()(const astring_view& _Keyval) const noexcept {
+            return append_bytes(_Keyval.str);
+        }
+    };
+    template<>
+    struct hash<types::date_t> {
+        size_t operator() (const types::date_t& _Keyval) const noexcept {
+            return std::hash<unsigned int>()(*(unsigned int*)(&_Keyval));
+        }
+    };
+    template<>
+    struct hash<types::time_t> {
+        size_t operator() (const types::time_t& _Keyval) const noexcept {
+            return std::hash<unsigned int>()(_Keyval.ms) ^
+                std::hash<unsigned char>()(_Keyval.seconds) ^
+                std::hash<unsigned char>()(_Keyval.minutes) ^
+                std::hash<unsigned char>()(_Keyval.hours);
+        }
+    };
+    template<>
+    struct hash<types::timestamp_t>{
+        size_t operator() (const types::timestamp_t& _Keyval) const noexcept {
+            return std::hash<types::date_t>()(_Keyval.date) ^
+                std::hash<types::time_t>()(_Keyval.time);
+        }
+    };
+}
+
 inline size_t append_bytes(const astring_view& view) noexcept {
     return append_bytes(view.str);
 }
@@ -32,10 +65,8 @@ struct hasher {
     template <size_t i = 0> typename std::enable_if< i < sizeof ...(Types),
         size_t>::type hashi(const std::tuple<Types...>& record) const {
         using current_type = typename std::decay<typename std::tuple_element<i, std::tuple<Types...>>::type>::type;
-        if constexpr (is_cstr<current_type>())
-            return append_bytes((const unsigned char*)std::get<i>(record)) ^ hashi<i + 1>(record);
-        else
-            return std::hash<current_type>()(std::get<i>(record)) ^ hashi<i+1>(record);
+        return std::hash<current_type>()(std::get<i>(record)) ^ hashi<i+1>(record);
     }
     size_t operator()(const std::tuple<Types...>& record) const {
         return hashi(record);
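With the `std::hash` specializations above in place, the tuple hasher no longer needs the `is_cstr` special case: every field, including strings and the new temporal types, goes through `std::hash`, and the per-field hashes are folded with XOR. A Python sketch of the same combining scheme, for illustration only:

    from functools import reduce

    def tuple_hash(record: tuple) -> int:
        # fold per-field hashes with XOR, like the recursive hashi<i>() above
        return reduce(lambda acc, field: acc ^ hash(field), record, 0)

    print(tuple_hash(('s1', 's2', 3)))

XOR combining is cheap but order-insensitive, so (a, b) and (b, a) hash alike; that trade-off predates this commit, which only changes how each field's hash is obtained.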

@@ -5,7 +5,7 @@ LOAD DATA INFILE "data/network.csv"
 INTO TABLE network
 FIELDS TERMINATED BY ","

 SELECT src, dst, avg(len)
 FROM network
 ASSUMING ASC src, ASC dst, ASC _time
+GROUP BY src, dst, sums (deltas(_time) > 120)
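Assuming `deltas` yields successive differences and `sums` a running (prefix) sum, the extra GROUP BY term acts as a session id that increments whenever consecutive `_time` values in the ordered table are more than 120 apart; note the counter runs over the whole table, not per (src, dst) pair. A plain-Python walk-through over the sample data, under that reading of the semantics:

    # rows from data/network.csv, ordered as ASSUMING ASC src, dst, _time
    rows = [('s1', 's2', 250, 1), ('s1', 's2', 270, 20), ('s1', 's2', 235, 141),
            ('s2', 's1', 330, 47), ('s2', 's1', 280, 150), ('s2', 's1', 305, 155)]

    groups, prev_time, session = {}, None, 0
    for src, dst, length, t in rows:
        if prev_time is not None and t - prev_time > 120:
            session += 1                  # a gap > 120 opens a new session
        prev_time = t
        groups.setdefault((src, dst, session), []).append(length)

    for key, lens in groups.items():
        print(key, sum(lens) / len(lens)) # avg(len) per (src, dst, session)
    # ('s1', 's2', 0) 260.0 / ('s1', 's2', 1) 235.0 / ('s2', 's1', 1) 305.0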

@@ -6,4 +6,4 @@ FIELDS TERMINATED BY ","
+select names, val * 10000 + id from types_test

 create table date_time(id int, _date date, _time time, _timestamp timestamp);
