Added prev/next aggregation functions

dev
Bill 2 years ago
parent a9b0c185e1
commit 818ab3b2e5

@ -6,7 +6,7 @@ CXXFLAGS = --std=c++1z
ifeq ($(AQ_DEBUG), 1)
OPTFLAGS = -g3
else
OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector
endif
LINKFLAGS = -flto # + $(AQ_LINK_FLAG)
SHAREDFLAGS = -shared

@ -103,9 +103,11 @@ There're multiple options to run AQuery on Windows. But for better consistency I
- `exec`: execute the last parsed statement(s) with the Hybrid Execution Engine. The engine decouples each query into two parts: the standard SQL (MonetDB dialect) part is executed by an embedded MonetDB instance, and everything else is handled by a post-processing module that the AQuery++ compiler generates in C++, then compiles and executes (a conceptual sketch follows this list).
- `stats <OPTIONAL: options>`: configure statistics.
  - no options: show statistics for all queries so far.
  - `reset`: reset statistics.
  - `on`: show statistics for every future query.
  - `off`: do not show statistics for future queries.
- `script <filename>`: run an automated testing script; this will execute all commands in the script.
- `sh <OPTIONAL: shell>`: launch a shell. The shell can be specified by name (e.g. `sh fish`).
- `dbg`: start a Python interactive interpreter in the current context.
- `print`: print the parsed AQuery statements (AST in JSON form).
- `save <OPTIONAL: filename>`: save the current code snippet; a random filename is used if none is specified.
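To make the `exec` decoupling above concrete, here is a purely conceptual Python sketch, not AQuery++'s actual internals, using the moving-minimum query from `./tests/` that appears later in this diff (the prices are made up). The embedded MonetDB side handles the standard-SQL work (filtering, ordering), while order-dependent vector functions such as `mins`, or the `prev`/`next` added in this commit, are left to the generated post-process module.

```python
# Conceptual sketch only -- not AQuery++'s real pipeline.
# Query: SELECT max(price - mins(price)) FROM ticks
#        ASSUMING ASC timestamp WHERE ID = "S" AND tradeDate = '2022-10-01';
from itertools import accumulate

# 1) Standard-SQL part (embedded MonetDB): filter by ID/tradeDate, sort by timestamp.
price = [3.0, 1.0, 4.0, 1.5, 5.0]              # made-up, already filtered and sorted

# 2) Post-process part (generated C++ in the real engine): the order-dependent
#    running minimum `mins`, then the scalar aggregate `max`.
running_min = list(accumulate(price, min))     # [3.0, 1.0, 1.0, 1.0, 1.0]
print(max(p - m for p, m in zip(price, running_min)))   # 4.0
```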
@ -182,3 +184,4 @@ See files in ./tests/ for more examples.
- [ ] Bug: Join-Aware Column management
- [ ] Bug: Order By after Group By
- [ ] Functionality: Having clause, With clause
- [ ] Decouple expr.py

@ -311,6 +311,8 @@ opeq = OperatorBase('eq', 2, logical, cname = '==', sqlname = '=', call = binary
opnot = OperatorBase('not', 1, logical, cname = '!', sqlname = 'NOT', call = unary_op_behavior)
opdistinct = OperatorBase('distinct', 1, as_is, cname = '.distinct()', sqlname = 'distinct', call = distinct_behavior)
# functional
fnprev = OperatorBase('prev', 1, as_is, cname = 'prev', sqlname = 'PREV', call = fn_behavior)
fnnext = OperatorBase('next', 1, as_is, cname = 'aggnext', sqlname = 'NEXT', call = fn_behavior)
fnmax = OperatorBase('max', 1, as_is, cname = 'max', sqlname = 'MAX', call = fn_behavior)
fnmin = OperatorBase('min', 1, as_is, cname = 'min', sqlname = 'MIN', call = fn_behavior)
fndeltas = OperatorBase('deltas', 1, as_is, cname = 'deltas', sqlname = 'DELTAS', call = fn_behavior)
@ -358,7 +360,7 @@ builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow)
builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs,
fnmins, fndeltas, fnratios, fnlast,
fnfirst, fnsums, fnavgs, fncnt,
fnpack, fntrunc)
fnpack, fntrunc, fnprev, fnnext)
user_module_func = {}
builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical,
**builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib,

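For orientation, below is a minimal standalone mock of the registration pattern above. It is not the real `OperatorBase`/`fn_behavior` from the compiler, and the exact behavior of `fn_behavior` is an assumption here: the point is that `cname` is the identifier emitted into the generated C++ post-processing code (hence `aggnext`, presumably to avoid clashing with other meanings of `next`), while `sqlname` is the spelling used when the call is expressed on the SQL side.

```python
# Illustrative mock only -- not the real OperatorBase/fn_behavior.
from dataclasses import dataclass

@dataclass
class FnOp:
    name: str     # AQuery-level name, e.g. 'prev'
    cname: str    # identifier emitted into generated C++ post-processing code
    sqlname: str  # spelling used on the SQL (MonetDB) side

    def c_call(self, *args: str) -> str:
        return f"{self.cname}({', '.join(args)})"

    def sql_call(self, *args: str) -> str:
        return f"{self.sqlname}({', '.join(args)})"

fnprev = FnOp('prev', 'prev', 'PREV')
fnnext = FnOp('next', 'aggnext', 'NEXT')

print(fnnext.c_call('endofdayprice'))    # aggnext(endofdayprice)
print(fnnext.sql_call('endofdayprice'))  # NEXT(endofdayprice)
```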
@ -563,14 +563,14 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
if len(qs) > 1:
    # `stats on` / `stats off`: toggle statistics printing for future queries
    if qs[1].startswith('on'):
        state.need_print = True
        continue
    elif qs[1].startswith('off'):
        state.need_print = False
        continue
    # `stats last`: print statistics for the most recent query
    elif qs[1].startswith('last'):
        state.currstats.need_print = True
        state.currstats.print()
        continue
    # `stats reset`: reset statistics
    elif qs[1].startswith('reset'):
        state.currstats.clear()
        continue
# bare `stats`: print cumulative statistics without clearing them
state.stats.need_print = True
state.stats.print(clear = False)
continue

@ -293,7 +293,7 @@ class projection(ast_node):
val[1] = val[1](False)
if val[0] == LazyT:
    decltypestring = val[2].eval(x,y,gettype=True,c_code=True)(True)
    decltypestring = val[2].eval(y=y,gettype=True,c_code=True)(True)
    decltypestring = f'value_type<decays<decltype({decltypestring})>>'
    out_typenames[key] = decltypestring
else:

@ -88,7 +88,7 @@ class expr(ast_node):
    self.udf_map = parent.context.udf_map
    self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
    self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
    self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first']
    self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first', 'prev', 'next']
def produce(self, node):
    from engine.utils import enlist

@ -214,6 +214,28 @@ decayed_t<VT, T> deltas(const VT<T>& arr) {
    return ret;
}
// prev: shift the column down by one position; the first element,
// which has no predecessor, is repeated.
template<class T, template<typename ...> class VT>
decayed_t<VT, T> prev(const VT<T>& arr) {
    const uint32_t& len = arr.size;
    decayed_t<VT, T> ret(len);
    uint32_t i = 0;
    if (len) ret[i++] = arr[0];
    for (; i < len; ++i)
        ret[i] = arr[i - 1];
    return ret;
}
// aggnext ("next" at the SQL level): shift the column up by one position;
// the last element, which has no successor, is repeated.
template<class T, template<typename ...> class VT>
decayed_t<VT, T> aggnext(const VT<T>& arr) {
    const uint32_t& len = arr.size;
    decayed_t<VT, T> ret(len);
    uint32_t i = 1;
    for (; i < len; ++i)
        ret[i - 1] = arr[i];
    if (len > 0) ret[len - 1] = arr[len - 1];
    return ret;
}
template<class T, template<typename ...> class VT>
T last(const VT<T>& arr) {
    if (!arr.size) return 0;
@ -232,7 +254,7 @@ T first(const VT<T>& arr) {
#define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \
template <class T> constexpr inline T NAME(const T& v) { return RET; }
// wrong behavior with count(0)
// non-aggregation count, e.g. SELECT COUNT(col) FROM table;
template <class T> constexpr inline T count(const T& v) { return 1; }
template <class T> constexpr inline T max(const T& v) { return v; }
template <class T> constexpr inline T min(const T& v) { return v; }
@ -248,5 +270,7 @@ template <class T> constexpr inline T mins(const T& v) { return v; }
template <class T> constexpr inline T avgs(const T& v) { return v; }
template <class T> constexpr inline T sums(const T& v) { return v; }
template <class T> constexpr inline T last(const T& v) { return v; }
template <class T> constexpr inline T prev(const T& v) { return v; }
template <class T> constexpr inline T aggnext(const T& v) { return v; }
template <class T> constexpr inline T deltas(const T& v) { return 0; }
template <class T> constexpr inline T ratios(const T& v) { return 1; }

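As a quick sanity check of the semantics implemented above, here is a standalone Python sketch (the real implementations are the C++ templates in this diff): `prev` shifts a column down by one and repeats the first element, `aggnext` shifts it up by one and repeats the last element, and on scalars both fall back to the identity, matching the one-line scalar fallbacks.

```python
# Python model of the C++ prev/aggnext above (vector forms).
def prev(arr):
    # first element has no predecessor, so it is repeated
    return arr[:1] + arr[:-1]

def aggnext(arr):
    # last element has no successor, so it is repeated
    return arr[1:] + arr[-1:]

col = [10, 11, 13, 12]
print(prev(col))     # [10, 10, 11, 13]
print(aggnext(col))  # [11, 13, 12, 12]

# Scalar fallbacks behave as identity, mirroring the template one-liners:
#   prev(v) == v and aggnext(v) == v for a single value v.
```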
@ -10,3 +10,11 @@ into table ticks fields terminated by ","
select max(price - mins(price))
from ticks assuming asc timestamp
where ID = "S" and tradeDate= '2022-10-01';
DROP TABLE IF EXISTS ticks;
CREATE TABLE ticks(ID varchar(20), date int, endofdayprice int);
LOAD DATA INFILE "data/ticks.csv" INTO TABLE ticks FIELDS TERMINATED BY ",";
SELECT max(endofdayprice/prev(endofdayprice)) as Max_Ratio
FROM ticks
ASSUMING ASC date
WHERE ID = "3001";
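For a quick worked example of what this new test query computes (using made-up prices, not the contents of data/ticks.csv): if the end-of-day prices in `date` order are 10, 12, 9, 18, then `prev(endofdayprice)` is 10, 10, 12, 9, the element-wise ratios are 1.0, 1.2, 0.75, 2.0, and `Max_Ratio` is therefore 2.0.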