From 613941ce064d8b5dd1025327afa7da3032dde9d4 Mon Sep 17 00:00:00 2001
From: Bill <sunyinqi0508@gmail.com>
Date: Sun, 18 Sep 2022 20:55:43 +0800
Subject: [PATCH] bug fixes

---
 .gitignore            |  1 +
 data/network.csv      |  7 +++++++
 engine/types.py       |  9 +++++----
 prompt.py             |  2 +-
 reconstruct/ast.py    |  8 ++++++--
 reconstruct/expr.py   | 20 ++++++++++++--------
 server/aggregations.h |  8 +-------
 server/hasher.h       | 39 +++++++++++++++++++++++++++++++++++----
 tests/network.a       |  2 +-
 tests/strings.a       |  2 +-
 10 files changed, 70 insertions(+), 28 deletions(-)
 create mode 100644 data/network.csv

diff --git a/.gitignore b/.gitignore
index 3b53a07..a2ad2b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,6 +49,7 @@ test*.c*
 !test2.csv
 !moving_avg.csv
 !nyctx100.csv
+!network.csv
 *.out
 *.asm
 !mmw.so
diff --git a/data/network.csv b/data/network.csv
new file mode 100644
index 0000000..ac4d7b0
--- /dev/null
+++ b/data/network.csv
@@ -0,0 +1,7 @@
+src, dst, len, time
+s1, s2, 250, 1
+s1, s2, 270, 20
+s1, s2, 235, 141
+s2, s1, 330, 47
+s2, s1, 280, 150
+s2, s1, 305, 155
diff --git a/engine/types.py b/engine/types.py
index d65b1d4..477934d 100644
--- a/engine/types.py
+++ b/engine/types.py
@@ -88,9 +88,9 @@ class TypeCollection:
 type_table = dict()
 AnyT = Types(-1)
 LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '')
-LazyT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')
-LazyT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')
-LazyT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
+DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE')  # SQL DATE, type id 200
+TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME')  # SQL TIME, type id 201
+TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')  # SQL TIMESTAMP, type id 202; NOTE(review): ctype_name lacks the 'types::' prefix that DateT/TimeT use -- confirm this is intentional
 DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True)
 LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True)
 FloatT = Types(16, name = 'float', cname = 'float', sqlname = 'REAL', 
@@ -137,7 +137,8 @@ def _ty_make_dict(fn : str, *ty : Types):
 int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, ShortT, IntT)
 uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT)
 fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT)
-builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types}
+temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)  # built the same way as int_types/uint_types/fp_types; presumably keyed by lowercase SQL name -- verify against _ty_make_dict
+builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types, **temporal_types}  # merged table; later entries win on duplicate keys, and the temporal types are now included
 
 def get_int128_support():
     for t in int_types.values():
diff --git a/prompt.py b/prompt.py
index 9310735..aadbeb7 100644
--- a/prompt.py
+++ b/prompt.py
@@ -357,7 +357,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                 cxt = xengine.exec(state.stmts, cxt, keep)
                 
                 this_udf = cxt.finalize_udf()
-                if False and this_udf:
+                if this_udf:
                     with open('udf.hpp', 'wb') as outfile:
                         outfile.write(this_udf.encode('utf-8'))
                         
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index df91959..7d5b0c8 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -233,12 +233,16 @@ class projection(ast_node):
                 out_typenames[key] = decltypestring
             else:
                 out_typenames[key] = val[0].cname
-            if (type(val[2].udf_called) is udf and 
+            if (type(val[2].udf_called) is udf and # should bulkret also be colref?
                     val[2].udf_called.return_pattern == udf.ReturnPattern.elemental_return
                     or 
-                    self.group_node and self.group_node.use_sp_gb and
+                    self.group_node and 
+                    (self.group_node.use_sp_gb and
                     val[2].cols_mentioned.intersection(
                         self.datasource.all_cols.difference(self.group_node.refs))
+                    ) and val[2].is_compound # compound val not in key
+                    # or 
+                    # (not self.group_node and val[2].is_compound)
                     ):
                     out_typenames[key] = f'ColRef<{out_typenames[key]}>'
         
diff --git a/reconstruct/expr.py b/reconstruct/expr.py
index 885eef4..b636667 100644
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@@ -66,7 +66,6 @@ class expr(ast_node):
     def init(self, _):
         from reconstruct.ast import projection
         parent = self.parent
-        self.isvector = parent.isvector if type(parent) is expr else False
         self.is_compound = parent.is_compound if type(parent) is expr else False
         if type(parent) in [projection, expr]:
             self.datasource = parent.datasource
@@ -75,13 +74,16 @@ class expr(ast_node):
         self.udf_map = parent.context.udf_map
         self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
         self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
-        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max']
+        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last']
         
     def produce(self, node):
         from engine.utils import enlist
         from reconstruct.ast import udf
         
         if type(node) is dict:
+            if len(node) > 1:
+                print(f'Parser Error: {node} has more than 1 dict entry.')
+                
             for key, val in node.items():
                 if key in self.operators:
                     if key in builtin_func:
@@ -96,6 +98,11 @@ class expr(ast_node):
                     exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
                     str_vals = [e.sql for e in exp_vals]
                     type_vals = [e.type for e in exp_vals]
+                    is_compound = any([e.is_compound for e in exp_vals])
+                    if key in self.ext_aggfuncs:
+                        self.is_compound = False
+                    else:
+                        self.is_compound = is_compound
                     try:
                         self.type = op.return_type(*type_vals)
                     except AttributeError as e:
@@ -107,7 +114,7 @@ class expr(ast_node):
                         
                     self.sql = op(self.c_code, *str_vals)
                     special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), 
-                                    "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                                    "maxs", "mins", "avgs", "sums", "deltas"]
                     if self.context.special_gb:
                         special_func = [*special_func, *self.ext_aggfuncs]
                         
@@ -203,10 +210,6 @@ class expr(ast_node):
     
             # get the column from the datasource in SQL context
             else:
-                p = self.parent
-                while type(p) is expr and not p.isvector:
-                    p.isvector = True
-                    p = p.parent
                 if self.datasource is not None:
                     self.raw_col = self.datasource.parse_col_names(node)
                     self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
@@ -214,6 +217,7 @@ class expr(ast_node):
                     self.is_ColExpr = True
                     self.sql = self.raw_col.name
                     self.type = self.raw_col.type
+                    self.is_compound = True
                 else:
                     self.sql = node
                     self.type = StrT
@@ -234,7 +238,7 @@ class expr(ast_node):
                     self.type = IntT
             elif type(node) is float:
                 self.type = DoubleT
-
+    
     def finalize(self, override = False):
         from reconstruct.ast import udf
         if self.codebuf is None or override:
diff --git a/server/aggregations.h b/server/aggregations.h
index e3442d0..0e41fc9 100644
--- a/server/aggregations.h
+++ b/server/aggregations.h
@@ -172,13 +172,7 @@ decayed_t<VT, T> deltas(const VT<T>& arr) {
 template<class T, template<typename ...> class VT>
 T last(const VT<T>& arr) {
 	const uint32_t& len = arr.size;
-	decayed_t<VT, T> ret(len);
-	uint32_t i = 0;
-	if (len)
-		ret[i++] = arr[0];
-	for (; i < len; ++i) 
-		ret[i] = arr[i-1];
-	return ret;
+	return len ? arr[len - 1] : T{}; // last element of arr; an empty input yields T{} because the unsigned len - 1 would wrap around and index out of range
 }
 
 // wrong behavior with count(0)
diff --git a/server/hasher.h b/server/hasher.h
index 8e3f510..2de5555 100644
--- a/server/hasher.h
+++ b/server/hasher.h
@@ -17,6 +17,39 @@ inline size_t append_bytes(const unsigned char* _First) noexcept {
 	return _Val;
 }
 
+namespace std{ // std::hash specializations for astring_view and the types:: date/time types
+	template<>
+	struct hash<astring_view> {
+		size_t operator()(const astring_view& _Keyval) const noexcept {
+			return append_bytes(_Keyval.str); // delegates to append_bytes on the view's str member
+		}
+	};
+	template<>
+	struct hash<types::date_t> {
+		size_t operator() (const types::date_t& _Keyval) const noexcept {
+			return std::hash<unsigned int>()(*(unsigned int*)(&_Keyval)); // reads the object's leading bytes as one unsigned int; NOTE(review): pointer-cast type punning, assumes sizeof(date_t) >= sizeof(unsigned int) -- confirm
+		}
+	};
+	template<>
+	struct hash<types::time_t> {
+		size_t operator() (const types::time_t& _Keyval) const noexcept {
+			return std::hash<unsigned int>()(_Keyval.ms) ^ // XOR of the per-field hashes of ms, seconds, minutes and hours
+			std::hash<unsigned char>()(_Keyval.seconds) ^ // XOR is commutative: swapping values among seconds/minutes/hours gives the same hash
+			std::hash<unsigned char>()(_Keyval.minutes) ^
+			std::hash<unsigned char>()(_Keyval.hours)
+			;
+		}
+	};
+	template<>
+	struct hash<types::timestamp_t>{
+		size_t operator() (const types::timestamp_t& _Keyval) const noexcept {
+			return std::hash<types::date_t>()(_Keyval.date) ^ // combines std::hash<types::date_t> and std::hash<types::time_t> of the two members
+				std::hash<types::time_t>()(_Keyval.time);
+		}
+	};
+
+}
+
 inline size_t append_bytes(const astring_view& view) noexcept {
 	return append_bytes(view.str);
 }
@@ -32,10 +65,8 @@ struct hasher {
 	template <size_t i = 0> typename std::enable_if< i < sizeof ...(Types), 
 		size_t>::type hashi(const std::tuple<Types...>& record) const {
 		using current_type = typename std::decay<typename std::tuple_element<i, std::tuple<Types...>>::type>::type;
-		if constexpr (is_cstr<current_type>())
-			return append_bytes((const unsigned char*)std::get<i>(record)) ^ hashi<i + 1>(record);
-		else
-			return std::hash<current_type>()(std::get<i>(record)) ^ hashi<i+1>(record);
+		
+		return std::hash<current_type>()(std::get<i>(record)) ^ hashi<i+1>(record); // XORs element i's std::hash with the hash of the remaining elements; NOTE(review): the removed is_cstr branch hashed C-string contents via append_bytes -- confirm no element type still needs that
 	}
 	size_t operator()(const std::tuple<Types...>& record) const {
 		return hashi(record);
diff --git a/tests/network.a b/tests/network.a
index a6238ad..78ecc48 100644
--- a/tests/network.a
+++ b/tests/network.a
@@ -5,7 +5,7 @@ LOAD DATA INFILE "data/network.csv"
 INTO TABLE network
 FIELDS TERMINATED BY ","
 
-SELECT	 src, dst,  avg(len) 
+SELECT	 src, dst, avg(len)
 FROM	 network
 	     ASSUMING ASC src, ASC dst, ASC _time 
 GROUP BY src, dst, sums (deltas(_time) > 120)
diff --git a/tests/strings.a b/tests/strings.a
index e38630f..e0fd643 100644
--- a/tests/strings.a
+++ b/tests/strings.a
@@ -6,4 +6,4 @@ FIELDS TERMINATED BY ","
 
 select names, val * 10000 + id from types_test
 
-create table date_time(id int, _date date, _time time, _timestamp timestamp);
+ create table date_time(id int, _date date, _time time, _timestamp timestamp);