diff --git a/.gitignore b/.gitignore
index a2ad2b0..3ef09c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+tests/datagen_jose/histgen
+tests/datagen_jose/tickgen
+datagen
 *.dSYM
 testmain.lib
 testmain.exp
diff --git a/aquery_config.py b/aquery_config.py
index 0470d20..3330b6e 100644
--- a/aquery_config.py
+++ b/aquery_config.py
@@ -2,7 +2,7 @@
 
 ## GLOBAL CONFIGURATION FLAGS
 
-version_string = '0.4.4a'
+version_string = '0.4.5a'
 add_path_to_ldpath = True
 rebuild_backend = False
 run_backend = True
diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py
index 479081b..5ae05bf 100644
--- a/aquery_parser/keywords.py
+++ b/aquery_parser/keywords.py
@@ -44,6 +44,7 @@ LEFT = keyword("left")
 LIKE = keyword("like")
 LIMIT = keyword("limit").suppress()
 MINUS = keyword("minus")
+NATURAL = keyword("natural")
 OFFSET = keyword("offset").suppress()
 ON = keyword("on").suppress()
 ORDER = keyword("order").suppress()
@@ -145,7 +146,7 @@ VIEW = keyword("view")
 
 joins = (
     (
-        Optional(CROSS | OUTER | INNER | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
+        Optional(CROSS | OUTER | INNER | NATURAL | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
         + JOIN
         + Optional(LATERAL)
    )
@@ -214,6 +215,7 @@ RESERVED = MatchFirst([
    LIKE,
    LIMIT,
    MINUS,
+   NATURAL,
    NOCASE,
    NOT,
    NULL,
@@ -253,6 +255,7 @@ EQ = Char("=").suppress()
 
 join_keywords = {
    "join",
+   "natural join",
    "full join",
    "cross join",
    "inner join",
diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py
index c07aea3..45bbe28 100644
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@@ -323,9 +323,12 @@ def parser(literal_string, ident, sqlserver=False):
 
     table_source = Forward()
 
+    assumption = Group((ASC|DESC) ("sort") + var_name("value"))
+    assumptions = Optional(ASSUMING.suppress() + Group(delimited_list(assumption)))
+
     join = (
         Group(joins)("op")
-        + table_source("join")
+        + (table_source )("join")
         + Optional((ON + expr("on")) | (USING + expr("using")))
         | (
             Group(WINDOW)("op")
@@ -403,7 +406,12 @@ def parser(literal_string, ident, sqlserver=False):
         | selection
         + Optional(INTO + table_source("into"))
         + Optional(
-            (FROM + delimited_list(table_source) + ZeroOrMore(join))("from")
+            (
+                FROM
+                + (delimited_list(table_source)
+                + ZeroOrMore(join))("table_source")
+                + Optional(assumptions) ("assumptions")
+            )("from")
             + Optional(WHERE + expr("where"))
             + Optional(GROUP_BY + delimited_list(Group(named_column))("groupby"))
             + Optional(HAVING + expr("having"))
@@ -443,12 +451,8 @@ def parser(literal_string, ident, sqlserver=False):
         + RB,
     )
 
-    assumption = Group((ASC|DESC) ("sort") + var_name("value"))
-    assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions"))
-
     table_source << Group(
         ((LB + query + RB) | stack | call_function | var_name)("value")
-        + Optional(assumptions)
         + Optional(flag("with ordinality"))
         + Optional(tablesample)
         + alias
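[Note — editor's illustration, not part of the patch: with the grammar change above, ASSUMING now binds to the whole FROM clause instead of each table_source, and the parse result gains 'table_source' and 'assumptions' entries under 'from'. A minimal sketch of the shape the engine code below expects; the exact dict layout is an assumption inferred from the pyparsing result names:]

    # Hypothetical parse result for:
    #   SELECT price FROM stock ASSUMING ASC timestamp
    parsed = {
        'select': {'value': 'price'},
        'from': {
            'table_source': 'stock',
            'assumptions': [{'sort': 'asc', 'value': 'timestamp'}],
        },
    }
    from_clause = parsed['from']['table_source']  # what projection.spawn reads below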
diff --git a/build.py b/build.py
index b6d4006..3312a0b 100644
--- a/build.py
+++ b/build.py
@@ -21,7 +21,10 @@ class checksums:
             server = 'server.so'
         ):
         from platform import machine
-        self.env = aquery_config.os_platform + machine() + aquery_config.build_driver
+        self.env = (aquery_config.os_platform +
+                    machine() +
+                    aquery_config.build_driver
+        )
         for key in self.__dict__.keys():
             try:
                 with open(eval(key), 'rb') as file:
@@ -37,12 +40,13 @@ class checksums:
             except FileNotFoundError:
                 print('missing component: ' + key)
                 self.sources[key] = None
+
    def __ne__(self, __o: 'checksums') -> 'checksums':
        ret = checksums()
        for key in self.__dict__.keys():
            try:
                ret.__dict__[key] = (
-                    self.__dict__[key] and __o.__dict__[key] and
+                    not (self.__dict__[key] and __o.__dict__[key]) or
                    self.__dict__[key] != __o.__dict__[key]
                )
            except KeyError:
@@ -54,7 +58,7 @@ class checksums:
        for key in self.__dict__.keys():
            try:
                ret.__dict__[key] = (
-                    not (self.__dict__[key] and __o.__dict__[key]) or
+                    self.__dict__[key] and __o.__dict__[key] and
                    self.__dict__[key] == __o.__dict__[key]
                )
            except KeyError:
@@ -82,16 +86,19 @@ class build_manager:
            self.mgr = mgr
            self.build_cmd = []
        def libaquery_a(self) :
-            pass
+            return False
        def pch(self):
-            pass
+            return False
        def build(self, stdout = sys.stdout, stderr = sys.stderr):
+            ret = True
            for c in self.build_cmd:
                if c:
                    try:
-                        subprocess.call(c, stdout = stdout, stderr = stderr)
+                        ret = subprocess.call(c, stdout = stdout, stderr = stderr) == 0 and ret
                    except (FileNotFoundError):
+                        ret = False
                        pass
+            return ret
 
    class MakefileDriver(DriverBase):
        def __init__(self, mgr : 'build_manager') -> None:
@@ -132,7 +139,7 @@
            loc = os.path.abspath('./msc-plugin/libaquery.vcxproj')
            self.get_flags()
            self.build_cmd = [['del', 'libaquery.lib'], [aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
        def pch(self):
            pass
@@ -141,24 +148,17 @@
            loc = os.path.abspath('./msc-plugin/server.vcxproj')
            self.get_flags()
            self.build_cmd = [['del', 'server.so'], [aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
        def snippet(self):
            loc = os.path.abspath('./msc-plugin/msc-plugin.vcxproj')
            self.get_flags()
            self.build_cmd = [[aquery_config.msbuildroot, loc, self.opt, self.platform]]
-            self.build()
+            return self.build()
 
    #class PythonDriver(DriverBase):
    #    def __init__(self, mgr : 'build_manager') -> None:
-    #        super().__init__(mgr)
-
-    #@property
-    #def MSBuild(self):
-    #    return MSBuildDriver(self)
-    #@property
-    #def Makefile(self):
-    #    return MakefileDriver(self)
+    #        super().__init__(mgr)
 
    def __init__(self) -> None:
        self.method = 'make'
@@ -181,7 +181,9 @@
    def build_caches(self, force = False):
        cached = checksums()
        current = checksums()
-        libaquery_a = 'libaquery.lib' if aquery_config.os_platform else 'libaquery.a'
+        libaquery_a = 'libaquery.a'
+        if aquery_config.os_platform == 'win':
+            libaquery_a = 'libaquery.lib'
        current.calc(libaquery_a)
        try:
            with open('.cached', 'rb') as cache_sig:
@@ -190,18 +192,25 @@
            pass
        self.cache_status = current != cached
 
+        success = True
        if force or self.cache_status.sources:
            self.driver.pch()
            self.driver.libaquery_a()
            self.driver.server()
        else:
            if self.cache_status.libaquery_a:
-                self.driver.libaquery_a()
+                success = self.driver.libaquery_a() and success
            if self.cache_status.pch_hpp_gch:
-                self.driver.pch()
+                success = self.driver.pch() and success
            if self.cache_status.server:
-                self.driver.server()
-        current.calc(libaquery_a)
-        with open('.cached', 'wb') as cache_sig:
-            cache_sig.write(pickle.dumps(current))
+                success = self.driver.server() and success
+        if success:
+            current.calc(libaquery_a)
+            with open('.cached', 'wb') as cache_sig:
+                cache_sig.write(pickle.dumps(current))
+        else:
+            try:
+                os.remove('./.cached')
+            except:
+                pass
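[Note — editor's illustration, not part of the patch: after the swap above, __ne__ reports a component as changed when either side is missing or the checksums differ, and __eq__ is its De Morgan dual. A standalone sketch of the per-key logic:]

    # Per-key comparison semantics used by checksums.__ne__ / __eq__.
    def key_ne(a, b):
        # a missing checksum on either side counts as "changed"
        return not (a and b) or a != b

    def key_eq(a, b):
        # present on both sides and identical
        return bool(a and b and a == b)

    assert key_ne(None, b'1234') is True      # missing component forces a rebuild
    assert key_ne(b'1234', b'1234') is False
    assert key_eq(b'1234', b'1234') is True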
+#include "./server/utils.h" +#include "./server/types.h" +#include +types::date_t rand_date(){ + unsigned char d = ui(engine) % 28 + 1; + unsigned char m = ui(engine) % 12 + 1; + short y = ui(engine) % 40 + 1990; + if (ui(engine) % 2) return types::date_t((unsigned char)10, (unsigned char)1, 2003); + return types::date_t{d, m, y}; +} +int gen_stock_data(int argc, char* argv[]){ + using std::string; + using namespace types; + int n_stocks = 5; + int n_data = 1000; + string* IDs = new string[n_stocks + 1]; + string* names = new string[n_stocks + 1]; + for(int i = 0; i < n_stocks; ++i){ + IDs[i] = base62uuid(); + names[i] = base62uuid(); + } + IDs[n_stocks] = "S"; + names[n_stocks] = "x"; + FILE* fp = fopen("./data/stock.csv", "w"); + fprintf(fp, "ID, timestamp, tradeDate, price\n"); + char date_str_buf [types::date_t::string_length()]; + int* timestamps = new int[n_data]; + for(int i = 0; i < n_data; ++i) timestamps[i] = i+1; + permutation(timestamps, n_data); + for(int i = 0; i < n_data; ++i){ + auto date = rand_date().toString(date_str_buf + date_t::string_length()); + fprintf(fp, "%s,%d,%s,%d\n", IDs[ui(engine)%(n_stocks + 1)].c_str(), timestamps[i], date, ui(engine) % 1000); + } + fclose(fp); + fp = fopen("./data/base.csv", "w"); + fprintf(fp, "ID, name\n"); + for(int i = 0; i < n_stocks + 1; ++ i){ + fprintf(fp, "%s,%s\n", IDs[i].c_str(), names[i].c_str()); + } + fclose(fp); +} + +int main(int argc, char* argv[]){ + gen_stock_data(argc, argv); +} diff --git a/engine/projection.py b/engine/projection.py index fa199ed..f813005 100644 --- a/engine/projection.py +++ b/engine/projection.py @@ -29,7 +29,7 @@ class projection(ast_node): def spawn(self, node): self.datasource = None if 'from' in node: - from_clause = node['from'] + from_clause = node['from']['table_source'] if type(from_clause) is list: # from joins join(self, from_clause) @@ -47,8 +47,8 @@ class projection(ast_node): self.datasource = self.context.tables_byname[value] if 'name' in value: self.datasource.add_alias(value['name']) - if 'assumptions' in from_clause: - self.assumptions = enlist(from_clause['assumptions']) + if 'assuming' in node['from']: + self.assumptions = enlist(node['from']['assuming']) elif type(from_clause) is str: self.datasource = self.context.tables_byname[from_clause] diff --git a/engine/types.py b/engine/types.py index 477934d..de80c7d 100644 --- a/engine/types.py +++ b/engine/types.py @@ -1,5 +1,5 @@ from copy import deepcopy -from engine.utils import defval +from engine.utils import base62uuid, defval from aquery_config import have_hge from typing import Dict, List @@ -244,6 +244,14 @@ def fn_behavior(op:OperatorBase, c_code, *x): name = op.cname if c_code else op.sqlname return f'{name}({", ".join([f"{xx}" for xx in x])})' +def count_behavior(op:OperatorBase, c_code, x, distinct = False): + if not c_code: + return f'{op.sqlname}({"distinct " if distinct else ""}{x})' + elif distinct: + return '({x}).distinct_size()' + else: + return '{count()}' + def windowed_fn_behavor(op: OperatorBase, c_code, *x): if not c_code: return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})' @@ -282,7 +290,7 @@ fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlna fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor) fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor) fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), 
diff --git a/engine/projection.py b/engine/projection.py
index fa199ed..f813005 100644
--- a/engine/projection.py
+++ b/engine/projection.py
@@ -29,7 +29,7 @@ class projection(ast_node):
     def spawn(self, node):
         self.datasource = None
         if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
             if type(from_clause) is list:
                 # from joins
                 join(self, from_clause)
@@ -47,8 +47,8 @@
                     self.datasource = self.context.tables_byname[value]
                 if 'name' in value:
                     self.datasource.add_alias(value['name'])
-            if 'assumptions' in from_clause:
-                self.assumptions = enlist(from_clause['assumptions'])
+            if 'assuming' in node['from']:
+                self.assumptions = enlist(node['from']['assuming'])
 
         elif type(from_clause) is str:
             self.datasource = self.context.tables_byname[from_clause]
diff --git a/engine/types.py b/engine/types.py
index 477934d..de80c7d 100644
--- a/engine/types.py
+++ b/engine/types.py
@@ -1,5 +1,5 @@
 from copy import deepcopy
-from engine.utils import defval
+from engine.utils import base62uuid, defval
 from aquery_config import have_hge
 from typing import Dict, List
 
@@ -244,6 +244,14 @@ def fn_behavior(op:OperatorBase, c_code, *x):
     name = op.cname if c_code else op.sqlname
     return f'{name}({", ".join([f"{xx}" for xx in x])})'
 
+def count_behavior(op:OperatorBase, c_code, x, distinct = False):
+    if not c_code:
+        return f'{op.sqlname}({"distinct " if distinct else ""}{x})'
+    elif distinct:
+        return '({x}).distinct_size()'
+    else:
+        return '{count()}'
+
 def windowed_fn_behavor(op: OperatorBase, c_code, *x):
     if not c_code:
         return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})'
@@ -282,7 +290,7 @@ fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlna
 fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor)
 fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor)
 fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'avgs', sqlname = 'AVGS', call = windowed_fn_behavor)
-fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = fn_behavior)
+fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior)
 # special
 def is_null_call_behavior(op:OperatorBase, c_code : bool, x : str):
     if c_code :
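[Note — editor's illustration, not part of the patch: count_behavior switches on the target language and on DISTINCT. The two c_code branches deliberately return deferred placeholders — '{count()}' and '({x}).distinct_size()' are plain strings, not f-strings; they are evaluated later by expr.eval(), which supplies count (and, via var_inject, x) once the surrounding group-by variable is known. Expected outputs, assuming a column named price:]

    from engine.types import fncnt, count_behavior

    assert count_behavior(fncnt, False, 'price') == 'COUNT(price)'
    assert count_behavior(fncnt, False, 'price', True) == 'COUNT(distinct price)'
    # C++ side: deferred placeholders, filled in by expr.eval() later
    assert count_behavior(fncnt, True, 'price') == '{count()}'
    assert count_behavior(fncnt, True, 'price', True) == '({x}).distinct_size()'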
diff --git a/prompt.py b/prompt.py
index aadbeb7..0f59d33 100644
--- a/prompt.py
+++ b/prompt.py
@@ -479,6 +479,10 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                 print(e)
                 continue
             except (ValueError, FileNotFoundError) as e:
+                try:
+                    os.remove('./.cached')
+                except:
+                    pass
                 print(e)
             except (KeyboardInterrupt):
                 break
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index 7d5b0c8..a66da39 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -73,7 +73,7 @@ class projection(ast_node):
             self.datasource = join(self, [], self.context) # datasource is Join instead of TableInfo
         self.assumptions = []
         if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
             self.datasource = join(self, from_clause)
             if 'assumptions' in from_clause:
                 self.assumptions = enlist(from_clause['assumptions'])
@@ -129,12 +129,17 @@
                 if not proj_expr.is_special:
                     y = lambda x:x
                     name = eval('f\'' + name + '\'')
+                    offset = len(col_exprs)
                     if name not in self.var_table:
-                        self.var_table[name] = len(col_exprs)
-                    proj_map[i] = [this_type, len(col_exprs), proj_expr]
+                        self.var_table[name] = offset
+                        if proj_expr.is_ColExpr and type(proj_expr.raw_col) is ColRef:
+                            for n in (proj_expr.raw_col.table.alias):
+                                self.var_table[f'{n}.'+name] = offset
+                    proj_map[i] = [this_type, offset, proj_expr]
                     col_expr = name + ' AS ' + alias if alias else name
                     if alias:
-                        self.var_table[alias] = len(col_exprs)
+                        self.var_table[alias] = offset
+
                     col_exprs.append((col_expr, proj_expr.type))
             else:
                 self.context.headers.add('"./server/aggregations.h"')
@@ -164,10 +169,12 @@
             self.add(', '.join([c[0] for c in col_exprs] + col_ext_names))
 
         _base_offset = len(col_exprs)
-        for i, col in enumerate(col_ext_names):
-            if col not in self.var_table:
-                self.var_table[col] = i + _base_offset
-
+        for i, col in enumerate(self.col_ext):
+            if col.name not in self.var_table:
+                offset = i + _base_offset
+                self.var_table[col.name] = offset
+                for n in (col.table.alias):
+                    self.var_table[f'{n}.'+col.name] = offset
 
         def finialize(astnode:ast_node):
             if(astnode is not None):
@@ -223,12 +230,15 @@
                 if type(val[1]) is str:
                     x = True
                     y = lambda t: self.pyname2cname[t]
-                    val[1] = val[2].eval(x, y, gettype=True)
+                    count = lambda : '0'
+                    if vid2cname:
+                        count = lambda : f'{vid2cname[0]}.size'
+                    val[1] = val[2].eval(x, y, count=count)
                     if callable(val[1]):
-                        val[1] = val[1](True)
-                    decltypestring = val[1]
+                        val[1] = val[1](False)
 
                 if val[0] == LazyT:
+                    decltypestring = val[2].eval(x,y,gettype=True)(True)
                     decltypestring = f'value_type<decays<decltype({decltypestring})>>'
                     out_typenames[key] = decltypestring
                 else:
@@ -461,7 +471,8 @@ class groupby_c(ast_node):
                     return get_var_names(sql_code)
                 else:
                     return varex.eval(c_code=True, y = get_var_names,
-                        materialize_builtin = materialize_builtin)
+                        materialize_builtin = materialize_builtin,
+                        count=lambda:f'{val_var}.size')
 
         for ce in cexprs:
             ex = ce[1]
@@ -544,24 +555,25 @@ class join(ast_node):
         self.tables_dir = dict()
         self.rec = None
         self.top_level = self.parent and type(self.parent) is projection
+        self.have_sep = False
         # self.tmp_name = 'join_' + base62uuid(4)
         # self.datasource = TableInfo(self.tmp_name, [], self.context)
 
     def append(self, tbls, __alias = ''):
-        alias = lambda t : '(' + t + ') ' + __alias if len(__alias) else t
+        alias = lambda t : t + ' ' + __alias if len(__alias) else t
         if type(tbls) is join:
-            self.joins.append(alias(tbls.__str__()))
+            self.joins.append((alias(tbls.__str__()), tbls.have_sep))
             self.tables += tbls.tables
             self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
 
         elif type(tbls) is TableInfo:
-            self.joins.append(alias(tbls.table_name))
+            self.joins.append((alias(tbls.table_name), False))
             self.tables.append(tbls)
             self.tables_dir[tbls.table_name] = tbls
             for a in tbls.alias:
                 self.tables_dir[a] = tbls
 
         elif type(tbls) is projection:
-            self.joins.append(alias(tbls.finalize()))
+            self.joins.append((alias(tbls.finalize()), False))
 
     def produce(self, node):
         if type(node) is list:
@@ -585,13 +597,14 @@
                 tbl.add_alias(node['name'])
             self.append(tbl, alias)
         else:
-            keys = node.keys()
+            keys = list(node.keys())
             if keys[0].lower().endswith('join'):
+                self.have_sep = True
                 j = join(self, node[keys[0]])
                 tablename = f' {keys[0]} {j}'
-                if keys[1].lower() == 'on':
+                if len(keys) > 1 and keys[1].lower() == 'on':
                     tablename += f' on {expr(self, node[keys[1]])}'
-                self.joins.append(tablename)
+                self.joins.append((tablename, self.have_sep))
                 self.tables += j.tables
                 self.tables_dir = {**self.tables_dir, **j.tables_dir}
 
@@ -618,18 +631,27 @@
         if datasource is None:
             raise ValueError(f'Table name/alias not defined{parsedColExpr[0]}')
         else:
-            return datasource.parse_col_names(parsedColExpr[1])
+            datasource.rec = self.rec
+            ret = datasource.parse_col_names(parsedColExpr[1])
+            datasource.rec = None
+            return ret
+
     @property
     def all_cols(self):
         return set([c for t in self.tables for c in t.columns])
     def consume(self, node):
-        self.sql = ', '.join(self.joins)
+        self.sql = ''
+        for j in self.joins:
+            if not self.sql or j[1]:
+                self.sql += j[0]
+            else:
+                self.sql += ', ' + j[0]
         if node and self.sql and self.top_level:
             self.sql = ' FROM ' + self.sql
         return super().consume(node)
 
     def __str__(self):
-        return ', '.join(self.joins)
+        return self.sql
     def __repr__(self):
         return self.__str__()
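[Note — editor's illustration, not part of the patch: join.joins entries are now (sql, have_sep) pairs, so consume() only inserts a comma before sources that did not arrive through an explicit JOIN keyword (which already carries its own separator text). A tiny standalone sketch of that separator rule:]

    # Separator logic from join.consume(): explicit joins carry their own keyword.
    def render(joins):
        sql = ''
        for text, have_sep in joins:
            if not sql or have_sep:
                sql += text
            else:
                sql += ', ' + text
        return sql

    assert render([('stock s', False), (' natural join base b', True)]) \
           == 'stock s natural join base b'
    assert render([('stock', False), ('base', False)]) == 'stock, base'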
diff --git a/reconstruct/expr.py b/reconstruct/expr.py
index b636667..ce5ea4f 100644
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@@ -26,7 +26,13 @@ class expr(ast_node):
 
     def __init__(self, parent, node, *, c_code = None, supress_undefined = False):
         from reconstruct.ast import projection, udf
-
+        # gen2 expr have multi-passes
+        # first pass parse json into expr tree
+        # generate target code in later passes upon need
+        self.children = []
+        self.opname = ''
+        self.curr_code = ''
+        self.counts = {}
         self.type = None
         self.raw_col = None
         self.udf : Optional[udf] = None
@@ -81,106 +87,124 @@ class expr(ast_node):
         from reconstruct.ast import udf
 
         if type(node) is dict:
-            if len(node) > 1:
-                print(f'Parser Error: {node} has more than 1 dict entry.')
+            if 'literal' in node:
+                node = node['literal']
+            else:
+                if len(node) > 1:
+                    print(f'Parser Error: {node} has more than 1 dict entry.')
 
-            for key, val in node.items():
-                if key in self.operators:
-                    if key in builtin_func:
-                        if self.is_agg_func:
-                            self.root.is_special = True # Nested Aggregation
-                        else:
-                            self.is_agg_func = True
-
-                    op = self.operators[key]
-
-                    val = enlist(val)
-                    exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
-                    str_vals = [e.sql for e in exp_vals]
-                    type_vals = [e.type for e in exp_vals]
-                    is_compound = any([e.is_compound for e in exp_vals])
-                    if key in self.ext_aggfuncs:
-                        self.is_compound = False
-                    else:
-                        self.is_compound = is_compound
-                    try:
-                        self.type = op.return_type(*type_vals)
-                    except AttributeError as e:
-                        if type(self.root) is not udf:
-                            # TODO: do something when this is not an error
-                            # print(f'alert: {e}')
-                            pass
-                        self.type = AnyT
+                for key, val in node.items():
+                    key = key.lower()
+                    if key in self.operators:
+                        if key in builtin_func:
+                            if self.is_agg_func:
+                                self.root.is_special = True # Nested Aggregation
+                            else:
+                                self.is_agg_func = True
 
-                    self.sql = op(self.c_code, *str_vals)
-                    special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
-                                    "maxs", "mins", "avgs", "sums", "deltas"]
-                    if self.context.special_gb:
-                        special_func = [*special_func, *self.ext_aggfuncs]
+                        op = self.operators[key]
+                        count_distinct = False
+                        if key == 'count' and type(val) is dict and 'distinct' in val:
+                            count_distinct = True
+                            val = val['distinct']
+                        val = enlist(val)
+                        exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
+                        self.children = exp_vals
+                        self.opname = key
 
-                    if key in special_func and not self.is_special:
-                        self.is_special = True
-                        if key in self.context.udf_map:
-                            self.root.udf_called = self.context.udf_map[key]
-                            if self.is_udfexpr and key == self.root.udf.name:
-                                self.root.is_recursive_call_inudf = True
-                        elif key in user_module_func.keys():
-                            udf.try_init_udf(self.context)
-                        # TODO: make udf_called a set!
-                        p = self.parent
-                        while type(p) is expr and not p.udf_called:
-                            p.udf_called = self.udf_called
-                            p = p.parent
-                        p = self.parent
-                        while type(p) is expr and not p.is_special:
-                            p.is_special = True
-                            p = p.parent
+                        str_vals = [e.sql for e in exp_vals]
+                        type_vals = [e.type for e in exp_vals]
+                        is_compound = any([e.is_compound for e in exp_vals])
+                        if key in self.ext_aggfuncs:
+                            self.is_compound = False
+                        else:
+                            self.is_compound = is_compound
+                        try:
+                            self.type = op.return_type(*type_vals)
+                        except AttributeError as e:
+                            if type(self.root.parent) is not udf:
+                                # TODO: do something when this is not an error
+                                print(f'alert: {e}')
+                                pass
+                            self.type = AnyT
+
+                        if count_distinct: # inject distinct col later
+                            self.sql = f'{{{op(self.c_code, *str_vals, True)}}}'
+                        else:
+                            self.sql = op(self.c_code, *str_vals)
+
+                        special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
+                                        "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                        if self.context.special_gb:
+                            special_func = [*special_func, *self.ext_aggfuncs]
+
+                        if key in special_func and not self.is_special:
+                            self.is_special = True
+                            if key in self.context.udf_map:
+                                self.root.udf_called = self.context.udf_map[key]
+                                if self.is_udfexpr and key == self.root.udf.name:
+                                    self.root.is_recursive_call_inudf = True
+                            elif key in user_module_func.keys():
+                                udf.try_init_udf(self.context)
+                            # TODO: make udf_called a set!
+                            p = self.parent
+                            while type(p) is expr and not p.udf_called:
+                                p.udf_called = self.udf_called
+                                p = p.parent
+                            p = self.parent
+                            while type(p) is expr and not p.is_special:
+                                p.is_special = True
+                                p = p.parent
 
-                    need_decltypestr = any([e.need_decltypestr for e in exp_vals])
-                    if need_decltypestr or (self.udf_called and type(op) is udf):
-                        decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
-                        self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
+                        need_decltypestr = any([e.need_decltypestr for e in exp_vals])
+                        if need_decltypestr or (self.udf_called and type(op) is udf):
+                            decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
+                            self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
 
-                        if self.udf_called and type(op) is udf:
-                            self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
-
-                elif self.is_udfexpr:
-                    var_table = self.root.udf.var_table
-                    vec = key.split('.')
-                    _vars = [*var_table, *self.builtin_vars]
-                    def get_vname (node):
-                        if node in self.builtin_vars:
-                            self.root.udf.builtin[node].enabled = True
-                            self.builtin_var = node
-                            return node
+                            if self.udf_called and type(op) is udf:
+                                self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
+
+                    elif self.is_udfexpr:
+                        var_table = self.root.udf.var_table
+                        vec = key.split('.')
+                        _vars = [*var_table, *self.builtin_vars]
+                        def get_vname (node):
+                            if node in self.builtin_vars:
+                                self.root.udf.builtin[node].enabled = True
+                                self.builtin_var = node
+                                return node
+                            else:
+                                return var_table[node]
+                        if vec[0] not in _vars:
+                            # print(f'Use of undefined variable {vec[0]}')
+                            # TODO: do something when this is not an error
+                            pass
                         else:
-                            return var_table[node]
-                    if vec[0] not in _vars:
-                        # print(f'Use of undefined variable {vec[0]}')
-                        # TODO: do something when this is not an error
-                        pass
+                            vname = get_vname(vec[0])
+                            val = enlist(val)
+                            if(len(val) > 2):
+                                print('Warning: more than 2 indexes found for subvec operator.')
+                            ex = [expr(self, v, c_code = self.c_code) for v in val]
+                            idxs = ', '.join([e.sql for e in ex])
+                            self.sql = f'{vname}.subvec({idxs})'
+                            if any([e.need_decltypestr for e in ex]):
+                                self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
+                            if key == 'get' and len(val) > 1:
+                                ex_vname = expr(self, val[0], c_code=self.c_code)
+                                self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
+                                if hasattr(ex_vname, 'builtin_var'):
+                                    if not hasattr(self, 'builtin_var'):
+                                        self.builtin_var = []
+                                    self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
+                                    self.udf_decltypecall = ex_vname.sql
                    else:
-                        vname = get_vname(vec[0])
-                        val = enlist(val)
-                        if(len(val) > 2):
-                            print('Warning: more than 2 indexes found for subvec operator.')
-                        ex = [expr(self, v, c_code = self.c_code) for v in val]
-                        idxs = ', '.join([e.sql for e in ex])
-                        self.sql = f'{vname}.subvec({idxs})'
-                        if any([e.need_decltypestr for e in ex]):
-                            self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
-                        if key == 'get' and len(val) > 1:
-                            ex_vname = expr(self, val[0], c_code=self.c_code)
-                            self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
-                            if hasattr(ex_vname, 'builtin_var'):
-                                if not hasattr(self, 'builtin_var'):
-                                    self.builtin_var = []
-                                self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
-                                self.udf_decltypecall = ex_vname.sql
-                    else:
-                        print(f'Undefined expr: {key}{val}')
-
-        elif type(node) is str:
+                        print(f'Undefined expr: {key}{val}')
+                if 'distinct' in val and key != 'count':
+                    if self.c_code:
+                        self.sql = 'distinct ' + self.sql
+                    elif self.is_compound:
+                        self.sql = '(' + self.sql + ').distinct()'
+        if type(node) is str:
             if self.is_udfexpr:
                 curr_udf : udf = self.root.udf
                 var_table = curr_udf.var_table
@@ -215,22 +239,41 @@
             self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
             if self.raw_col is not None:
                 self.is_ColExpr = True
-                self.sql = self.raw_col.name
+                table_name = ''
+                if '.' in node:
+                    table_name = self.raw_col.table.table_name
+                    if self.raw_col.table.alias:
+                        alias = iter(self.raw_col.table.alias)
+                        try:
+                            a = next(alias)
+                            while(not a or a == table_name):
+                                a = next(alias)
+                            if (a and a != table_name):
+                                table_name = a
+                        except StopIteration:
+                            pass
+                if table_name:
+                    table_name = table_name + '.'
+                self.sql = table_name + self.raw_col.name
                 self.type = self.raw_col.type
                 self.is_compound = True
+                self.opname = self.raw_col
             else:
-                self.sql = node
+                self.sql = '\'' + node + '\''
                 self.type = StrT
+                self.opname = self.sql
             if self.c_code and self.datasource is not None:
                 self.sql = f'{{y(\"{self.sql}\")}}'
 
         elif type(node) is bool:
             self.type = BoolT
+            self.opname = node
             if self.c_code:
                 self.sql = '1' if node else '0'
             else:
                 self.sql = 'TRUE' if node else 'FALSE'
-        else:
+        elif type(node) is not dict:
             self.sql = f'{node}'
+            self.opname = node
             if type(node) is int:
                 if (node >= 2**63 - 1 or node <= -2**63):
                     self.type = LongT
@@ -252,6 +295,12 @@
                 self.codebuf += c.finalize(override=override)
         return self.codebuf
 
+    def codegen(self, delegate):
+        self.curr_code = ''
+        for c in self.children:
+            self.curr_code += c.codegen(delegate)
+        return self.curr_code
+
     def __str__(self):
         return self.sql
     def __repr__(self):
 
     # builtins is readonly, so it's okay to set default value as an object
     # eval is only called at root expr.
-    def eval(self, c_code = None, y = lambda t: t, materialize_builtin = False, _decltypestr = False, *, gettype = False):
+    def eval(self, c_code = None, y = lambda t: t,
+             materialize_builtin = False, _decltypestr = False,
+             count = lambda : 'count', var_inject = None,
+             *,
+             gettype = False):
         assert(self.is_root)
         def call(decltypestr = False) -> str:
-            nonlocal c_code, y, materialize_builtin
+            nonlocal c_code, y, materialize_builtin, count, var_inject
+            if var_inject:
+                for k, v in var_inject.items():
+                    locals()[k] = v
             if self.udf_called is not None:
                 loc = locals()
                 builtin_vars = self.udf_called.builtin_used
@@ -344,4 +400,4 @@ class getrefs(expr):
     def consume(self, _):
         if self.root == self:
             self.rec = self.datasource.rec
-            self.datasource.rec = None
\ No newline at end of file
+            self.datasource.rec = None
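[Note — editor's illustration, not part of the patch: the '{...}' wrappers produced above (e.g. for COUNT DISTINCT) are deferred f-string fragments. At materialization time the generated string is evaluated as an f-string with y, count, and any var_inject entries in scope, which is how the count lambdas passed from groupby_c and projection get injected. A minimal sketch of that mechanism; the 'g1' name is hypothetical:]

    # Deferred f-string evaluation, as used when materializing generated code.
    sql = '{count()}'                       # placeholder emitted by count_behavior
    count = lambda: 'g1.size'               # injected by the caller (e.g. groupby_c)
    materialized = eval("f'" + sql + "'")   # -> 'g1.size'
    assert materialized == 'g1.size'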
diff --git a/server/aggregations.h b/server/aggregations.h
index ac15d8e..2add603 100644
--- a/server/aggregations.h
+++ b/server/aggregations.h
@@ -16,7 +16,8 @@ constexpr static inline size_t count(const T&) { return 1; }
 
 // TODO: Specializations for dt/str/none
 template <class T, template<typename ...> class VT>
-types::GetLongType<T> sum(const VT<T>& v) {
+types::GetLongType<T>
+sum(const VT<T>& v) {
     types::GetLongType<T> ret = 0;
     for (const auto& _v : v)
         ret += _v;
diff --git a/server/io.cpp b/server/io.cpp
index 9a527d4..c34dc42 100644
--- a/server/io.cpp
+++ b/server/io.cpp
@@ -181,7 +181,7 @@ namespace types {
         return !operator==(other);
     }
     bool time_t::validate() const{
-        return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000;
+        return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000000;
     }
 
     timestamp_t::timestamp_t(const char* str) { fromString(str); }
@@ -244,13 +244,13 @@ std::ostream& operator<<(std::ostream& os, types::timestamp_t & v)
 
 using std::string;
 
-string base62uuid(int l = 8) {
+string base62uuid(int l) {
     using namespace std;
     constexpr static const char* base62alp = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
     static mt19937_64 engine(chrono::system_clock::now().time_since_epoch().count());
     static uniform_int_distribution<uint64_t> u(0x10000, 0xfffff);
     uint64_t uuid = (u(engine) << 32ull) + (chrono::system_clock::now().time_since_epoch().count() & 0xffffffff);
-    printf("%llu\n", uuid);
+    //printf("%llu\n", uuid);
     string ret;
     while (uuid && l-- >= 0) {
         ret = string("") + base62alp[uuid % 62] + ret;
diff --git a/server/server.cpp b/server/server.cpp
index b5b8195..dd63e3e 100644
--- a/server/server.cpp
+++ b/server/server.cpp
@@ -100,10 +100,10 @@ void Context::end_session(){
 
 void* Context::get_module_function(const char* fname){
     auto fmap = static_cast<std::unordered_map<std::string, void*>*> (this->module_function_maps);
-    printf("%p\n", fmap->find("mydiv")->second);
-    for (const auto& [key, value] : *fmap){
-        printf("%s %p\n", key.c_str(), value);
-    }
+    // printf("%p\n", fmap->find("mydiv")->second);
+    // for (const auto& [key, value] : *fmap){
+    //     printf("%s %p\n", key.c_str(), value);
+    // }
     auto ret = fmap->find(fname);
     return ret == fmap->end() ? nullptr : ret->second;
 }
@@ -188,9 +188,9 @@ int dll_main(int argc, char** argv, Context* cxt){
         case 'F': // Register Function in Module
         {
             auto fname = n_recvd[i] + 1;
-            printf("F:: %s: %p, %p\n", fname, user_module_handle, dlsym(user_module_handle, fname));
+            //printf("F:: %s: %p, %p\n", fname, user_module_handle, dlsym(user_module_handle, fname));
             module_fn_map->insert_or_assign(fname, dlsym(user_module_handle, fname));
-            printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
+            //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
         }
         break;
         case 'U': // Unload Module
module_fn_map->find("mydiv")->second : nullptr); + //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr); } break; case 'U': // Unload Module diff --git a/server/table.h b/server/table.h index 36b653a..f96c0e6 100644 --- a/server/table.h +++ b/server/table.h @@ -139,7 +139,9 @@ public: const ColRef<_Ty>& orig; constexpr Iterator_t(const uint32_t* val, const ColRef<_Ty>& orig) noexcept : val(val), orig(orig) {} _Ty& operator*() { return orig[*val]; } - bool operator != (const Iterator_t& rhs) { return rhs.val != val; } + bool operator != (const Iterator_t& rhs) const { return rhs.val != val; } + bool operator == (const Iterator_t& rhs) const { return rhs.val == val; } + size_t operator - (const Iterator_t& rhs) const { return val - rhs.val; } Iterator_t& operator++ () { ++val; return *this; @@ -180,6 +182,20 @@ public: subvec[i] = operator[](i); return subvec; } + std::unordered_set<_Ty> distinct_common() { + return std::unordered_set<_Ty> {begin(), end()}; + } + uint32_t distinct_size(){ + return distinct_common().size(); + } + ColRef<_Ty> distinct(){ + auto set = distinct_common(); + ColRef<_Ty> ret(set.size()); + uint32_t i = 0; + for (auto& val : set) + ret.container[i++] = val; + return ret; + } inline ColRef<_Ty> subvec(uint32_t start = 0) { return subvec_deep(start, size); } }; template