Bug fixes for alias&join. Add test in presentation.

2 years ago · ff63be720c
parent 2614d010da
commit ff63be720c
24 changed files with 1131 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,6 @@
+tests/datagen_jose/histgen
+tests/datagen_jose/tickgen
+datagen
 *.dSYM
 testmain.lib
 testmain.exp
--- a/aquery_config.py
+++ b/aquery_config.py
@ -2,7 +2,7 @@

 ## GLOBAL CONFIGURATION FLAGS

-version_string = '0.4.4a'
+version_string = '0.4.5a'
 add_path_to_ldpath = True
 rebuild_backend = False
 run_backend = True
--- a/aquery_parser/keywords.py
+++ b/aquery_parser/keywords.py
@ -44,6 +44,7 @@ LEFT = keyword("left")
 LIKE = keyword("like")
 LIMIT = keyword("limit").suppress()
 MINUS = keyword("minus")
+NATURAL = keyword("natural")
 OFFSET = keyword("offset").suppress()
 ON = keyword("on").suppress()
 ORDER = keyword("order").suppress()
@ -145,7 +146,7 @@ VIEW = keyword("view")

 joins = (
    (
-        Optional(CROSS | OUTER | INNER | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
+        Optional(CROSS | OUTER | INNER | NATURAL | ((FULL | LEFT | RIGHT) + Optional(INNER | OUTER)))
        + JOIN
        + Optional(LATERAL)
    )
@ -214,6 +215,7 @@ RESERVED = MatchFirst([
    LIKE,
    LIMIT,
    MINUS,
+    NATURAL,
    NOCASE,
    NOT,
    NULL,
@ -253,6 +255,7 @@ EQ = Char("=").suppress()

 join_keywords = {
    "join",
+    "natural join",
    "full join",
    "cross join",
    "inner join",
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@ -323,9 +323,12 @@ def parser(literal_string, ident, sqlserver=False):

        table_source = Forward()

+        assumption = Group((ASC|DESC) ("sort") + var_name("value"))
+        assumptions = Optional(ASSUMING.suppress() + Group(delimited_list(assumption)))
+
        join = (
            Group(joins)("op")
-            + table_source("join")
+            + (table_source )("join")
            + Optional((ON + expr("on")) | (USING + expr("using")))
            | (
                Group(WINDOW)("op")
@ -403,7 +406,12 @@ def parser(literal_string, ident, sqlserver=False):
            | selection
            + Optional(INTO + table_source("into"))
            + Optional(
-                (FROM + delimited_list(table_source) + ZeroOrMore(join))("from")
+                (
+                    FROM 
+                    + (delimited_list(table_source) 
+                    + ZeroOrMore(join))("table_source")
+                    + Optional(assumptions) ("assumptions")
+                )("from")
                + Optional(WHERE + expr("where"))
                + Optional(GROUP_BY + delimited_list(Group(named_column))("groupby"))
                + Optional(HAVING + expr("having"))
@ -443,12 +451,8 @@ def parser(literal_string, ident, sqlserver=False):
            + RB,
        )

-        assumption = Group((ASC|DESC) ("sort") + var_name("value"))
-        assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions"))
-
        table_source << Group(
            ((LB + query + RB) | stack | call_function | var_name)("value")
-            + Optional(assumptions)
            + Optional(flag("with ordinality"))
            + Optional(tablesample)
            + alias
--- a/datagen.cpp
+++ b/datagen.cpp
@ -37,7 +37,7 @@ void permutation(int *v, int n) {
    }
 }

-int main(int argc, char* argv[])
+int gen_trade_data(int argc, char* argv[])
 {
    using std::vector;
    float frac = .3;
@ -108,3 +108,48 @@ int main(int argc, char* argv[])
    fclose(fp);
    return 0;
 }
+#include "./server/utils.h"
+#include "./server/types.h"
+#include <string>
+types::date_t rand_date(){
+    unsigned char d = ui(engine) % 28 + 1;
+    unsigned char m = ui(engine) % 12 + 1;
+    short y = ui(engine) % 40 + 1990;
+    if (ui(engine) % 2) return types::date_t((unsigned char)10, (unsigned char)1, 2003);
+    return types::date_t{d, m, y};
+}
+int gen_stock_data(int argc, char* argv[]){
+    using std::string;
+    using namespace types;
+    int n_stocks = 5;
+    int n_data = 1000;
+    string* IDs = new string[n_stocks + 1];
+    string* names = new string[n_stocks + 1];
+    for(int i = 0; i < n_stocks; ++i){
+        IDs[i] = base62uuid();
+        names[i] = base62uuid();
+    }
+    IDs[n_stocks] = "S";
+    names[n_stocks] = "x";
+    FILE* fp = fopen("./data/stock.csv", "w");
+    fprintf(fp, "ID, timestamp, tradeDate, price\n");
+    char date_str_buf [types::date_t::string_length()];
+    int* timestamps = new int[n_data];
+    for(int i = 0; i < n_data; ++i) timestamps[i] = i+1;
+    permutation(timestamps, n_data);
+    for(int i = 0; i < n_data; ++i){
+        auto date = rand_date().toString(date_str_buf + date_t::string_length());
+        fprintf(fp, "%s,%d,%s,%d\n", IDs[ui(engine)%(n_stocks + 1)].c_str(), timestamps[i], date, ui(engine) % 1000);
+    }
+    fclose(fp);
+    fp = fopen("./data/base.csv", "w");
+    fprintf(fp, "ID, name\n");
+    for(int i = 0; i < n_stocks + 1; ++ i){
+        fprintf(fp, "%s,%s\n", IDs[i].c_str(), names[i].c_str());
+    }
+    fclose(fp);
+}
+
+int main(int argc, char* argv[]){
+    gen_stock_data(argc, argv);
+}
--- a/engine/projection.py
+++ b/engine/projection.py
@ -29,7 +29,7 @@ class projection(ast_node):
    def spawn(self, node):
        self.datasource = None
        if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
            if type(from_clause) is list:
                # from joins
                join(self, from_clause)
@ -47,8 +47,8 @@ class projection(ast_node):
                        self.datasource = self.context.tables_byname[value]
                    if 'name' in value:
                        self.datasource.add_alias(value['name'])
-                if 'assumptions' in from_clause:
-                    self.assumptions = enlist(from_clause['assumptions'])
+                if 'assuming' in node['from']:
+                    self.assumptions = enlist(node['from']['assuming'])
                    
            elif type(from_clause) is str:
                self.datasource = self.context.tables_byname[from_clause]
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@ -73,7 +73,7 @@ class projection(ast_node):
        self.datasource = join(self, [], self.context) # datasource is Join instead of TableInfo
        self.assumptions = []
        if 'from' in node:
-            from_clause = node['from']
+            from_clause = node['from']['table_source']
            self.datasource = join(self, from_clause)
            if 'assumptions' in from_clause:
                self.assumptions = enlist(from_clause['assumptions'])
@ -129,12 +129,17 @@ class projection(ast_node):
                    if not proj_expr.is_special:
                        y = lambda x:x
                        name = eval('f\'' + name + '\'')
+                        offset = len(col_exprs)
                        if name not in self.var_table:
-                            self.var_table[name] = len(col_exprs)
-                        proj_map[i] = [this_type, len(col_exprs), proj_expr]
+                            self.var_table[name] = offset
+                            if proj_expr.is_ColExpr and type(proj_expr.raw_col) is ColRef:
+                                for n in (proj_expr.raw_col.table.alias):
+                                    self.var_table[f'{n}.'+name] = offset
+                        proj_map[i] = [this_type, offset, proj_expr]
                        col_expr = name + ' AS ' + alias if alias else name
                        if alias:
-                            self.var_table[alias] = len(col_exprs)
+                            self.var_table[alias] = offset
+                            
                        col_exprs.append((col_expr, proj_expr.type))
                    else:
                        self.context.headers.add('"./server/aggregations.h"')
@ -164,10 +169,12 @@ class projection(ast_node):
        self.add(', '.join([c[0] for c in col_exprs] + col_ext_names))
    
        _base_offset = len(col_exprs)
-        for i, col in enumerate(col_ext_names):
-            if col not in self.var_table:
-                self.var_table[col] = i + _base_offset
-    
+        for i, col in enumerate(self.col_ext):
+            if col.name not in self.var_table:
+                offset = i + _base_offset
+                self.var_table[col.name] = offset
+                for n in (col.table.alias):
+                    self.var_table[f'{n}.'+col.name] = offset
    
        def finialize(astnode:ast_node):
            if(astnode is not None):
@ -548,24 +555,25 @@ class join(ast_node):
        self.tables_dir = dict()
        self.rec = None
        self.top_level = self.parent and type(self.parent) is projection
+        self.have_sep = False
        # self.tmp_name = 'join_' + base62uuid(4)
        # self.datasource = TableInfo(self.tmp_name, [], self.context)
    def append(self, tbls, __alias = ''):
-        alias = lambda t : '(' + t + ') ' + __alias if len(__alias) else t
+        alias = lambda t : t + ' ' + __alias if len(__alias) else t
        if type(tbls) is join:
-            self.joins.append(alias(tbls.__str__()))
+            self.joins.append((alias(tbls.__str__()), tbls.have_sep))
            self.tables += tbls.tables
            self.tables_dir = {**self.tables_dir, **tbls.tables_dir}
            
        elif type(tbls) is TableInfo:
-            self.joins.append(alias(tbls.table_name))
+            self.joins.append((alias(tbls.table_name), False))
            self.tables.append(tbls)
            self.tables_dir[tbls.table_name] = tbls
            for a in tbls.alias:
                self.tables_dir[a] = tbls
            
        elif type(tbls) is projection:
-            self.joins.append(alias(tbls.finalize()))
+            self.joins.append((alias(tbls.finalize()), False))
            
    def produce(self, node):
        if type(node) is list:
@ -589,13 +597,14 @@ class join(ast_node):
                        tbl.add_alias(node['name'])
                self.append(tbl, alias)
            else:
-                keys = node.keys()
+                keys = list(node.keys())
                if keys[0].lower().endswith('join'):
+                    self.have_sep = True
                    j = join(self, node[keys[0]])
                    tablename = f' {keys[0]} {j}'
-                    if keys[1].lower() == 'on':
+                    if len(keys) > 1 and keys[1].lower() == 'on':
                        tablename += f' on {expr(self, node[keys[1]])}' 
-                    self.joins.append(tablename)
+                    self.joins.append((tablename, self.have_sep))
                    self.tables += j.tables
                    self.tables_dir = {**self.tables_dir, **j.tables_dir}
                
@ -622,18 +631,27 @@ class join(ast_node):
            if datasource is None:
                raise ValueError(f'Table name/alias not defined{parsedColExpr[0]}')
            else:
-                return datasource.parse_col_names(parsedColExpr[1])
+                datasource.rec = self.rec
+                ret = datasource.parse_col_names(parsedColExpr[1])
+                datasource.rec = None
+                return ret
+                
    @property
    def all_cols(self):
        return set([c for t in self.tables for c in t.columns])
    def consume(self, node):
-        self.sql = ', '.join(self.joins)
+        self.sql = ''
+        for j in self.joins:
+            if not self.sql or j[1]:
+                self.sql += j[0]
+            else:
+                self.sql += ', ' + j[0]        
        if node and self.sql and self.top_level:
            self.sql = ' FROM ' + self.sql 
        return super().consume(node)
    
    def __str__(self):
-        return ', '.join(self.joins)
+        return self.sql
    def __repr__(self):
        return self.__str__()
    
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@ -87,116 +87,119 @@ class expr(ast_node):
        from reconstruct.ast import udf
        
        if type(node) is dict:
-            if len(node) > 1:
-                print(f'Parser Error: {node} has more than 1 dict entry.')
+            if 'literal' in node:
+                node = node['literal']
+            else:
+                if len(node) > 1:
+                    print(f'Parser Error: {node} has more than 1 dict entry.')
                
-            for key, val in node.items():
-                if key in self.operators:
-                    if key in builtin_func:
-                        if self.is_agg_func:
-                            self.root.is_special = True # Nested Aggregation
-                        else:
-                            self.is_agg_func = True
-                    
-                    op = self.operators[key]
-                    count_distinct = False
-                    if key == 'count' and type(val) is dict and 'distinct' in val:
-                        count_distinct = True
-                        val = val['distinct']
-                    val = enlist(val)
-                    exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
-                    self.children = exp_vals
-                    self.opname = key
-                    
-                    str_vals = [e.sql for e in exp_vals]
-                    type_vals = [e.type for e in exp_vals]
-                    is_compound = any([e.is_compound for e in exp_vals])
-                    if key in self.ext_aggfuncs:
-                        self.is_compound = False
-                    else:
-                        self.is_compound = is_compound
-                    try:
-                        self.type = op.return_type(*type_vals)
-                    except AttributeError as e:
-                        if type(self.root) is not udf:
-                            # TODO: do something when this is not an error
-                            # print(f'alert: {e}')
-                            pass
-                        self.type = AnyT
-                        
-                    if count_distinct: # inject distinct col later
-                        self.sql = f'{{{op(self.c_code, *str_vals, True)}}}'
-                    else:
-                        self.sql = op(self.c_code, *str_vals)
+                for key, val in node.items():
+                    if key in self.operators:
+                        if key in builtin_func:
+                            if self.is_agg_func:
+                                self.root.is_special = True # Nested Aggregation
+                            else:
+                                self.is_agg_func = True
                        
-                    special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), 
-                                    "maxs", "mins", "avgs", "sums", "deltas"]
-                    if self.context.special_gb:
-                        special_func = [*special_func, *self.ext_aggfuncs]
+                        op = self.operators[key]
+                        count_distinct = False
+                        if key == 'count' and type(val) is dict and 'distinct' in val:
+                            count_distinct = True
+                            val = val['distinct']
+                        val = enlist(val)
+                        exp_vals = [expr(self, v, c_code = self.c_code) for v in val]
+                        self.children = exp_vals
+                        self.opname = key
                        
-                    if key in special_func and not self.is_special:
-                        self.is_special = True
-                        if key in self.context.udf_map:
-                            self.root.udf_called = self.context.udf_map[key]
-                            if self.is_udfexpr and key == self.root.udf.name:
-                                self.root.is_recursive_call_inudf = True
-                        elif key in user_module_func.keys():
-                            udf.try_init_udf(self.context)                           
-                        # TODO: make udf_called a set!
-                        p = self.parent
-                        while type(p) is expr and not p.udf_called:
-                            p.udf_called = self.udf_called
-                            p = p.parent
-                        p = self.parent
-                        while type(p) is expr and not p.is_special:
-                            p.is_special = True
-                            p = p.parent
+                        str_vals = [e.sql for e in exp_vals]
+                        type_vals = [e.type for e in exp_vals]
+                        is_compound = any([e.is_compound for e in exp_vals])
+                        if key in self.ext_aggfuncs:
+                            self.is_compound = False
+                        else:
+                            self.is_compound = is_compound
+                        try:
+                            self.type = op.return_type(*type_vals)
+                        except AttributeError as e:
+                            if type(self.root) is not udf:
+                                # TODO: do something when this is not an error
+                                # print(f'alert: {e}')
+                                pass
+                            self.type = AnyT
+                            
+                        if count_distinct: # inject distinct col later
+                            self.sql = f'{{{op(self.c_code, *str_vals, True)}}}'
+                        else:
+                            self.sql = op(self.c_code, *str_vals)
+                            
+                        special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), 
+                                        "maxs", "mins", "avgs", "sums", "deltas", "last"]
+                        if self.context.special_gb:
+                            special_func = [*special_func, *self.ext_aggfuncs]
+                            
+                        if key in special_func and not self.is_special:
+                            self.is_special = True
+                            if key in self.context.udf_map:
+                                self.root.udf_called = self.context.udf_map[key]
+                                if self.is_udfexpr and key == self.root.udf.name:
+                                    self.root.is_recursive_call_inudf = True
+                            elif key in user_module_func.keys():
+                                udf.try_init_udf(self.context)                           
+                            # TODO: make udf_called a set!
+                            p = self.parent
+                            while type(p) is expr and not p.udf_called:
+                                p.udf_called = self.udf_called
+                                p = p.parent
+                            p = self.parent
+                            while type(p) is expr and not p.is_special:
+                                p.is_special = True
+                                p = p.parent

-                    need_decltypestr = any([e.need_decltypestr for e in exp_vals])        
-                    if need_decltypestr or (self.udf_called and type(op) is udf):
-                        decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
-                        self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
+                        need_decltypestr = any([e.need_decltypestr for e in exp_vals])        
+                        if need_decltypestr or (self.udf_called and type(op) is udf):
+                            decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
+                            self.udf_decltypecall = op(self.c_code, *decltypestr_vals)

-                        if self.udf_called and type(op) is udf:
-                            self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
-            
-                elif self.is_udfexpr:
-                    var_table = self.root.udf.var_table
-                    vec = key.split('.')
-                    _vars = [*var_table, *self.builtin_vars]
-                    def get_vname (node):
-                        if node in self.builtin_vars:
-                            self.root.udf.builtin[node].enabled = True
-                            self.builtin_var = node
-                            return node
+                            if self.udf_called and type(op) is udf:
+                                self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
+                
+                    elif self.is_udfexpr:
+                        var_table = self.root.udf.var_table
+                        vec = key.split('.')
+                        _vars = [*var_table, *self.builtin_vars]
+                        def get_vname (node):
+                            if node in self.builtin_vars:
+                                self.root.udf.builtin[node].enabled = True
+                                self.builtin_var = node
+                                return node
+                            else:
+                                return var_table[node]
+                        if vec[0] not in _vars:
+                            # print(f'Use of undefined variable {vec[0]}')
+                            # TODO: do something when this is not an error
+                            pass
                        else:
-                            return var_table[node]
-                    if vec[0] not in _vars:
-                        # print(f'Use of undefined variable {vec[0]}')
-                        # TODO: do something when this is not an error
-                        pass
+                            vname = get_vname(vec[0])
+                            val = enlist(val)
+                            if(len(val) > 2):
+                                print('Warning: more than 2 indexes found for subvec operator.')
+                            ex = [expr(self, v, c_code = self.c_code) for v in val]
+                            idxs = ', '.join([e.sql for e in ex])
+                            self.sql = f'{vname}.subvec({idxs})'
+                            if any([e.need_decltypestr for e in ex]):
+                                self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
+                        if key == 'get' and len(val) > 1:
+                            ex_vname = expr(self, val[0], c_code=self.c_code)
+                            self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
+                            if hasattr(ex_vname, 'builtin_var'):
+                                if not hasattr(self, 'builtin_var'):
+                                    self.builtin_var = []
+                                self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
+                                self.udf_decltypecall = ex_vname.sql
                    else:
-                        vname = get_vname(vec[0])
-                        val = enlist(val)
-                        if(len(val) > 2):
-                            print('Warning: more than 2 indexes found for subvec operator.')
-                        ex = [expr(self, v, c_code = self.c_code) for v in val]
-                        idxs = ', '.join([e.sql for e in ex])
-                        self.sql = f'{vname}.subvec({idxs})'
-                        if any([e.need_decltypestr for e in ex]):
-                            self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
-                    if key == 'get' and len(val) > 1:
-                        ex_vname = expr(self, val[0], c_code=self.c_code)
-                        self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
-                        if hasattr(ex_vname, 'builtin_var'):
-                            if not hasattr(self, 'builtin_var'):
-                                self.builtin_var = []
-                            self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
-                            self.udf_decltypecall = ex_vname.sql
-                else:
-                    print(f'Undefined expr: {key}{val}')
+                        print(f'Undefined expr: {key}{val}')

-        elif type(node) is str:
+        if type(node) is str:
            if self.is_udfexpr:
                curr_udf : udf = self.root.udf
                var_table = curr_udf.var_table
@ -231,14 +234,29 @@ class expr(ast_node):
                    self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
                if self.raw_col is not None:
                    self.is_ColExpr = True
-                    self.sql = self.raw_col.name
+                    table_name = ''
+                    if '.' in node:
+                        table_name = self.raw_col.table.table_name
+                        if self.raw_col.table.alias:
+                            alias = iter(self.raw_col.table.alias)
+                            try:
+                                a = next(alias)
+                                while(not a or a == table_name):
+                                    a = next(alias)
+                                if (a and a != table_name):
+                                    table_name = a
+                            except StopIteration:
+                                pass
+                    if table_name:
+                        table_name = table_name + '.'
+                    self.sql = table_name + self.raw_col.name
                    self.type = self.raw_col.type
                    self.is_compound = True
                    self.opname = self.raw_col
                else:
-                    self.sql = node
+                    self.sql = '\'' + node + '\''
                    self.type = StrT
-                    self.opname = node
+                    self.opname = self.sql
                if self.c_code and self.datasource is not None:
                    self.sql = f'{{y(\"{self.sql}\")}}'
        elif type(node) is bool:
@ -248,7 +266,7 @@ class expr(ast_node):
                self.sql = '1' if node else '0'
            else:
                self.sql = 'TRUE' if node else 'FALSE'
-        else:
+        elif type(node) is not dict:
            self.sql = f'{node}'
            self.opname = node
            if type(node) is int:
--- a/server/io.cpp
+++ b/server/io.cpp
@ -181,7 +181,7 @@ namespace types {
 		return !operator==(other);
 	}
 	bool time_t::validate() const{
-		return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000;
+		return hours < 24 && minutes < 60 && seconds < 60 && ms < 1000000;
 	}

 	timestamp_t::timestamp_t(const char* str) { fromString(str); }
@ -244,13 +244,13 @@ std::ostream& operator<<(std::ostream& os, types::timestamp_t & v)


 using std::string;
-string base62uuid(int l = 8) {
+string base62uuid(int l) {
    using namespace std;
    constexpr static const char* base62alp = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static mt19937_64 engine(chrono::system_clock::now().time_since_epoch().count());
    static uniform_int_distribution<uint64_t> u(0x10000, 0xfffff);
    uint64_t uuid = (u(engine) << 32ull) + (chrono::system_clock::now().time_since_epoch().count() & 0xffffffff);
-    printf("%llu\n", uuid);
+    //printf("%llu\n", uuid);
    string ret;
    while (uuid && l-- >= 0) {
        ret = string("") + base62alp[uuid % 62] + ret;
--- a/server/types.h
+++ b/server/types.h
@ -69,7 +69,7 @@ namespace types {
 		time_t& fromString(const char*);
 		bool validate() const;
 		constexpr static unsigned string_length() {
-			return 13;
+			return 16;
 		};
 		char* toString(char* buf) const;
 		bool operator > (const time_t&) const;
--- a/server/utils.h
+++ b/server/utils.h
@ -1,5 +1,4 @@
 #pragma once
-#include <string>
 #include <ctime>
 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
 constexpr static bool cpp_17 = true;
@ -13,4 +12,6 @@ inline const char* str(const T& v) {
 template <>
 inline const char* str(const bool& v) {
 	return v ? "true" : "false";
-}
+}
+#include<string>
+extern std::string base62uuid(int l = 6);
--- a/tests/best_profit.a
+++ b/tests/best_profit.a
@ -0,0 +1,41 @@
+-- please run datagen.get_stock_data() to generate data/stock.csv first
+
+-- create table ticks(ID varchar(10), timestamp int, tradeDate date, price int);
+
+-- LOAD DATA INFILE "data/stock.csv"
+-- INTO TABLE ticks
+-- FIELDS TERMINATED BY ","
+
+-- SELECT max(price-mins(price))
+-- FROM   ticks    ASSUMING ASC timestamp
+-- WHERE  ID="S"
+--     AND  tradeDate='2003-01-10'
+
+-- create table base(ID varchar(10), name varchar(10));
+
+-- LOAD DATA INFILE "data/base.csv"
+-- INTO TABLE base
+-- FIELDS TERMINATED BY ","
+
+-- SELECT last(price)
+-- FROM    ticks t, base b
+--     ASSUMING ASC name, ASC timestamp
+-- WHERE   t.ID=b.ID
+--         AND name="x"
+
+create table TradedStocks(ID varchar(15), SeqNo int, TradeDate date, TimeStamp time, Type varchar(5));
+create table HistoricQuotes(ID varchar(15), TradeDate date, HighPrice real, LowPrice real, ClosePrice real, OpenPrice real, volume bigint);
+
+LOAD DATA INFILE "data/tick-price-file.csv"
+INTO TABLE TradedStocks
+FIELDS TERMINATED BY "|"
+LOAD DATA INFILE "data/hist-price-file.csv"
+INTO TABLE HistoricQuotes
+FIELDS TERMINATED BY "|"
+
+
+SELECT ts.ID, avgs(10, hq.ClosePrice)
+FROM   TradedStocks AS ts NATURAL JOIN
+       HistoricQuotes AS hq
+              ASSUMING ASC hq.TradeDate
+GROUP BY ts.ID
--- a/tests/datagen_jose/README
+++ b/tests/datagen_jose/README
@ -0,0 +1,26 @@
+There are 2 programs included in this package:
+1. histgen - generation program for historical data
+2. tickgen - generation program for tick data
+
+histgen takes 2 parameters 
+	a) Scale-factor: Number of securities for which data is to be generated
+	b) Depth of history: The number of days for which data is to be generated
+and genertes 2 files
+	a) hist-base-file: contains all the static data
+	b) hist-price-file: contains the pricing info
+	c) hist-split-file: contains the split info
+
+tickgen also takes 2 parameters
+	a) Scale-factor: Number of securities for which data is to be generated
+	b) Ticks per day: A measure of the activity on a per security basis. e.g 100 would mean
+	                  that each security ticks about 100 time in a trading day
+and generates 2 files
+	a) tick-base-file
+	b) tick-price-file
+
+run the programs without args to see the parameters required.
+
+------------------
+Make of programs:
+
+run the make program. It requires the C++ compiler (CC) to be in your path.
--- a/tests/datagen_jose/RandGen.H
+++ b/tests/datagen_jose/RandGen.H
@ -0,0 +1,61 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef RandGenHEADER
+#define RandGenHEADER
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Copyright (c) 1997 Morgan Stanley & Co. Incorporated, All Rights Reserved
+//
+// Unpublished copyright.  All rights reserved.  This material contains
+// proprietary information that shall be used or copied only within Morgan
+// Stanley, except with written permission of Morgan Stanley.
+//
+// Module Name   : RandGen.H
+// Version       : %Z% %W% %I% %E% %U%
+// Project       : DataGen
+// Description   : Random Number generator
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <stdlib.h>
+#include <iostream>
+#include <sys/time.h>
+using namespace std;
+
+class RandNumGen
+{
+public:
+  RandNumGen(void)
+    {
+      struct timeval tv;
+      gettimeofday(&tv,0 );
+      srand(tv.tv_sec + tv.tv_usec);
+    }
+  
+  RandNumGen(unsigned long seed_)
+    {
+      srand(seed_);
+    }
+  
+  ~RandNumGen(){}
+
+  inline unsigned long operator() (void);
+  inline int operator() (int min_, int max_);
+};
+
+// Implementation of inline functions
+
+inline unsigned long RandNumGen::operator() (void)
+{
+  return rand();
+}
+
+inline int  RandNumGen::operator() (int min_, int  max_)
+{
+  unsigned long t = (*this)();
+  int r = max_ - min_;
+  return (min_ + t % r);
+}
+#endif
+
--- a/tests/datagen_jose/Time.C
+++ b/tests/datagen_jose/Time.C
@ -0,0 +1,71 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef TimeIMPLEMENTATION
+#define TimeIMPLEMENTATION
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Copyright (c) 1997 Morgan Stanley & Co. Incorporated, All Rights Reserved
+//
+// Unpublished copyright.  All rights reserved.  This material contains
+// proprietary information that shall be used or copied only within Morgan
+// Stanley, except with written permission of Morgan Stanley.
+//
+// Module Name   : Time.C
+// Version       : %Z% %W% %I% %E% %U%
+// Project       : 
+// Description   : 
+//
+///////////////////////////////////////////////////////////////////////////////
+#include <stdio.h>
+#include "Time.H"
+
+Time::Time(char *startTime_)
+{
+  sscanf(startTime_,"%d:%d:%d", &_hrs, &_mins, &_secs);
+  cout << "Hrs: " << _hrs << ",Mins: " << _mins << ",Secs: " << _secs << endl;
+}
+
+Time::~Time()
+{}
+
+Time &Time::operator++ (int)
+{
+  _secs++;
+  adjust();
+  return *this;
+}
+
+void Time::adjust(void)
+{
+  if (_secs >= 60) 
+   {
+     _mins += _secs/60;
+     _secs = _secs%60;
+   }
+
+  if (_mins >= 60) 
+   {
+     _hrs += _mins/60;
+     _mins = _mins%60;
+   }
+  if (_hrs >= 24) _hrs = _hrs = _hrs%24;
+}
+
+ostream &operator<< (ostream &os_, Time &that_)
+{
+  if (that_.hrs() < 10) os_ << "0";
+  os_ << that_.hrs();
+  os_ << ":" ;
+
+  if (that_.mins() < 10) os_ << "0";
+  os_ << that_.mins();
+  os_ << ":" ;
+
+  if (that_.secs() < 10) os_ << "0";
+  os_ << that_.secs();
+  return os_;
+}
+
+
+#endif
--- a/tests/datagen_jose/Time.H
+++ b/tests/datagen_jose/Time.H
@ -0,0 +1,43 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef TimeHEADER
+#define TimeHEADER
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Copyright (c) 1997 Morgan Stanley & Co. Incorporated, All Rights Reserved
+//
+// Unpublished copyright.  All rights reserved.  This material contains
+// proprietary information that shall be used or copied only within Morgan
+// Stanley, except with written permission of Morgan Stanley.
+//
+// Module Name   : Time.H
+// Version       : %Z% %W% %I% %E% %U%
+// Project       : 
+// Description   : 
+//
+///////////////////////////////////////////////////////////////////////////////
+#include <iostream>
+
+using namespace std;
+
+class Time 
+{
+public:
+  Time(char *startTime_);
+  ~Time();
+  
+  Time &operator++(int);
+  void adjust(void);
+
+  int hrs(void) { return _hrs; }
+  int mins(void) { return _mins; }
+  int secs(void) { return _secs; }
+
+friend ostream &operator<< (ostream &os_, Time &that_);
+
+private:
+  int _hrs, _mins, _secs;
+};
+#endif
+
--- a/tests/datagen_jose/cal.C
+++ b/tests/datagen_jose/cal.C
@ -0,0 +1,79 @@
+#include <sys/types.h>
+#include <time.h>
+#include <iostream>
+
+#include "cal.H"
+using namespace std;
+
+Calendar::Calendar(void)
+{
+  time_t clk = time(0);
+  struct tm *now = localtime(&clk);
+  _currdate = asJulianNumber(now->tm_mon+1, now->tm_mday, now->tm_year+1900);
+}
+
+Calendar::~Calendar()
+{}
+
+// year_ in yyyy format
+unsigned int Calendar::asJulianNumber(int month_,int day_,int year_)
+{
+  unsigned long c,ya;
+  
+  if (month_>2) month_-=3;
+  else { month_+=9; year_--; } 
+  c=year_/100;
+  ya=year_-100*c;
+  return ((146097*c)>>2)+((1461*ya)>>2)+(153*month_+2)/5+day_+1721119;
+} 
+
+void Calendar::split(int& month_,int& day_,int& year_)
+{
+  unsigned long d;
+  unsigned long j=_currdate-1721119;
+  year_=(int) (((j<<2)-1)/146097);
+  j=(j<<2)-1-146097*year_;
+  d=(j>>2);
+  j=((d<<2)+3)/1461;
+  d=(d<<2)+3-1461*j;
+  d=(d+4)>>2;
+  month_=(int)(5*d-3)/153;
+  d=5*d-3-153*month_;
+  day_=(int)((d+5)/5);
+  year_=(int)(100*year_+j);
+  if (month_<10) month_+=3;
+  else { month_-=9; year_++; } 
+} 
+
+int Calendar::dayInWeek(void)
+{ 
+  return ((((_currdate+1)%7)+6)%7)+1; 
+}
+
+Calendar &Calendar::nextWeekday(void)
+{
+  (*this) += 1;
+  while (!isWeekday()) (*this)+= 1;
+  return *this;
+}
+
+int Calendar::isWeekday(void)
+{
+  return (dayInWeek()<6)?1:0;
+}
+
+Calendar &Calendar::operator+= (int incr_)
+{
+  _currdate += incr_;
+  return *this;
+}
+
+ostream &operator<< (ostream &os_, Calendar &that_)
+{
+  int mo, day, year;
+  that_.split(mo,day,year);
+  os_ << year << "-" << mo << "-" << day;
+  // the below is a pain for monetdb
+  //os_ << mo << "/" << day << "/" << year;
+  return os_;
+}
--- a/tests/datagen_jose/cal.H
+++ b/tests/datagen_jose/cal.H
@ -0,0 +1,26 @@
+#ifndef _CAL_H_
+#define _CAL_H_
+
+#include <iostream>
+using namespace std;
+
+class Calendar
+{
+public:
+  Calendar(void);
+  ~Calendar();
+
+  unsigned int asJulianNumber(int month_, int day_, int year_);
+  int isWeekday(void);
+  Calendar &operator+= (int incr_);
+
+friend ostream &operator<< (ostream &os_,Calendar &that_);
+
+  int dayInWeek(void);
+  Calendar &nextWeekday(void);
+  void split(int &mo_, int &day_, int &year_);  
+private:
+  unsigned int _currdate;
+};
+#endif
+
--- a/tests/datagen_jose/cal.cpp
+++ b/tests/datagen_jose/cal.cpp
@ -0,0 +1,76 @@
+#include <sys/types.h>
+#include <time.h>
+#include <iostream>
+
+#include "cal.H"
+
+Calendar::Calendar(void)
+{
+  time_t clk = time(0);
+  struct tm *now = localtime(&clk);
+  _currdate = asJulianNumber(now->tm_mon+1, now->tm_mday, now->tm_year+1900);
+}
+
+Calendar::~Calendar()
+{}
+
+// year_ in yyyy format
+unsigned int Calendar::asJulianNumber(int month_,int day_,int year_)
+{
+  unsigned long c,ya;
+  
+  if (month_>2) month_-=3;
+  else { month_+=9; year_--; } 
+  c=year_/100;
+  ya=year_-100*c;
+  return ((146097*c)>>2)+((1461*ya)>>2)+(153*month_+2)/5+day_+1721119;
+} 
+
+void Calendar::split(int& month_,int& day_,int& year_)
+{
+  unsigned long d;
+  unsigned long j=_currdate-1721119;
+  year_=(int) (((j<<2)-1)/146097);
+  j=(j<<2)-1-146097*year_;
+  d=(j>>2);
+  j=((d<<2)+3)/1461;
+  d=(d<<2)+3-1461*j;
+  d=(d+4)>>2;
+  month_=(int)(5*d-3)/153;
+  d=5*d-3-153*month_;
+  day_=(int)((d+5)/5);
+  year_=(int)(100*year_+j);
+  if (month_<10) month_+=3;
+  else { month_-=9; year_++; } 
+} 
+
+int Calendar::dayInWeek(void)
+{ 
+  return ((((_currdate+1)%7)+6)%7)+1; 
+}
+
+Calendar &Calendar::nextWeekday(void)
+{
+  (*this) += 1;
+  while (!isWeekday()) (*this)+= 1;
+  return *this;
+}
+
+int Calendar::isWeekday(void)
+{
+  return (dayInWeek()<6)?1:0;
+}
+
+Calendar &Calendar::operator+= (int incr_)
+{
+  _currdate += incr_;
+  return *this;
+}
+
+ostream &operator<< (ostream &os_, Calendar &that_)
+{
+  int mo, day, year;
+  that_.split(mo,day,year);
+  os_ << mo << "/" << day << "/" << year;
+  return os_;
+}
--- a/tests/datagen_jose/gen.C
+++ b/tests/datagen_jose/gen.C
@ -0,0 +1,58 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef genIMPLEMENTATION
+#define genIMPLEMENTATION
+
+#include <iostream.h>
+#include "RandGen.H"
+
+int num[6];
+int nelems=0;
+
+int member(int a_)
+{
+  for (int i=0; i<nelems; i++) 
+   {
+     if (num[i] == a_) return 1;
+   }
+  return 0;
+}
+
+int gen(int flag_)
+{
+  RandNumGen rg;
+  if (flag_ == 0) 
+   {
+     for (int i=0;i<6;i++) num[i] = 0;
+     nelems=0;
+     return 0;
+   }
+
+  int rn;
+  while (1) 
+   {
+     rn = rg(1,52);
+     if (member(rn) == 0) break;
+   }
+
+  num[nelems++] = rn;
+  return rn;
+}
+
+int main(int ac, char *av[])
+{
+  if (ac < 2) 
+   {
+     cerr << "Usage: " << av[0] << " <number>" << endl;
+     return 1;
+   }
+  
+  int k = atoi(av[1]);
+  for(int i=0; i<k; i++)
+   {
+     gen(0);     
+     for (int j=0;j<6;j++) cout << gen(1) << " ";
+     cout << endl;
+   }
+}
+#endif
--- a/tests/datagen_jose/histgen.C
+++ b/tests/datagen_jose/histgen.C
@ -0,0 +1,198 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef histgenIMPLEMENTATION
+#define histgenIMPLEMENTATION
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <iostream>
+#include <fstream>
+#include <sys/time.h>
+
+#include "RandGen.H"
+#include "cal.H"
+using namespace std;
+inline int max(int a, int b) 
+{
+  return (a>b)?a:b;
+}
+
+inline int min(int a, int b) 
+{
+  return (a<b)?a:b;
+}
+
+int main(int ac, char *av[])
+{
+  RandNumGen rg;
+  int i, j, d, k;
+  ofstream basefile("../../data/hist-base-file.csv");
+  ofstream pricefile("../../data/hist-price-file.csv");
+  ofstream splitfile("../../data/hist-split-file.csv");
+  ofstream dividendfile("../../data/hist-dividend-file.csv");
+
+  if (!basefile) 
+   {
+     cerr << "Cannot open base-file" << endl;
+     return 1;
+   }
+
+  if (!pricefile) 
+   {
+     cerr << "Cannot open price-file" << endl;
+     return 1;
+   }
+
+  if (!splitfile)
+   {
+     cerr << "Cannot open split-file" << endl;
+     return 1;
+   }
+   
+   if (!dividendfile)
+    {
+      cerr << "Cannot open dividend-file" << endl;
+      return 1;
+    }
+   
+   
+  
+  if (ac < 2) 
+   {
+     cerr << "Usage: " << av[0] << " <scale - no of elements> [depth - in days. Default = 4000days]" << endl;
+     return 1;
+   }
+
+  int scale=0, ndays = 4000;
+  if (ac >= 2) scale = atoi(av[1]);
+  if (ac >= 3) ndays = atoi(av[2]);
+
+  // Generation of base info
+  int nex = 5;
+  char *ex[] = { "NY", "O", "AM", "LN", "TK"};
+
+  int nsic = 10;
+  char *sic[] = { "COMPUTERS", "CHEMICALS", "FINANCIAL", "INDUSTRIAL", "PHARMACEUTICALS",
+                       "MEDICAL", "BANKING", "SOFTWARE", "ENTERTAINMENT", "CONSTRUCTION" };
+  
+  char *cu[] = { "USD", "DEM", "JPY", "FFR", "GBP"};
+  int ncu = 5;
+  
+  char *spr[] = { "AAA", "AA", "A", "BBB", "BB", "B", "CCC", "CC", "C"};
+  int nspr = 9;
+  
+
+  unsigned int rnum;
+  char id[100];
+  char descr[256];
+  // the below is a pain with monetdb, changing to better date format
+  //char *crdate = "3/11/1999";
+  char *crdate = "1999-11-03";
+  
+  basefile << "Id|Ex|Descr|SIC|SPR|Cu|CreateDate" << endl;
+  for (i=0; i<scale; i++) 
+   {
+     sprintf(id,"Security_%d", i);
+     sprintf(descr, "'Financial security number: %d'", i);
+     
+     basefile << id;
+     basefile << "|" << ex[rg(0,nex)];
+     basefile << "|" << descr;
+     basefile << "|" << sic[rg(0,nsic)];
+     basefile << "|" << spr[rg(0,nspr)];
+     basefile << "|" << cu[rg(0, ncu)];
+     basefile << "|" << crdate;
+     basefile << endl;
+   }
+
+  basefile.close();
+
+  
+  // generation of price info
+  double *minop = new double[scale];
+  for (i=0; i<scale; i++) minop[i] = 0.0;
+
+  double *op = new double[scale];
+  for (i=0; i<scale; i++) op[i] = 0.0;
+  
+  unsigned long *vs = new unsigned long[scale];
+  for (i=0; i<scale; i++) vs[i] = 0;
+
+  double cp, hp, lp;
+  Calendar cal;
+  
+  pricefile << "Id|TradeDate|HighPrice|LowPrice|ClosePrice|OpenPrice|Volume" << endl;
+  splitfile << "Id|SplitDate|EntryDate|SplitFactor" << endl;
+  dividendfile << "Id|XdivDate|DivAmt|AnnounceDate" << endl;
+  
+  for (d=0;d<ndays; d++)
+   {
+     cal.nextWeekday();
+     for (k=0;k<scale;k++)
+      {
+        sprintf(id,"Security_%d", k);
+
+        if (op[k]==0.0) op[k] = rg(0,100);
+        if (minop[k]==0.0) minop[k] = op[k];
+        
+        if (vs[k]==0) vs[k] = rg();
+        else vs[k] = vs[k]*(100.0+rg(-10,+10))/100.0;
+
+        int skew = rg(0,+2);
+        double f = (100.0 + rg(-2,3+skew))/100.0;
+        cp = op[k] * f;
+        hp = max(op[k], cp) * (100.0+rg(0,+10))/100.0;
+        lp = min(op[k], cp) * (100.0-rg(0,+10))/100.0;
+
+        pricefile << id;
+        pricefile << "|" << cal;
+        pricefile << "|" << hp;
+        pricefile << "|" << lp;
+        pricefile << "|" << cp;
+        pricefile << "|" << op[k];
+        pricefile << "|" << vs[k];
+        pricefile << endl;
+        
+        op[k] = cp;
+
+
+        // check splits
+        if (op[k] > 2.0*minop[k]) 
+         {
+           int splitfactor = rg(1,4);
+           op[k] /= (double)splitfactor;
+           vs[k] *= splitfactor;
+           
+           splitfile << id;
+           splitfile << "|" << cal;
+           splitfile << "|" << cal;
+           splitfile << "|" << splitfactor;
+           splitfile << endl;
+         }
+         
+         // check dividends
+         if (op[k] > minop[k]) 
+          {
+            // dividend as a fraction of current closing price
+            double dividend = (rg(1, 100) / 100.0) * cp;
+           
+            dividendfile << id;
+            dividendfile << "|" << cal;
+            dividendfile << "|" << dividend;
+            // assumes announced and disbursed same day, 
+            // queries can be trivially modified to do away with this assumption
+            dividendfile << "|" << cal; 
+            dividendfile << endl;
+          }
+      }
+      
+      
+   }
+  splitfile.close();
+  pricefile.close();
+  dividendfile.close();
+}
+
+
+#endif
--- a/tests/datagen_jose/makefile
+++ b/tests/datagen_jose/makefile
@ -0,0 +1,20 @@
+.PHONY: clean
+
+all: histgen tickgen
+
+clean:
+	rm -rf *.o histgen tickgen
+	
+%.o: %.C
+	g++-12 -Ofast -march=native -g -c $<
+
+tickgen: cal.o Time.o tickgen.o
+	g++-12 -lstdc++ -Ofast -march=native -flto -o tickgen cal.o Time.o tickgen.o
+
+histgen: cal.o histgen.o 
+	g++-12 -lstdc++ -Ofast -flto -march=native -o histgen cal.o histgen.o
+
+timetest: Time.o timetest.o
+	g++-12 -lstdc++ -g -o timetest Time.o timetest.o
+
+
--- a/tests/datagen_jose/tickgen.C
+++ b/tests/datagen_jose/tickgen.C
@ -0,0 +1,197 @@
+// cmvc_id = %Z% %W% %I% %E% %U%
+
+#ifndef histgenIMPLEMENTATION
+#define histgenIMPLEMENTATION
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <iostream>
+#include <fstream>
+#include <sys/time.h>
+
+#include "RandGen.H"
+#include "cal.H"
+#include "Time.H"
+
+using namespace std;
+
+inline int max(int a, int b) 
+{
+  return (a>b)?a:b;
+}
+
+inline int min(int a, int b) 
+{
+  return (a<b)?a:b;
+}
+
+int main(int ac, char *av[])
+{
+  RandNumGen rg;
+  int i, j, d, k;
+  ofstream basefile("../../data/tick-base-file.csv");
+  ofstream pricefile("../../data/tick-price-file.csv");
+  
+
+  if (!basefile) 
+   {
+     cerr << "Cannot open base-file" << endl;
+     return 1;
+   }
+
+  if (!pricefile) 
+   {
+     cerr << "Cannot open price-file" << endl;
+     return 1;
+   }
+
+  if (ac < 2) 
+   {
+     cerr << "Usage: " << av[0] << " <n -scale> [t -ticks per day.Default=100] [d - no of days.Default=90]" << endl;
+     return 1;
+   }
+  
+  int n=0,t=100,days=90;
+  
+  if (ac >= 2) n = atoi(av[1]);
+  if (ac >= 3) t = atoi(av[2]);
+  if (ac >= 4) days = atoi(av[3]);
+     
+
+  int tps = (n*t)/28800; // ticks per second
+  tps++;
+  cout << "Ticks per second: " << tps << endl;
+
+  // Generation of base info
+  int nex = 5;
+  char *ex[] = { "NY", "O", "AM", "LN", "TK"};
+
+  int nsic = 10;
+  char *sic[] = { "COMPUTERS", "CHEMICALS", "FINANCIAL", "INDUSTRIAL", "PHARMACEUTICALS",
+                       "MEDICAL", "BANKING", "SOFTWARE", "ENTERTAINMENT", "CONSTRUCTION" };
+  
+  char *cu[] = { "USD", "DEM", "JPY", "FFR", "GBP"};
+  int ncu = 5;
+  
+  char *spr[] = { "AAA", "AA", "A", "BBB", "BB", "B", "CCC", "CC", "C"};
+  int nspr = 9;
+  
+
+  unsigned int rnum;
+  char id[100];
+  char descr[256];
+  char *crdate = "3/11/1999";
+
+  basefile << "Id | Ex | Descr | SIC | Cu" << endl;
+  
+  for (i=0; i<n; i++) 
+   {
+     sprintf(id,"Security_%d", i);
+     sprintf(descr, "'Financial security number: %d'", i);
+     
+     basefile << id;
+     basefile << " | " << ex[rg(0,nex)];
+     basefile << " | " << descr;
+     basefile << " | " << sic[rg(0,nsic)];
+     basefile << " | " << cu[rg(0, ncu)];
+     basefile << endl;
+   }
+  basefile.close();
+
+  
+  // generation of price info
+  double tick=1.0/32.0;
+  
+  // 1. gen the starting price of each security
+  double *bp = new double[n];
+  int *seq = new int[n];
+  
+  for (i=0;i<n;i++)
+   {
+     bp[i] = rg(0,100);
+     seq[i] = 0;
+   }
+  
+  Calendar cal;
+  Time tm("9:00:00");
+  cal.nextWeekday();
+  
+  pricefile << "Id | SeqNo | TradeDate | TimeStamp | Type" << endl;
+  
+  for (k=0;k<days; k++)
+   {
+     // for each second of the business day - 8*60*60
+     for(i=0;i<28800;i++) 
+      {
+        // generate the required ticks
+        for(j=0;j<tps;j++) 
+         {
+           //1. select a security
+           int sec = rg(0,n);
+           sprintf(id, "Security_%d", sec);
+           
+           //2.  select  if it is a trade, ask, bid
+           int tqb = rg(0,4);
+           switch (tqb) 
+            {
+            case 0: // trade
+            {
+              double tp = bp[sec];
+              int ts = rg(1,100) * 100;
+              pricefile << id;
+              pricefile << "|" << ++seq[sec];
+              pricefile << "|" << cal;
+              pricefile << "|" << tm;
+              pricefile << "|T" << endl;
+              break;
+            }
+            case 1: // ask
+            {
+              double ap = rg(0,4)*tick+bp[sec];
+              int as = rg(1,100) * 100;
+              pricefile << id;
+              pricefile << "|" << ++seq[sec];
+              pricefile << "|" << cal;
+              pricefile << "|" << tm;
+              pricefile << "|Q" << endl;
+              break;
+            }
+            case 2: // bid
+            {
+              int dir = (rg(3,10) > 5)? +1:-1;
+              bp[sec] = (dir*rg(0,3)*tick)+bp[sec];
+              int bs = rg(1,100) * 100;
+              pricefile << id;
+              pricefile << "|" << ++seq[sec];
+              pricefile << "|" << cal;
+              pricefile << "|" << tm;
+              pricefile << "|Q" << endl;
+              break;
+            }
+            case 3: // cancel/correct as a trade
+            {
+              if (rg(0,100)  < 5) break;
+              double tp = bp[sec];
+              int ts = rg(1,100) * 100;
+              pricefile << id;
+              pricefile << "|" << ++seq[sec];
+              pricefile << "|" << cal;
+              pricefile << "|" << tm;
+              pricefile << "|CT" << endl;
+              break;
+            }
+            default:
+              break;
+            }
+         }
+        // Go to the next second
+        tm++;
+      }         
+     cal.nextWeekday();
+   }
+  pricefile.close();
+}
+
+
+#endif
--- a/tests/stock.a
+++ b/tests/stock.a
@ -30,3 +30,4 @@ SELECT price, timestamp FROM stocks where price - timestamp > 1 and not (price*t
 SELECT max(price-mins(price))
 FROM stocks
   ASSUMING DESC timestamp
+