From 6478deb7dad89b6864f72a7aa53fbd85ae32a64b Mon Sep 17 00:00:00 2001
From: Bill
Date: Thu, 20 Oct 2022 06:00:57 +0800
Subject: [PATCH 01/30] read complex data from csv

---
 README.md                   | 26 ++++++++-----
 aquery_parser/sql_parser.py | 10 ++++-
 csv.h                       | 61 +++++++++++++++++++++++++-----
 engine/types.py             | 17 +++++----
 engine/utils.py             |  8 +++-
 prompt.py                   | 36 +++++++++---------
 reconstruct/__init__.py     |  1 +
 reconstruct/ast.py          | 74 +++++++++++++++++++++++++++++++++----
 reconstruct/expr.py         |  6 ++-
 reconstruct/storage.py      |  7 +++-
 server/vector_type.hpp      |  4 ++
 tests/complex_data.a        |  3 ++
 tests/q4.a                  |  8 +++-
 13 files changed, 201 insertions(+), 60 deletions(-)
 create mode 100644 tests/complex_data.a

diff --git a/README.md b/README.md
index de90bab..fbc1076 100644
--- a/README.md
+++ b/README.md
@@ -147,8 +147,7 @@ See files in ./tests/ for more examples.
 ## Execution Engines
 - AQuery++ supports different execution engines thanks to the decoupled compiler structure.
 - Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB and everything else is executed by a post-process module which is generated by the AQuery++ compiler in C++ and then compiled and executed.
-- AQuery Execution Engine: executes queries by compiling the query plan to C++ code. Doesn't support joins and udf functions.
-- K9 Execution Engine: (discontinued).
+- AQuery Library: a set of header-based libraries that provide column arithmetic and operations inspired by array-programming languages like kdb. The generated C++ post-processor code builds on these libraries, which significantly reduces its complexity and shortens compile time while maintaining the best performance. UDFs and user modules can use the same libraries, making it easy to write simple but powerful extensions.
 # Roadmap
 - [x] SQL Parser -> AQuery Parser (Front End)
 - [x] AQuery-C++ Compiler (Back End)
@@ -156,14 +155,21 @@ See files in ./tests/ for more examples.
 - [x] Schema and Data Model
 - [x] Data acquisition/output from/to csv file
 - [ ] Execution Engine
-	- [x] Projections and single-group Aggregations
-	- [x] Group by Aggregations
-	- [x] Filters
-	- [x] Order by
-	- [x] Assumption
-	- [x] Flatten
-	- [x] Join (Hybrid Engine only)
-	- [ ] Subqueries
+	- [x] Single Query
+		- [x] Projections and single-group Aggregations
+		- [x] Group by Aggregations
+		- [x] Filters
+		- [x] Order by
+		- [x] Assumption
+		- [x] Flatten
+		- [x] Join (Hybrid Engine only)
+	- [ ] Subquery
+		- [ ] With Clause
+		- [ ] From subquery
+		- [ ] Select subquery
+		- [ ] Where subquery
+		- [ ] Subquery in group by
+		- [ ] Subquery in order by
 - [x] Query Optimization
 	- [x] Selection/Order by push-down
 	- [x] Join Optimization (Only in Hybrid Engine)
diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py
index 9c08db6..5308c2a 100644
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@@ -8,6 +8,7 @@
 # from sre_parse import WHITESPACE
 
+
 from mo_parsing.helpers import restOfLine
 from mo_parsing.infix import delimited_list
 from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
 
@@ -655,7 +656,8 @@ def parser(literal_string, ident, sqlserver=False):
     ) / to_json_call
 
     load_data = (
-        keyword("data") ("file_type")
+        Optional(keyword("complex")("complex"))
+        + keyword("data") ("file_type")
         + keyword("infile")("loc")
         + literal_string ("file")
         + INTO
@@ -667,6 +669,12 @@ def parser(literal_string, ident, sqlserver=False):
             + keyword("by").suppress()
             + literal_string ("term")
         )
+        + Optional(
+            keyword("element").suppress()
+            + keyword("terminated").suppress()
+            + keyword("by").suppress()
+            + literal_string ("ele")
+        )
     )
 
     module_func_def = (
diff --git a/csv.h b/csv.h
index c5cb5bc..c0d1762 100644
--- a/csv.h
+++ b/csv.h
@@ -1,4 +1,4 @@
-// Copyright: (2012-2015) Ben Strasser
+// Copyright: (2012-2015) Ben Strasser , 2022 Bill Sun
 // License: BSD-3
 //
 // All rights reserved.
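The csv.h hunks below extend Ben Strasser's fast-cpp-csv-parser so that a single CSV field can itself hold a list of values (e.g. `3;4;5e-3;6.32`) split on a second separator, which is what the new `element terminated by` clause feeds into. A minimal self-contained sketch of that splitting loop, assuming ';' as the element separator; split_field is an illustrative name, not the patch's actual API:

#include <cstdio>
#include <cstdlib>
#include <vector>

// Parse one field like "3;4 ;5e-3;6.32" into doubles, skipping separators and padding.
std::vector<double> split_field(const char* col, char sep2 = ';') {
    std::vector<double> out;
    while (*col != '\0') {
        char* end = nullptr;
        double v = std::strtod(col, &end);    // parse one element
        if (end == col) break;                // no digits left: stop
        out.push_back(v);
        col = end;
        while (*col == sep2 || *col == ' ' || *col == '\t' ||
               *col == '\r' || *col == '\n')
            ++col;                            // consume separator and padding
    }
    return out;
}

int main() {
    for (double v : split_field("3;4 ;5e-3;6.32"))
        std::printf("%g ", v);                // prints: 3 4 0.005 6.32
}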
@@ -49,6 +49,7 @@ #include #include #include +#include "server/vector_type.hpp" namespace io{ //////////////////////////////////////////////////////////////////////////// @@ -974,8 +975,7 @@ namespace io{ return; } x = 10*x+y; - }else - throw error::no_digit(); + } ++col; } } @@ -1005,8 +1005,7 @@ namespace io{ return; } x = 10*x-y; - }else - throw error::no_digit(); + } ++col; } return; @@ -1080,19 +1079,37 @@ namespace io{ } x *= base; } - }else{ - if(*col != '\0') - throw error::no_digit(); } if(is_neg) x = -x; } + template void parse(char*col, float&x) { parse_float(col, x); } template void parse(char*col, double&x) { parse_float(col, x); } template void parse(char*col, long double&x) { parse_float(col, x); } - + + + template + void parse_vector(char* col, vector_type& x) { + while (*col != '\0') { + char* next_col = col; + while (*next_col != sep2 && *next_col != '\0') + ++next_col; + while (*next_col == ' ' || *next_col == '\t' || + *next_col == sep2 || *next_col == '\r' || + *next_col == '\n') + ++next_col; + char _next_end = *next_col; + *next_col = '\0'; + T y; + ::io::detail::parse(col, y); + x.emplace_back(y); + col = next_col; + *next_col = _next_end; + } + } template void parse(char*col, T&x){ // Mute unused variable compiler warning @@ -1108,6 +1125,7 @@ namespace io{ } template, class quote_policy = no_quote_escape<','>, class overflow_policy = throw_on_overflow, @@ -1234,7 +1252,23 @@ namespace io{ parse_helper(r+1, cols...); } - + template + void parse_helper(std::size_t r, vector_type&t, ColType&...cols){ + if(row[r]){ + try{ + try{ + ::io::detail::parse_vector(row[r], t); + }catch(error::with_column_content&err){ + err.set_column_content(row[r]); + throw; + } + }catch(error::with_column_name&err){ + err.set_column_name(column_names[r].c_str()); + throw; + } + } + parse_helper(r+1, cols...); + } public: template bool read_row(ColType& ...cols){ @@ -1269,5 +1303,12 @@ namespace io{ } }; } + +template +using AQCSVReader = io::CSVReader, io::no_quote_escape, + io::ignore_overflow, io::empty_line_comment + >; + #endif diff --git a/engine/types.py b/engine/types.py index 8eac736..5a56e12 100644 --- a/engine/types.py +++ b/engine/types.py @@ -1,8 +1,9 @@ from copy import deepcopy -from engine.utils import base62uuid, defval -from aquery_config import have_hge from typing import Dict, List +from aquery_config import have_hge +from engine.utils import base62uuid, defval + type_table: Dict[str, "Types"] = {} class Types: @@ -65,10 +66,10 @@ class Types: return self.sqlname @staticmethod - def decode(aquery_type : str, vector_type:str = 'ColRef') -> "Types": - if (aquery_type.startswith('vec')): + def decode(aquery_type : str, vector_type:str = 'vector_type') -> "Types": + if (aquery_type.lower().startswith('vec')): return VectorT(Types.decode(aquery_type[3:]), vector_type) - return type_table[aquery_type] + return type_table[aquery_type.lower()] class TypeCollection: def __init__(self, sz, deftype, fptype = None, utype = None, *, collection = None) -> None: @@ -121,7 +122,7 @@ class VectorT(Types): return f'{self.vector_type}<{self.inner_type.name}>' @property def sqlname(self) -> str: - return 'BIGINT' + return 'HUGEINT' # Store vector_type into 16 bit integers @property def cname(self) -> str: return f'{self.vector_type}<{self.inner_type.cname}>' @@ -142,7 +143,7 @@ fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT) builtin_types : Dict[str, Types] = { 
'string' : StrT, - **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT), + **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT, HgeT), **int_types, **fp_types, **temporal_types} def get_int128_support(): @@ -365,3 +366,5 @@ user_module_func = {} builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, **user_module_func} + +type_table = {**builtin_types, **type_table} \ No newline at end of file diff --git a/engine/utils.py b/engine/utils.py index 065f8c8..dc7f2bc 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -1,6 +1,6 @@ -from collections import OrderedDict -from collections.abc import MutableMapping, Mapping import uuid +from collections import OrderedDict +from collections.abc import Mapping, MutableMapping lower_alp = 'abcdefghijklmnopqrstuvwxyz' upper_alp = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -107,6 +107,8 @@ def defval(val, default): # escape must be readonly from typing import Mapping, Set + + def remove_last(pattern : str, string : str, escape : Set[str] = set()) -> str: idx = string.rfind(pattern) if idx == -1: @@ -126,9 +128,11 @@ class _Counter: return cnt import re + ws = re.compile(r'\s+') import os + def add_dll_dir(dll: str): import sys if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt': diff --git a/prompt.py b/prompt.py index cd17360..c6a00dd 100644 --- a/prompt.py +++ b/prompt.py @@ -1,4 +1,5 @@ import aquery_config + help_message = '''\ ====================================================== AQUERY COMMANDLINE HELP @@ -82,31 +83,31 @@ if __name__ == '__main__': -import os -from dataclasses import dataclass +import atexit +import ctypes import enum -import time +import mmap +import os # import dbconn import re +import subprocess +import sys +import threading +import time +from dataclasses import dataclass from typing import Callable, List, Optional + +import numpy as np from mo_parsing import ParseException + import aquery_parser as parser import engine -import engine.projection import engine.ddl +import engine.projection import reconstruct as xengine -import subprocess -import mmap -import sys -from engine.utils import base62uuid -import atexit -import threading -import ctypes -import numpy as np -from engine.utils import ws -from engine.utils import add_dll_dir -from engine.utils import nullstream from build import build_manager +from engine.utils import add_dll_dir, base62uuid, nullstream, ws + ## CLASSES BEGIN class RunType(enum.Enum): @@ -407,7 +408,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): for t in cxt.tables: lst_cols = [] for c in t.columns: - lst_cols.append(f'{c.name} : {c.type}') + lst_cols.append(f'{c.name} : {c.type.name}') print(f'{t.table_name} ({", ".join(lst_cols)})') continue elif q.startswith('help'): @@ -605,7 +606,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): print("\nBye.") raise except ValueError as e: - import code, traceback + import code + import traceback __stdin = os.dup(0) raise_exception = True sh = code.InteractiveConsole({**globals(), **locals()}) diff --git a/reconstruct/__init__.py b/reconstruct/__init__.py index fd02f61..97afaba 100644 --- a/reconstruct/__init__.py +++ b/reconstruct/__init__.py @@ -1,4 +1,5 @@ from reconstruct.ast import Context, ast_node + saved_cxt = None def initialize(cxt = None, keep = False): diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 
f81083e..90615ac 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -1,12 +1,13 @@ from copy import deepcopy from dataclasses import dataclass from enum import Enum, auto -from typing import Set, Tuple, Dict, Union, List, Optional +from typing import Dict, List, Optional, Set, Tuple, Union from engine.types import * -from engine.utils import enlist, base62uuid, base62alp, get_legal_name -from reconstruct.storage import Context, TableInfo, ColRef - +from engine.utils import base62alp, base62uuid, enlist, get_legal_name +from reconstruct.storage import ColRef, Context, TableInfo + + class ast_node: header = [] types = dict() @@ -70,7 +71,11 @@ class projection(ast_node): elif 'select_distinct' in node: p = node['select_distinct'] self.distinct = True - + if 'with' in node: + self.with_clause = projection(self, node['value']) + else: + self.with_clause = None + self.projections = p if type(p) is list else [p] if self.parent is None: self.context.sql_begin() @@ -951,6 +956,9 @@ class load(ast_node): if node['load']['file_type'] == 'module': self.produce = self.produce_module self.module = True + elif 'complex' in node['load']: + self.produce = self.produce_cpp + self.consume = lambda *_: None elif self.context.dialect == 'MonetDB': self.produce = self.produce_monetdb else: @@ -1019,7 +1027,56 @@ class load(ast_node): self.sql = f'{s1} \'{p}\' {s2} ' if 'term' in node: self.sql += f' {s3} \'{node["term"]["literal"]}\'' - + + def produce_cpp(self, node): + self.context.has_dll = True + self.context.headers.add('"csv.h"') + node = node['load'] + self.postproc_fname = 'ld_' + base62uuid(5) + self.context.postproc_begin(self.postproc_fname) + + table:TableInfo = self.context.tables_byname[node['table']] + self.sql = F"SELECT {', '.join([c.name for c in table.columns])} FROM {table.table_name};" + self.emit(self.sql+';\n') + self.context.sql_end() + length_name = 'len_' + base62uuid(6) + self.context.emitc(f'auto {length_name} = server->cnt;') + + out_typenames = [t.type.cname for t in table.columns] + outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in table.columns]) + + self.outtable_col_names = 'names_' + base62uuid(4) + self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};') + + self.out_table = 'tbl_' + base62uuid(4) + self.context.emitc(f'auto {self.out_table} = new TableInfo<{",".join(out_typenames)}>("{table.table_name}", {self.outtable_col_names});') + for i, c in enumerate(table.columns): + c.cxt_name = 'c_' + base62uuid(6) + self.context.emitc(f'decltype(auto) {c.cxt_name} = {self.out_table}->get_col<{i}>();') + self.context.emitc(f'{c.cxt_name}.initfrom({length_name}, server->getCol({i}), "{table.columns[i].name}");') + csv_reader_name = 'csv_reader_' + base62uuid(6) + col_types = [c.type.cname for c in table.columns] + col_tmp_names = ['tmp_'+base62uuid(8) for _ in range(len(table.columns))] + #col_names = ','.join([f'"{c.name}"' for c in table.columns]) + term_field = ',' if 'term' not in node else node['term']['literal'] + term_ele = ';' if 'ele' not in node else node['ele']['literal'] + self.context.emitc(f'AQCSVReader<{len(col_types)}, \'{term_field.strip()[0]}\', \'{term_ele.strip()[0]}\'> {csv_reader_name}("{node["file"]["literal"]}");') + # self.context.emitc(f'{csv_reader_name}.read_header(io::ignore_extra_column, {col_names});') + self.context.emitc(f'{csv_reader_name}.next_line();') + + for t, n in zip(col_types, col_tmp_names): + self.context.emitc(f'{t} {n};') + 
self.context.emitc(f'while({csv_reader_name}.read_row({",".join(col_tmp_names)})) {{ \n') + for i, c in enumerate(table.columns): + self.context.emitc(f'print({col_tmp_names[i]});') + self.context.emitc(f'{c.cxt_name}.emplace_back({col_tmp_names[i]});') + + self.context.emitc('}') + self.context.emitc(f'print(*{self.out_table});') + self.context.emitc(f'{self.out_table}->monetdb_append_table(cxt->alt_server, "{table.table_name}");') + + self.context.postproc_end(self.postproc_fname) + class outfile(ast_node): name="_outfile" def __init__(self, parent, node, context = None, *, sql = None): @@ -1121,7 +1178,7 @@ class udf(ast_node): def produce(self, node): - from engine.utils import get_legal_name, check_legal_name + from engine.utils import check_legal_name, get_legal_name node = node[self.name] # register udf self.agg = 'Agg' in node @@ -1216,7 +1273,7 @@ class udf(ast_node): def consume(self, node): - from engine.utils import get_legal_name, check_legal_name + from engine.utils import check_legal_name, get_legal_name node = node[self.name] if 'params' in node: @@ -1339,4 +1396,5 @@ def include(objs): import sys + include(sys.modules[__name__]) diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 4fd483b..f1e3d5a 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -1,7 +1,8 @@ from typing import Optional, Set + +from engine.types import * from reconstruct.ast import ast_node from reconstruct.storage import ColRef, Context -from engine.types import * # TODO: Decouple expr and upgrade architecture # C_CODE : get ccode/sql code? @@ -31,6 +32,7 @@ class expr(ast_node): def __init__(self, parent, node, *, c_code = None, supress_undefined = False): from reconstruct.ast import projection, udf + # gen2 expr have multi-passes # first pass parse json into expr tree # generate target code in later passes upon need @@ -78,7 +80,7 @@ class expr(ast_node): ast_node.__init__(self, parent, node, None) def init(self, _): - from reconstruct.ast import projection, _tmp_join_union + from reconstruct.ast import _tmp_join_union, projection parent = self.parent self.is_compound = parent.is_compound if type(parent) is expr else False if type(parent) in [projection, expr, _tmp_join_union]: diff --git a/reconstruct/storage.py b/reconstruct/storage.py index d54db52..2873747 100644 --- a/reconstruct/storage.py +++ b/reconstruct/storage.py @@ -1,12 +1,14 @@ +from typing import Dict, List, Set + from engine.types import * from engine.utils import CaseInsensitiveDict, base62uuid, enlist -from typing import List, Dict, Set + class ColRef: def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None): self.type : Types = AnyT if type(_ty) is str: - self.type = builtin_types[_ty.lower()] + self.type = Types.decode(_ty) if _ty_args: self.type = self.type(enlist(_ty_args)) elif type(_ty) is Types: @@ -17,6 +19,7 @@ class ColRef: self.alias = set() self.id = id # position in table self.compound = compound # compound field (list as a field) + self.cxt_name = '' # e.g. 
order by, group by, filter by expressions self.__arr__ = (_ty, cobj, table, name, id) diff --git a/server/vector_type.hpp b/server/vector_type.hpp index 9b03e89..f0d4cc6 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -159,6 +159,10 @@ public: grow(); container[size++] = _val; } + void emplace_back(_Ty& _val) { + grow(); + container[size++] = std::move(_val); + } void emplace_back(_Ty&& _val) { grow(); container[size++] = std::move(_val); diff --git a/tests/complex_data.a b/tests/complex_data.a new file mode 100644 index 0000000..e08da4b --- /dev/null +++ b/tests/complex_data.a @@ -0,0 +1,3 @@ +create table f (a float, b vecfloat, c int) +load complex data infile 'data/test_complex.csv' into table f fields terminated by ',' element terminated by ';' +select * from f \ No newline at end of file diff --git a/tests/q4.a b/tests/q4.a index 4237b16..d38a246 100644 --- a/tests/q4.a +++ b/tests/q4.a @@ -17,4 +17,10 @@ LOAD DATA INFILE "data/ticks.csv" INTO TABLE TICKS FIELDS TERMINATED BY "," SELECT max(endofdayprice/prev(endofdayprice)) as Max_Ratio FROM ticks ASSUMING ASC date -WHERE ID = "3001" \ No newline at end of file +WHERE ID = "3001" + +CREATE TABLE ticks2(ID VARCHAR(20), max REAL, min REAL) +INSERT INTO ticks2 SELECT ID AS ID, max(ratios(endofdayprice)) AS max, min(ratios(endofdayprice)) AS min from ticks group by ID; + +SELECT ID, max, min +FROM ticks2; \ No newline at end of file From d5382c36e93be08bb0df504b199b088fbf40a01c Mon Sep 17 00:00:00 2001 From: Bill Date: Fri, 21 Oct 2022 14:52:01 +0800 Subject: [PATCH 02/30] bug fixes --- .gitignore | 1 + Dockerfile | 2 +- data/test_complex.csv | 6 ++++++ reconstruct/ast.py | 2 +- server/table_ext_monetdb.hpp | 12 ++++++------ server/vector_type.hpp | 3 +++ tests/dt2.a | 26 ++++++++++++++++++++++++++ tests/q1.sql | 4 +++- 8 files changed, 47 insertions(+), 9 deletions(-) create mode 100644 data/test_complex.csv create mode 100644 tests/dt2.a diff --git a/.gitignore b/.gitignore index 4807b2c..644be8b 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,7 @@ test*.c* !moving_avg.csv !nyctx100.csv !network.csv +!test_complex.csv *.out *.asm !mmw.so diff --git a/Dockerfile b/Dockerfile index aac0a4f..953d89f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:latest -RUN cp /bin/bash /bin/sh +# RUN cp /bin/bash /bin/sh RUN apt update && apt install -y wget diff --git a/data/test_complex.csv b/data/test_complex.csv new file mode 100644 index 0000000..efd7b3e --- /dev/null +++ b/data/test_complex.csv @@ -0,0 +1,6 @@ +a,b,c +5e-3, 3;4 ;5e-3;6.32,7 +1,2,3 +4,5;6;7;8;9, 0 + 3 ,2 ; 4; 5.7; -.3; 5., 6 +-3.12312,-4E+7;67456746744567;75,4 diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 90615ac..d82ebce 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -1068,7 +1068,7 @@ class load(ast_node): self.context.emitc(f'{t} {n};') self.context.emitc(f'while({csv_reader_name}.read_row({",".join(col_tmp_names)})) {{ \n') for i, c in enumerate(table.columns): - self.context.emitc(f'print({col_tmp_names[i]});') + # self.context.emitc(f'print({col_tmp_names[i]});') self.context.emitc(f'{c.cxt_name}.emplace_back({col_tmp_names[i]});') self.context.emitc('}') diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp index c128559..3c93c3f 100644 --- a/server/table_ext_monetdb.hpp +++ b/server/table_ext_monetdb.hpp @@ -45,16 +45,16 @@ void TableInfo::monetdb_append_table(void* srv, const char* alt_name) { puts("getcols..."); uint32_t cnt = 0; const auto get_col = [&monetdbe_cols, &i, *this, 
&gc_vecs, &cnt](auto v) { - printf("%d %d\n", i, (ColRef*)v - colrefs); + // printf("%d %d\n", i, (ColRef*)v - colrefs); monetdbe_cols[i++] = (monetdbe_column*)v->monetdb_get_col(gc_vecs, cnt); }; (get_col((ColRef*)(colrefs + i)), ...); puts("getcols done"); - for(int i = 0; i < sizeof...(Ts); ++i) - { - printf("no:%d name: %s count:%d data: %p type:%d \n", - i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type); - } + // for(int i = 0; i < sizeof...(Ts); ++i) + // { + // printf("no:%d name: %s count:%d data: %p type:%d \n", + // i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type); + // } std::string create_table_str = "CREATE TABLE IF NOT EXISTS "; create_table_str += alt_name; create_table_str += " ("; diff --git a/server/vector_type.hpp b/server/vector_type.hpp index f0d4cc6..620e14d 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -71,6 +71,9 @@ public: constexpr explicit vector_type(const vector_type<_Ty>& vt) noexcept : capacity(0) { _copy(vt); } + constexpr vector_type(vector_type<_Ty>& vt) noexcept : capacity(0) { + _move(std::move(vt)); + } constexpr vector_type(vector_type<_Ty>&& vt) noexcept : capacity(0) { _move(std::move(vt)); } diff --git a/tests/dt2.a b/tests/dt2.a new file mode 100644 index 0000000..0f9dc7f --- /dev/null +++ b/tests/dt2.a @@ -0,0 +1,26 @@ +LOAD MODULE FROM "./libirf.so" +FUNCTIONS ( + newtree(height:int, f:int64, sparse:vecint64, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool, + fit(X:vecvecdouble, y:vecint64) -> bool, + predict(X:vecvecdouble) -> vecint64 +); + +create table source(x1 double, x2 double, x3 double, x4 double, x5 int64); +load data infile "data/benchmark" into table source fields terminated by ","; + +create table sparse(x int64); +insert into sparse values (1); +insert into sparse values (1); +insert into sparse values (1); +insert into sparse values (1); + +select * from source; + +select newtree(6, 4, sparse.x, 0, 4, 2, 0, 400, 2147483647) from sparse; + +select fit(pack(x1, x2, x3, x4), x5) from source limit 100; +select fit(pack(x1, x2, x3, x4), x5) from source limit 100; +select fit(pack(x1, x2, x3, x4), x5) from source limit 100; +select fit(pack(x1, x2, x3, x4), x5) from source limit 100; + +select predict(pack(x1, x2, x3, x4)) from source limit 100; \ No newline at end of file diff --git a/tests/q1.sql b/tests/q1.sql index 747b83b..eab8904 100644 --- a/tests/q1.sql +++ b/tests/q1.sql @@ -7,4 +7,6 @@ FIELDS TERMINATED BY "," SELECT sum(c), b, d FROM testq1 group by a,b,d -order by d DESC, b ASC +order by d DESC, b ASC; + +-- aaaa \ No newline at end of file From 259d9ef5665c5030021bdd7020691c8e027d7d93 Mon Sep 17 00:00:00 2001 From: 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 Date: Fri, 21 Oct 2022 02:59:00 -0400 Subject: [PATCH 03/30] bug --- sdk/Evaluation.cpp | 3 +-- sdk/irf.cpp | 42 ++++++++++++++++-------------------------- tests/dt.a | 34 +++++++++++++++++----------------- 3 files changed, 34 insertions(+), 45 deletions(-) diff --git a/sdk/Evaluation.cpp b/sdk/Evaluation.cpp index 3683597..8e347a7 100644 --- a/sdk/Evaluation.cpp +++ b/sdk/Evaluation.cpp @@ -5,14 +5,13 @@ struct minEval{ double value; - double values; + int* values; double eval; long left; // how many on its left double* record; long max; long** count; - long* sorted; // sorted d }; minEval giniSparse(double** data, long* result, long* d, long size, long col, long classes, long* totalT){ diff --git a/sdk/irf.cpp 
b/sdk/irf.cpp
index 8433c95..73eef77 100644
--- a/sdk/irf.cpp
+++ b/sdk/irf.cpp
@@ -4,9 +4,6 @@
 #include "../server/table.h"
 
 DecisionTree* dt = nullptr;
-long pt = 0;
-double** data = nullptr;
-long* result = nullptr;
 
 __AQEXPORT__(bool) newtree(int height, long f, ColRef<long> sparse, double forget, long maxf, long noclasses, Evaluation e, long r, long rb){
     if(sparse.size!=f)return 0;
@@ -19,36 +16,29 @@ __AQEXPORT__(bool) newtree(int height, long f, ColRef<long> sparse, double forget
     return 1;
 }
-__AQEXPORT__(bool) additem(ColRef<double> X, long y, long size){
-    long j = 0;
-    if(size>0){
-        free(data);
-        free(result);
-        pt = 0;
-        data=(double**)malloc(size*sizeof(double*));
-        result=(long*)malloc(size*sizeof(long));
+__AQEXPORT__(bool) fit(ColRef<vector_type<double>> X, ColRef<long> y){
+    if(X.size != y.size)return 0;
+    double** data = (double**)malloc(X.size*sizeof(double*));
+    long* result = (long*)malloc(y.size*sizeof(long));
+    for(long i=0; i<X.size; ++i){
+        data[i] = X.container[i].container;
+        result[i] = y.container[i];
     }
-    data[pt]=X.container;
-    result[pt]=y;
-    pt ++;
-    return 1;
-}
-__AQEXPORT__(bool) fit(){
-    if(pt<=0)return 0;
-    dt->fit(data, result, pt);
+    dt->fit(data, result, X.size);
     return 1;
 }
 
-__AQEXPORT__(ColRef_storage) predict(){
-    int* result = (int*)malloc(pt*sizeof(int));
-    for(long i=0; i<pt; ++i){
-        result[i] = dt->Test(data[i], dt->DTree);
+__AQEXPORT__(ColRef_storage) predict(ColRef<vector_type<double>> X){
+    double** data = (double**)malloc(X.size*sizeof(double*));
+    int* result = (int*)malloc(X.size*sizeof(int));
+    for(long i=0; i<X.size; ++i){
+        data[i] = X.container[i].container;
+        result[i] = dt->Test(data[i], dt->DTree);
     }
 
-    return ColRef_storage(new ColRef_storage(result, pt, 0, "prediction", 0), 1, 0, "prediction", 0);
+    return ColRef_storage(new ColRef_storage(result, X.size, 0, "prediction", 0), 1, 0, "prediction", 0);
 }
diff --git a/tests/dt.a b/tests/dt.a
index 5a52ac1..abfc6a6 100644
--- a/tests/dt.a
+++ b/tests/dt.a
@@ -1,21 +1,21 @@
 LOAD MODULE FROM "./libirf.so"
 FUNCTIONS (
 	newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
-	additem(X:vecdouble, y:int64, size:int64) -> bool,
-	fit() -> bool,
-	predict() -> vecint
+	fit(X:vecvecdouble, y:vecint) -> bool,
+	predict(X:vecvecdouble) -> vecint
 );
-create table tb(x int);
-create table tb2(x double, y double, z double);
-insert into tb values (0);
-insert into tb values (0);
-insert into tb values (0);
-select newtree(5, 3, tb.x, 0, 3, 2, 0, 100, 1) from tb;
-insert into tb2 values (1, 0, 1);
-insert into tb2 values (0, 1, 1);
-insert into tb2 values (1, 1, 1);
-select additem(tb2.x, 1, 3) from tb2;
-select additem(tb2.y, 0, -1) from tb2;
-select additem(tb2.z, 1, -1) from tb2;
-select fit();
-select predict();
+
+create table source(x1 double, x2 double, x3 double, x4 double, x5 int);
+load data infile "data/benchmark" into table source fields terminated by ",";
+
+create table sparse(x int);
+insert into sparse values (1);
+insert into sparse values (1);
+insert into sparse values (1);
+insert into sparse values (1);
+
+select newtree(6, 4, sparse.x, 0, 4, 2, 0, 400, 2147483647) from sparse;
+
+select fit(pack(x1, x2, x3, x4), x5) from source;
+
+select predict(pack(x1, x2, x3, x4)) from source;
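Patch 03 above replaces the stateful additem/fit/predict protocol with single-shot fit(X, y) and predict(X) calls that receive whole columns. A column crosses the module boundary as a plain pointer-plus-size pair; a minimal mock of that convention (Col here is an illustrative stand-in for the server's ColRef/vector_type, not the actual AQuery headers):

#include <cstdio>

template <class T>
struct Col { T* container; long size; };      // stand-in for ColRef<T>

// A fit()-style entry point: reads a whole label column in one call,
// touching every row without any global state between calls.
long count_positive(Col<long> y) {
    long n = 0;
    for (long i = 0; i < y.size; ++i)
        if (y.container[i] > 0) ++n;
    return n;
}

int main() {
    long labels[] = {1, 0, 1, 1};
    std::printf("%ld\n", count_positive(Col<long>{labels, 4}));  // prints 3
}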
From 5549706443f70ab6dd57421006c09b4fd03d16f5 Mon Sep 17 00:00:00 2001
From: Bill
Date: Sun, 23 Oct 2022 05:47:53 +0800
Subject: [PATCH 04/30] fixed issue for user module

---
 .gitignore             |  1 +
 Makefile               |  5 +++--
 reconstruct/ast.py     |  6 +++---
 sdk/Makefile           |  8 +++++++-
 sdk/irf.cpp            | 33 +++++++++++++++++++++----------
 server/table.h         | 17 ++++++++++++++++-
 server/vector_type.hpp |  3 +++
 tests/dt.a             | 40 ++++++++++++++++++++--------------------
 tests/dt2.a            | 36 ++++++++++++++++--------------------
 tests/q1.sql           |  2 --
 10 files changed, 92 insertions(+), 59 deletions(-)

diff --git a/.gitignore b/.gitignore
index 644be8b..508685f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@ k
 **/Debug
 **/Release
 test*.c*
+data/benchmark
 *.csv
 !test.csv
 !test2.csv
diff --git a/Makefile b/Makefile
index dd7747e..1707240 100644
--- a/Makefile
+++ b/Makefile
@@ -4,11 +4,12 @@ MonetDB_INC =
 Threading =
 CXXFLAGS = --std=c++1z
 ifeq ($(AQ_DEBUG), 1)
-	OPTFLAGS = -g3
+	OPTFLAGS = -g3 -fsanitize=address -fsanitize=leak
+	LINKFLAGS =
 else
 	OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector
+	LINKFLAGS = -flto
 endif
-LINKFLAGS = -flto # + $(AQ_LINK_FLAG)
 SHAREDFLAGS = -shared
 FPIC = -fPIC
 COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc)
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index d82ebce..173399b 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -343,7 +343,7 @@ class projection(ast_node):
                 )
             else:
                 # for funcs evaluate f_i(x, ...)
-                self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>() = {val[1]};')
+                self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>().initfrom({val[1]}, "{cols[i].name}");')
         # print out col_is
         if 'into' not in node:
             self.context.emitc(f'print(*{self.out_table.contextname_cpp});')
@@ -990,7 +990,7 @@ class load(ast_node):
             self.context.queries.append(f'F{fname}')
             ret_type = VoidT
             if 'ret_type' in f:
-                ret_type = Types.decode(f['ret_type'])
+                ret_type = Types.decode(f['ret_type'], vector_type='vector_type')
             nargs = 0
             arglist = ''
             if 'vars' in f:
@@ -1000,7 +1000,7 @@ class load(ast_node):
                 nargs = len(arglist)
                 arglist = ', '.join(arglist)
             # create c++ stub
-            cpp_stub = f'{ret_type.cname} (*{fname})({arglist}) = nullptr;'
+            cpp_stub = f'{"vectortype_cstorage" if isinstance(ret_type, VectorT) else ret_type.cname} (*{fname})({arglist}) = nullptr;'
             self.context.module_stubs += cpp_stub + '\n'
             self.context.module_map[fname] = cpp_stub
             #registration for parser
diff --git a/sdk/Makefile b/sdk/Makefile
index 7bd5c8c..b146a81 100644
--- a/sdk/Makefile
+++ b/sdk/Makefile
@@ -1,5 +1,11 @@
+OPT_FLAGS =
+ifneq ($(DEBUG), 1)
+	OPT_FLAGS = -Ofast -march=native -flto -DNDEBUG
+else
+	OPT_FLAGS = -g3 -D_DEBUG -fsanitize=leak -fsanitize=address
+endif
 example:
 	$(CXX) -shared -fPIC example.cpp aquery_mem.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../test.so
 irf:
-	$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../libirf.so
+	$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition $(OPT_FLAGS) --std=c++1z -o ../libirf.so
 all: example
diff --git a/sdk/irf.cpp b/sdk/irf.cpp
index 8433c95..36cf4c2 100644
--- a/sdk/irf.cpp
+++ b/sdk/irf.cpp
@@ -36,19 +36,32 @@ __AQEXPORT__(bool) additem(ColRef<double> X, long y, long size){
     pt ++;
     return 1;
 }
-__AQEXPORT__(bool) fit(){
-    if(pt<=0)return 0;
-    dt->fit(data, result, pt);
-    return 1;
+__AQEXPORT__(bool) fit(vector_type<vector_type<double>> v, vector_type<long> res){
+    double** data = (double**)malloc(v.size*sizeof(double*));
+    for(int i = 0; i < v.size; ++i)
+        data[i] = v.container[i].container;
+    dt->fit(data, res.container, v.size);
+    return true;
 }
 
-__AQEXPORT__(ColRef_storage) predict(){
-    int* result = (int*)malloc(pt*sizeof(int));
+__AQEXPORT__(vectortype_cstorage) predict(vector_type<vector_type<double>> v){
+    int* result = (int*)malloc(v.size*sizeof(int));
 
-    for(long i=0; i<pt; ++i){
-        result[i] = dt->Test(data[i], dt->DTree);
-    }
+    for(long i=0; i<v.size; ++i){
+        result[i] = dt->Test(v.container[i].container, dt->DTree);
+        //printf("%d ", result[i]);
+    }
+    auto container = (vector_type<int>*)malloc(sizeof(vector_type<int>));
+    container->size = v.size;
+    container->capacity = 0;
+    container->container = result;
+    // container->out(10);
+    // ColRef<vector_type<int>>* col = (ColRef<vector_type<int>>*)malloc(sizeof(ColRef<vector_type<int>>));
+    auto ret = vectortype_cstorage{.container = container, .size = 1, .capacity = 0};
+    // col->initfrom(ret, "sibal");
+    // print(*col);
+    return ret;
+    //return true;
 }
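predict() now hands its result back as vectortype_cstorage, a flat C-layout descriptor (buffer pointer, size, capacity) that the engine can adopt as a vector column without copying. A condensed sketch of that hand-off; vec and make_result are illustrative stand-ins, not the server's definitions:

#include <cstdlib>

template <class T>
struct vec { T* container; unsigned size, capacity; };   // stand-in for vector_type<T>

struct vectortype_cstorage { void* container; unsigned size, capacity; };

// Wrap a malloc'd int buffer as a one-column vector result, as predict() does.
vectortype_cstorage make_result(int* data, unsigned n) {
    auto* inner = (vec<int>*)std::malloc(sizeof(vec<int>));
    *inner = vec<int>{data, n, 0};            // capacity 0, matching the patch
    return vectortype_cstorage{inner, 1, 0};  // one vector, handed to the engine
}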
diff --git a/server/table.h b/server/table.h
index 56c7a4b..f3911af 100644
--- a/server/table.h
+++ b/server/table.h
@@ -74,7 +74,7 @@ public:
 		this->container = (_Ty*)container;
 		this->name = name;
 	}
-	template
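Tying patch 01 together: for tests/complex_data.a the compiler's produce_cpp emits a post-process loop that reads each CSV row into per-column temporaries and appends them to the table's columns. A self-contained mock of that loop under the same separators (',' for fields, ';' for elements); MockReader only imitates the AQCSVReader alias and is not the csv.h implementation:

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

struct MockReader {                       // illustrative stand-in, not csv.h
    std::istringstream in;
    explicit MockReader(std::string csv) : in(std::move(csv)) {}
    void next_line() { std::string h; std::getline(in, h); }   // skip header row
    bool read_row(double& a, std::vector<double>& b, int& c) {
        std::string line;
        if (!std::getline(in, line)) return false;
        std::istringstream row(line);
        std::string fa, fb, fc;                                 // one field per column
        std::getline(row, fa, ','); std::getline(row, fb, ','); std::getline(row, fc, ',');
        a = std::stod(fa); c = std::stoi(fc);
        b.clear();
        std::istringstream elems(fb);                           // split field on ';'
        for (std::string e; std::getline(elems, e, ';');) b.push_back(std::stod(e));
        return true;
    }
};

int main() {
    MockReader reader("a,b,c\n5e-3,3;4;5e-3;6.32,7\n1,2,3\n");
    std::vector<double> col_a; std::vector<std::vector<double>> col_b; std::vector<int> col_c;
    reader.next_line();
    double a; std::vector<double> b; int c;
    while (reader.read_row(a, b, c)) {    // same loop shape as the generated code
        col_a.emplace_back(a); col_b.emplace_back(b); col_c.emplace_back(c);
    }
    std::printf("%zu rows, row 0 has %zu elements\n", col_a.size(), col_b[0].size());
}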