diff --git a/.gitignore b/.gitignore
index 4807b2c..508685f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,12 +51,14 @@ k
 **/Debug
 **/Release
 test*.c*
+data/benchmark
 *.csv
 !test.csv
 !test2.csv
 !moving_avg.csv
 !nyctx100.csv
 !network.csv
+!test_complex.csv
 *.out
 *.asm
 !mmw.so
diff --git a/Makefile b/Makefile
index dd7747e..21b55bd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,25 +1,34 @@
 OS_SUPPORT = 
 MonetDB_LIB = 
 MonetDB_INC = 
-Threading = 
+Defines = 
 CXXFLAGS = --std=c++1z
 ifeq ($(AQ_DEBUG), 1)
-	OPTFLAGS = -g3 
+	OPTFLAGS = -g3 -fsanitize=address -fsanitize=leak
+	LINKFLAGS = 
 else
 	OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector 
+	LINKFLAGS = -flto -s
 endif
-LINKFLAGS = -flto # + $(AQ_LINK_FLAG)
 SHAREDFLAGS = -shared  
 FPIC = -fPIC
-COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc) 
+_COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc) 
+COMPILER = $(strip $(_COMPILER))
 LIBTOOL = ar rcs
 USELIB_FLAG = -Wl,--whole-archive,libaquery.a -Wl,-no-whole-archive
-LIBAQ_SRC = server/server.cpp server/monetdb_conn.cpp server/io.cpp 
-LIBAQ_OBJ = server.o monetdb_conn.o io.o 
+LIBAQ_SRC = server/monetdb_conn.cpp server/libaquery.cpp 
+LIBAQ_OBJ = monetdb_conn.o libaquery.o
 SEMANTIC_INTERPOSITION = -fno-semantic-interposition
 RANLIB = ranlib
+_LINKER_BINARY = $(shell `$(CXX) -print-prog-name=ld` -v 2>&1 | grep -q LLVM && echo lld || echo ld)
+LINKER_BINARY = $(strip $(_LINKER_BINARY))
+ifeq ($(LINKER_BINARY), ld)
+	LINKER_FLAGS = -Wl,--allow-multiple-definition
+else
+	LINKER_FLAGS =
+endif
 
-ifeq ($(COMPILER), clang )
+ifeq ($(COMPILER), clang)
 	CLANG_GE_10 = $(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \>= 10)
 	ifneq ($(CLANG_GE_10), 1)
 		SEMANTIC_INTERPOSITION = 
@@ -49,7 +58,7 @@ ifeq ($(OS),Windows_NT)
 	MonetDB_LIB += msc-plugin/monetdbe.dll 
 	MonetDB_INC +=  -Imonetdb/msvc
 	LIBTOOL = gcc-ar rcs
-	ifeq ($(COMPILER), clang )
+	ifeq ($(COMPILER), clang)
 		FPIC =
 	endif
 else
@@ -61,7 +70,7 @@ else
 		USELIB_FLAG = -Wl,-force_load
 		MonetDB_LIB += -L$(shell brew --prefix monetdb)/lib 
 		MonetDB_INC += -I$(shell brew --prefix monetdb)/include/monetdb
-		ifeq ($(COMPILER), clang )
+		ifeq ($(COMPILER), clang)
 			LIBTOOL = libtool -static -o
 		endif
 		ifneq ($(UNAME_M),arm64)
@@ -79,43 +88,65 @@ endif
 ifeq ($(THREADING),1)
 	LIBAQ_SRC += server/threading.cpp
 	LIBAQ_OBJ += threading.o
-	Threading +=  -DTHREADING
+	Defines +=  -DTHREADING
+endif
+
+ifeq ($(AQUERY_ITC_USE_SEMPH), 1)
+	Defines += -D__AQUERY_ITC_USE_SEMPH__
 endif
 
-SHAREDFLAGS += $(FPIC)
+CXXFLAGS += $(OPTFLAGS) $(Defines) $(MonetDB_INC) 
+BINARYFLAGS = $(CXXFLAGS) $(LINKFLAGS) $(MonetDB_LIB)
+SHAREDFLAGS += $(FPIC) $(BINARYFLAGS)
 
 info:
-	$(info $(OPTFLAGS))
-	$(info $(OS_SUPPORT))
-	$(info $(OS)) 
-	$(info $(Threading))
-	$(info "test")
-	$(info $(LIBTOOL))
-	$(info $(MonetDB_INC))
-	$(info $(COMPILER))
-	$(info $(CXX))
-	$(info $(FPIC))
+	$(info This makefile script is used in AQuery to automatically build required libraries and executables.)
+	$(info Run it manually only for debugging purposes.)
+	$(info Targets (built by `make <target>`):)
+	$(info $"	pch: generate precompiled header)
+	$(info $"	libaquery: build static library libaquery.a)
+	$(info $"	server.so: build execution engine)
+	$(info $"	snippet: build generated query snippet)
+	$(info $"	server_uselib: build execution engine using shared library and pch)
+	$(info $"	snippet_uselib: build generated query snippet using shared library and pch)
+	$(info $"	docker: build docker image with name aquery)
+	$(info $"	launcher: build launcher for aquery ./aq)
+	$(info $"	clean: remove all generated binaries and caches)
+	$(info )
+	$(info Variables:)
+	$(info $"	OPTFLAGS: $(OPTFLAGS))
+	$(info $"	OS_SUPPORT: $(OS_SUPPORT))
+	$(info $"	OS: $(OS)) 
+	$(info $"	Defines: $(Defines))
+	$(info $"	LIBTOOL: $(LIBTOOL))
+	$(info $"	MonetDB_INC: $(MonetDB_INC))
+	$(info $"	COMPILER: $(COMPILER))
+	$(info $"	CXX: $(CXX))
+	$(info $"	LINKER_BINARY: $(LINKER_BINARY))
+	$(info $"	LINKER_FLAGS: $(LINKER_FLAGS))
 pch:
-	$(CXX) -x c++-header server/pch.hpp $(FPIC) $(MonetDB_INC) $(OPTFLAGS) $(CXXFLAGS) $(Threading)
-libaquery.a:
-	$(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(OS_SUPPORT) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) &&\
+	$(CXX) -x c++-header server/pch.hpp $(FPIC) $(CXXFLAGS)
+libaquery:
+	$(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(CXXFLAGS) &&\
 	$(LIBTOOL) libaquery.a $(LIBAQ_OBJ) &&\
 	$(RANLIB) libaquery.a
 
+warmup:
+	$(CXX)  msc-plugin/dummy.cpp libaquery.a $(SHAREDFLAGS) -o dll.so
 server.bin:
-	$(CXX) $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading)  $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o server.bin
+	$(CXX) $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o server.bin
 launcher:
-	$(CXX) -D__AQ_BUILD_LAUNCHER__ $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading)  $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o aq
+	$(CXX) -D__AQ_BUILD_LAUNCHER__ server/server.cpp $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o aq
 server.so:
 #	$(CXX) -z muldefs server/server.cpp server/monetdb_conn.cpp -fPIC -shared $(OS_SUPPORT) monetdb/msvc/monetdbe.dll --std=c++1z -O3 -march=native -o server.so -I./monetdb/msvc 
-	$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so 
+	$(CXX) $(PCHFLAGS) $(LIBAQ_SRC) server/server.cpp $(OS_SUPPORT) $(SHAREDFLAGS) -o server.so 
 server_uselib:
-	$(CXX) $(SHAREDFLAGS) $(USELIB_FLAG),libaquery.a $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so
+	$(CXX) $(LINKER_FLAGS) server/server.cpp libaquery.a $(SHAREDFLAGS) -o server.so
 
 snippet:
-	$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so
+	$(CXX) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(SHAREDFLAGS) -o dll.so
 snippet_uselib:
-	$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp libaquery.a $(MonetDB_INC) $(Threading) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so
+	$(CXX) $(PCHFLAGS) out.cpp libaquery.a $(SHAREDFLAGS) -o dll.so
 
 docker:
 	docker build -t aquery .
diff --git a/README.md b/README.md
index 8abab8f..d272137 100644
--- a/README.md
+++ b/README.md
@@ -226,9 +226,38 @@ DROP TABLE my_table IF EXISTS
 - File name can also be absolute path.
 - See `data/q1.sql` for more information 
 
+## Combine Queries
+- `UNION ALL` is a bag union of two query results with the same schema, e.g.
+```
+SELECT * FROM table1 UNION ALL SELECT * FROM table2
+```
+- `EXCEPT` clause will return the difference of two query results, e.g. `SELECT * FROM table1 EXCEPT SELECT * FROM table2`.
+  
 ## Delete Data:
 - Use a query like `DELETE FROM <table_name> [WHERE <conditions>]` to delete rows from a table that matches the conditions.
   
+## Performance Measurement 
+- Execution time can be recorded using the `stats` command described above.
+  - `stats` command without any argument will show the execution time of all queries executed so far.
+  - `stats reset` will reset the timer for total execution time printed by `stats` command above.
+  - `stats on` will show execution time for every following query until a `stats off` command is received.
+
+## MonetDB Passthrough for Hybrid Engine 
+AQuery++ supports MonetDB passthrough for hybrid engine. Simply put standard SQL queries inside a \<sql> \</sql> block. <br>
+
+Queries inside a `<sql>` block must be separated by semicolons. They are sent to MonetDB directly, so they must be written in the MonetDB dialect rather than the AQuery dialect. Please refer to the [MonetDB documentation](https://www.monetdb.org/documentation-Sep2022/user-guide/sql-summary/) for more information.
+
+For example:
+```
+CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
+INSERT INTO my_table VALUES(10, 20, "example"), (20, 30, "example2")
+<sql>
+INSERT INTO my_table VALUES(10, 20, "example3");
+CREATE INDEX idx1 ON my_table(c1);
+</sql>
+SELECT * FROM my_table WHERE c1 > 10
+```
+
 ## Built-in functions: 
 - `avg[s]`: average of a column. `avgs(col), avgs(w, col)` is rolling and moving average with window `w` of the column `col`.
 - `var[s]`, `stddev[s]`: [moving/rolling] **population** variance, standard deviation.
@@ -250,7 +279,7 @@ DROP TABLE my_table IF EXISTS
 - AQuery++ supports different execution engines thanks to the decoupled compiler structure.
 - Hybrid Execution Engine: decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
 - AQuery Library: A set of header based libraries that provide column arithmetic and operations inspired by array programming languages like kdb. This library is used by C++ post-processor code which can significantly reduce the complexity of generated code, reducing compile time while maintaining the best performance. The set of libraries can also be used by UDFs as well as User modules which makes it easier for users to write simple but powerful extensions. 
-  
+
 # Roadmap
 - [x] SQL Parser -> AQuery Parser (Front End)
 - [x] AQuery-C++ Compiler (Back End)
diff --git a/aquery_config.py b/aquery_config.py
index cdff3b7..caa4faa 100644
--- a/aquery_config.py
+++ b/aquery_config.py
@@ -2,7 +2,7 @@
 
 ## GLOBAL CONFIGURATION FLAGS
 
-version_string = '0.4.9a'
+version_string = '0.5.3a'
 add_path_to_ldpath = True
 rebuild_backend = False
 run_backend = True
@@ -11,6 +11,9 @@ cygroot = 'c:/msys64/usr/bin'
 msbuildroot = ''
 os_platform = 'unknown'
 build_driver = 'Auto'
+compilation_output = True
+
+## END GLOBAL CONFIGURATION FLAGS
 
 def init_config():
     global __config_initialized__, os_platform, msbuildroot, build_driver
@@ -21,7 +24,8 @@ def init_config():
     import os
     from engine.utils import add_dll_dir
     # os.environ['CXX'] = 'C:/Program Files/LLVM/bin/clang.exe'
-    # os.environ['THREADING'] = '1'
+    os.environ['THREADING'] = '1'
+    os.environ['AQUERY_ITC_USE_SEMPH'] = '1'
 
     if  ('__config_initialized__' not in globals() or 
             not __config_initialized__):
diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py
index 5ae05bf..b9da28a 100644
--- a/aquery_parser/keywords.py
+++ b/aquery_parser/keywords.py
@@ -243,8 +243,8 @@ RESERVED = MatchFirst([
     WITHIN,
     INTO,
 ])
-L_INLINE = Literal("<k>").suppress()
-R_INLINE = Literal("</k>").suppress()
+L_INLINE = Literal("<sql>").suppress()
+R_INLINE = Literal("</sql>").suppress()
 LBRACE = Literal("{").suppress()
 RBRACE = Literal("}").suppress()
 LSB = Literal("[").suppress()
diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py
index 9c08db6..9237470 100644
--- a/aquery_parser/sql_parser.py
+++ b/aquery_parser/sql_parser.py
@@ -8,6 +8,7 @@
 #
 
 from sre_parse import WHITESPACE
+
 from mo_parsing.helpers import restOfLine
 from mo_parsing.infix import delimited_list
 from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
@@ -65,7 +66,7 @@ def parser(literal_string, ident, sqlserver=False):
 
         var_name = ~RESERVED + ident
         
-        inline_kblock = (L_INLINE + SkipTo(R_INLINE, include=True))("c")
+        inline_sqlblock = (L_INLINE + SkipTo(R_INLINE, include=True))("sql")
         # EXPRESSIONS
         expr = Forward()
         column_type, column_definition, column_def_references = get_column_type(
@@ -568,8 +569,9 @@ def parser(literal_string, ident, sqlserver=False):
                 | assign("comment", EQ + literal_string)
                 | assign("default character set", EQ + var_name)
                 | assign("default charset", EQ + var_name)
-            )
-            + Optional(AS.suppress() + infix_notation(query, [])("query"))
+            ) 
+            + Optional(AS.suppress() + query("query")) 
+            # investigate why infix_notation(query, []) eats up the rest of queries
         )("create_table")
 
         create_view = (
@@ -655,7 +657,8 @@ def parser(literal_string, ident, sqlserver=False):
         ) / to_json_call
 
         load_data = (
-            keyword("data") ("file_type")
+            Optional(keyword("complex")("complex"))
+            + keyword("data") ("file_type")
             + keyword("infile")("loc")  
             + literal_string ("file")
             + INTO
@@ -667,6 +670,12 @@ def parser(literal_string, ident, sqlserver=False):
                   + keyword("by").suppress() 
                   + literal_string ("term")
             )
+            + Optional(
+                  keyword("element").suppress()
+                  + keyword("terminated").suppress()
+                  + keyword("by").suppress() 
+                  + literal_string ("ele")
+            )
         )
         
         module_func_def = (
@@ -716,7 +725,7 @@ def parser(literal_string, ident, sqlserver=False):
         )("stmts"), ";")
 
         other_stmt = (
-            inline_kblock
+            inline_sqlblock
             | udf
         ) ("stmts")
         
diff --git a/build.py b/build.py
index 8cd4b91..5ce74e8 100644
--- a/build.py
+++ b/build.py
@@ -16,6 +16,7 @@ class checksums:
     server : Optional[Union[bytes, bool]] = None
     sources : Optional[Union[Dict[str, bytes], bool]] = None
     env : str = ''
+    
     def calc(self, compiler_name, libaquery_a = 'libaquery.a' , 
                 pch_hpp_gch = 'server/pch.hpp.gch', 
                 server = 'server.so'
@@ -24,7 +25,8 @@ class checksums:
         self.env = (aquery_config.os_platform +
                     machine() + 
                     aquery_config.build_driver + 
-                    compiler_name
+                    compiler_name + 
+                    aquery_config.version_string
                 )
         for key in self.__dict__.keys():
             try:
@@ -71,14 +73,14 @@ class checksums:
 class build_manager:
     sourcefiles = [
                    'build.py', 'Makefile', 
-                   'server/server.cpp', 'server/io.cpp',  
+                   'server/server.cpp', 'server/libaquery.cpp',  
                    'server/monetdb_conn.cpp', 'server/threading.cpp', 
                    'server/winhelper.cpp' 
                    ]
     headerfiles = ['server/aggregations.h', 'server/hasher.h', 'server/io.h', 
                    'server/libaquery.h', 'server/monetdb_conn.h', 'server/pch.hpp', 
                    'server/table.h', 'server/threading.h', 'server/types.h', 'server/utils.h', 
-                   'server/winhelper.h', 'server/gc.hpp', 'server/vector_type.hpp', 
+                   'server/winhelper.h', 'server/gc.h', 'server/vector_type.hpp', 
                    'server/table_ext_monetdb.hpp' 
                    ]
    
@@ -92,6 +94,9 @@ class build_manager:
             return False
         def build(self, stdout = sys.stdout, stderr = sys.stderr):
             ret = True
+            if not aquery_config.compilation_output:
+                stdout = nullstream
+                stderr = nullstream
             for c in self.build_cmd:
                 if c:
                     try: # only last success matters
@@ -100,6 +105,8 @@ class build_manager:
                         ret = False
                         pass
             return ret
+        def warmup(self):
+            return True
                 
     class MakefileDriver(DriverBase):
         def __init__(self, mgr : 'build_manager') -> None:
@@ -111,9 +118,9 @@ class build_manager:
                 mgr.cxx = os.environ['CXX']
             if 'AQ_DEBUG' not in os.environ:
                 os.environ['AQ_DEBUG'] = '0' if mgr.OptimizationLv else '1'
-                
+
         def libaquery_a(self):
-            self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery.a']]
+            self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery']]
             return self.build()
         def pch(self):
             self.build_cmd = [['rm', 'server/pch.hpp.gch'], ['make', 'pch']]
@@ -166,6 +173,10 @@ class build_manager:
             self.build_cmd = [[aquery_config.msbuildroot, loc, self.opt, self.platform]]
             return self.build()
 
+        def warmup(self):
+            self.build_cmd = [['make', 'warmup']]
+            return self.build()
+            
     #class PythonDriver(DriverBase):
     #    def __init__(self, mgr : 'build_manager') -> None:
     #        super().__init__(mgr)           
@@ -221,6 +232,9 @@ class build_manager:
             current.calc(self.cxx, libaquery_a)
             with open('.cached', 'wb') as cache_sig:
                 cache_sig.write(pickle.dumps(current))
+            self.driver.warmup()
+            
+            
         else:
             if aquery_config.os_platform == 'mac':
                 os.system('./arch-check.sh')
diff --git a/csv.h b/csv.h
index c5cb5bc..6b10915 100644
--- a/csv.h
+++ b/csv.h
@@ -1,4 +1,4 @@
-// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
+// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>, 2022 Bill Sun
 // License: BSD-3
 //
 // All rights reserved.
@@ -49,6 +49,7 @@
 #include <cerrno>
 #include <istream>
 #include <limits>
+#include "server/vector_type.hpp"
 
 namespace io{
         ////////////////////////////////////////////////////////////////////////////
@@ -974,8 +975,7 @@ namespace io{
                                                 return;
                                         }
                                         x = 10*x+y;
-                                }else
-                                        throw error::no_digit();
+                                }
                                 ++col;
                         }
                 }
@@ -1005,8 +1005,7 @@ namespace io{
                                                         return;
                                                 }
                                                 x = 10*x-y;
-                                        }else
-                                                throw error::no_digit();
+                                        }
                                         ++col;
                                 }
                                 return;
@@ -1080,19 +1079,37 @@ namespace io{
                                         }
                                         x *= base;
                                 }
-                        }else{
-                                if(*col != '\0')
-                                        throw error::no_digit();
                         }
 
                         if(is_neg)
                                 x = -x;
                 }
 
+
                 template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
                 template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
                 template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
-
+                
+
+                template<class overflow_policy, class T, char sep2 = ';'> // sep2: character separating elements inside one field
+                void parse_vector(char* col, vector_type<T>& x) { // parses each sep2-separated element of col and appends it to x
+                    while (*col != '\0') {
+                        char* next_col = col;
+                        while (*next_col != sep2 && *next_col != '\0') // advance to the separator or the end of the field
+                            ++next_col;
+                        while (*next_col == ' ' || *next_col == '\t' || // then skip separator(s) and any whitespace/newline chars after them
+                            *next_col == sep2 || *next_col == '\r' || 
+                            *next_col == '\n') 
+                            ++next_col;
+                        char _next_end = *next_col; // remember the first character of the next element
+                        *next_col = '\0'; // terminate here: the current token still includes the skipped separator/whitespace
+                        T y;
+                        ::io::detail::parse<overflow_policy>(col, y); // NOTE(review): presumably relies on parse() tolerating the trailing separator - confirm
+                        x.emplace_back(y);
+                        col = next_col;
+                        *next_col = _next_end; // restore the character overwritten by the temporary terminator
+                    }
+                }
                 template<class overflow_policy, class T>
                 void parse(char*col, T&x){
                         // Mute unused variable compiler warning
@@ -1108,6 +1125,7 @@ namespace io{
         }
 
         template<unsigned column_count,
+                char sep2 = ';',
                 class trim_policy = trim_chars<' ', '\t'>,
                 class quote_policy = no_quote_escape<','>,
                 class overflow_policy = throw_on_overflow,
@@ -1234,7 +1252,23 @@ namespace io{
                         parse_helper(r+1, cols...);
                 }
 
-
+                template<class T, class ...ColType>
+                void parse_helper(std::size_t r, vector_type<T>&t, ColType&...cols){ // overload selected when column r is read into a vector_type<T>
+                        if(row[r]){ // a null row[r] leaves t untouched
+                                try{
+                                        try{
+                                                ::io::detail::parse_vector<overflow_policy, T, sep2>(row[r], t); // split the field on sep2 and append each element to t
+                                        }catch(error::with_column_content&err){
+                                                err.set_column_content(row[r]); // attach the field text to the error before rethrowing
+                                                throw;
+                                        }
+                                }catch(error::with_column_name&err){
+                                        err.set_column_name(column_names[r].c_str()); // attach the column name before rethrowing
+                                        throw;
+                                }
+                        }
+                        parse_helper(r+1, cols...); // recurse into the remaining columns
+                }
         public:
                 template<class ...ColType>
                 bool read_row(ColType& ...cols){
@@ -1269,5 +1303,12 @@ namespace io{
                 }
         };
 }
+
+template <unsigned column_count, char sep1 = ',', char sep2 = ';'>
+using AQCSVReader = io::CSVReader<column_count, sep2, 
+        io::trim_chars<(char)32, (char)9>, io::no_quote_escape<sep1>, 
+        io::ignore_overflow, io::empty_line_comment
+        >;
+
 #endif
 
diff --git a/data/test.csv b/data/test.csv
index 5eb9e8f..b4fe244 100644
--- a/data/test.csv
+++ b/data/test.csv
@@ -1,11 +1,21 @@
 a, b, c, d
 1,1,2,2
+2,1,2,2
+2,4,3,4
 1,2,2,2
 1,2,3,4
 4,2,1,4
-2,1,3,4
+2,1,3,3
+2,1,1,2
 1,2,3,4
+3,2,4,2
 1,2,3,3
 3,2,1,2
-2,1,2,2
+2,1,4,2
+3,3,4,4
+2,2,3,1
+2,3,4,4
+2,4,1,2
+3,4,1,2
+2,3,2,2
 1,2,3,1
diff --git a/data/test_complex.csv b/data/test_complex.csv
new file mode 100644
index 0000000..efd7b3e
--- /dev/null
+++ b/data/test_complex.csv
@@ -0,0 +1,6 @@
+a,b,c
+5e-3, 3;4 ;5e-3;6.32,7
+1,2,3
+4,5;6;7;8;9, 0
+    3 ,2 ; 4; 5.7; -.3; 5., 6
+-3.12312,-4E+7;67456746744567;75,4
diff --git a/datagen.cpp b/datagen.cpp
index 88f5a48..c96b480 100644
--- a/datagen.cpp
+++ b/datagen.cpp
@@ -151,5 +151,5 @@ int gen_stock_data(int argc, char* argv[]){
 }
 
 int main(int argc, char* argv[]){
-    gen_stock_data(argc, argv);
+    return gen_stock_data(argc, argv);
 }
diff --git a/engine/types.py b/engine/types.py
index 8eac736..5baf47f 100644
--- a/engine/types.py
+++ b/engine/types.py
@@ -1,8 +1,9 @@
 from copy import deepcopy
-from engine.utils import base62uuid, defval
-from aquery_config import have_hge
 from typing import Dict, List
 
+from aquery_config import have_hge
+from engine.utils import base62uuid, defval
+
 type_table: Dict[str, "Types"] = {}
 
 class Types:
@@ -65,10 +66,10 @@ class Types:
         return self.sqlname
     
     @staticmethod
-    def decode(aquery_type : str, vector_type:str = 'ColRef') -> "Types":
-        if (aquery_type.startswith('vec')):
+    def decode(aquery_type : str, vector_type:str = 'vector_type') -> "Types":
+        if (aquery_type.lower().startswith('vec')):
             return VectorT(Types.decode(aquery_type[3:]), vector_type)
-        return type_table[aquery_type]
+        return type_table[aquery_type.lower()]
     
 class TypeCollection:
     def __init__(self, sz, deftype, fptype = None, utype = None, *, collection = None) -> None:
@@ -121,7 +122,7 @@ class VectorT(Types):
         return f'{self.vector_type}<{self.inner_type.name}>'
     @property
     def sqlname(self) -> str:
-        return 'BIGINT'
+        return 'HUGEINT' # Store vector_type into 16-byte (128-bit) integers
     @property
     def cname(self) -> str:
         return f'{self.vector_type}<{self.inner_type.cname}>'
@@ -142,7 +143,7 @@ fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT
 temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
 builtin_types : Dict[str, Types] = {
     'string' : StrT,
-    **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT),
+    **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT, HgeT),
     **int_types, **fp_types, **temporal_types}
 
 def get_int128_support():
@@ -294,7 +295,7 @@ opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call
 # monetdb wont extend int division to fp type
 # opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior)
 opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior)
-opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior)
+opmul = OperatorBase('mul', 2, auto_extension, cname = '*', sqlname = '*', call = binary_op_behavior)
 opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior)
 opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior)
 opneg = OperatorBase('neg', 1, as_is, cname = '-', sqlname = '-', call = unary_op_behavior)
@@ -323,10 +324,14 @@ fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', ca
 #fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior)
 fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior)
 fnavg = OperatorBase('avg', 1, lfp_return, cname = 'avg', sqlname = 'AVG', call = fn_behavior)
+fnvar = OperatorBase('var', 1, lfp_return, cname = 'var', sqlname = 'VAR_POP', call = fn_behavior)
+fnstd = OperatorBase('stddev', 1, lfp_return, cname = 'stddev', sqlname = 'STDDEV_POP', call = fn_behavior)
 fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlname = 'MAXS', call = windowed_fn_behavor)
 fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor)
 fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor)
 fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'avgs', sqlname = 'AVGS', call = windowed_fn_behavor)
+fnvars = OperatorBase('vars', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'vars', sqlname = 'VARS', call = windowed_fn_behavor)
+fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor)
 fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior)
 fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior)
 # special
@@ -360,8 +365,14 @@ builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow)
 builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, 
                              fnmins, fndeltas, fnratios, fnlast,
                              fnfirst, fnsums, fnavgs, fncnt, 
-                             fnpack, fntrunc, fnprev, fnnext)
+                             fnpack, fntrunc, fnprev, fnnext, 
+                             fnvar, fnvars, fnstd, fnstds)
 user_module_func = {}
 builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, 
     **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, 
     **user_module_func}
+
+type_table = {**builtin_types, **type_table}
+
+# Additional Aliases for type names
+type_table['boolean'] = BoolT
diff --git a/engine/utils.py b/engine/utils.py
index 065f8c8..8e65fcd 100644
--- a/engine/utils.py
+++ b/engine/utils.py
@@ -1,6 +1,6 @@
-from collections import OrderedDict
-from collections.abc import MutableMapping, Mapping
 import uuid
+from collections import OrderedDict
+from collections.abc import Mapping, MutableMapping
 
 lower_alp = 'abcdefghijklmnopqrstuvwxyz'
 upper_alp = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@@ -107,6 +107,8 @@ def defval(val, default):
 
 # escape must be readonly
 from typing import Mapping, Set
+
+
 def remove_last(pattern : str, string : str, escape : Set[str] = set()) -> str:
     idx = string.rfind(pattern)
     if idx == -1:
@@ -126,9 +128,11 @@ class _Counter:
         return cnt
 
 import re
+
 ws = re.compile(r'\s+')
 import os
 
+
 def add_dll_dir(dll: str):
     import sys
     if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt':
@@ -144,3 +148,13 @@ def clamp(val, minval, maxval):
 
 def escape_qoutes(string : str):
     return re.sub(r'^\'', r'\'',re.sub(r'([^\\])\'', r'\1\'', string))
+
+def get_innermost(sl):  # Follow the first value/element of nested dicts and lists down to the innermost value.
+    if sl and type(sl) is dict:
+        if 'literal' in sl and type(sl['literal']) is str:
+            return f"'{get_innermost(sl['literal'])}'"  # a string under the 'literal' key is returned wrapped in single quotes
+        return get_innermost(next(iter(sl.values()), None))  # only the first value of the dict is followed
+    elif sl and type(sl) is list:
+        return get_innermost(sl[0])  # only the first element of the list is followed
+    else:
+        return sl  # falsy values (including empty dict/list) and non-containers are returned unchanged
\ No newline at end of file
diff --git a/msc-plugin/libaquery.vcxproj b/msc-plugin/libaquery.vcxproj
index cb493e4..f0d3dd4 100644
--- a/msc-plugin/libaquery.vcxproj
+++ b/msc-plugin/libaquery.vcxproj
@@ -221,7 +221,7 @@
   <ItemGroup>
     <ClInclude Include="..\csv.h" />
     <ClInclude Include="..\server\aggregations.h" />
-    <ClInclude Include="..\server\gc.hpp" />
+    <ClInclude Include="..\server\gc.h" />
     <ClInclude Include="..\server\hasher.h" />
     <ClInclude Include="..\server\io.h" />
     <ClInclude Include="..\server\libaquery.h" />
@@ -238,7 +238,7 @@
   <ItemGroup>
     <ClCompile Include="..\server\server.cpp" />
     <ClCompile Include="..\server\winhelper.cpp" />
-    <ClCompile Include="..\server\io.cpp" />
+    <ClCompile Include="..\server\libaquery.cpp" />
     <ClCompile Include="..\server\monetdb_conn.cpp" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/prompt.py b/prompt.py
index cd17360..b8ec8d1 100644
--- a/prompt.py
+++ b/prompt.py
@@ -1,4 +1,5 @@
 import aquery_config
+
 help_message = '''\
 ======================================================
                 AQUERY COMMANDLINE HELP
@@ -82,31 +83,31 @@ if __name__ == '__main__':
     
 
     
-import os
-from dataclasses import dataclass
+import atexit
+import ctypes
 import enum
-import time
+import mmap
+import os
 # import dbconn
 import re
+import subprocess
+import sys
+import threading
+import time
+from dataclasses import dataclass
 from typing import Callable, List, Optional
+
+import numpy as np
 from mo_parsing import ParseException
+
 import aquery_parser as parser
 import engine
-import engine.projection
 import engine.ddl
+import engine.projection
 import reconstruct as xengine
-import subprocess
-import mmap
-import sys
-from engine.utils import base62uuid
-import atexit
-import threading
-import ctypes
-import numpy as np
-from engine.utils import ws
-from engine.utils import add_dll_dir
-from engine.utils import nullstream
 from build import build_manager
+from engine.utils import add_dll_dir, base62uuid, nullstream, ws
+
 
 ## CLASSES BEGIN
 class RunType(enum.Enum):
@@ -159,9 +160,11 @@ class QueryStats:
 class Config:
     __all_attrs__ = ['running', 'new_query', 'server_mode', 
                      'backend_type', 'has_dll', 
-                     'postproc_time', 'sql_time', 
-                     'n_buffers'
+                     'n_buffers',
                      ]
+    __i64_attrs__ = [
+                     'monetdb_time', 'postproc_time'
+                    ]
     __init_attributes__ = False
     
     @staticmethod
@@ -170,26 +173,42 @@ class Config:
             from functools import partial
             for _i, attr in enumerate(Config.__all_attrs__):
                 if not hasattr(Config, attr):
-                    setattr(Config, attr, property(partial(Config.getter, i = _i), partial(Config.setter, i = _i)))
+                    setattr(Config, attr, property(
+                        partial(Config.getter, i = _i), partial(Config.setter, i = _i)
+                    ))
+            for _i, attr in enumerate(Config.__i64_attrs__):
+                if not hasattr(Config, attr):
+                    setattr(Config, attr, property(
+                        partial(Config.i64_getter, i = _i), partial(Config.i64_setter, i = _i)
+                    ))
             Config.__init_attributes__ = True
             
     def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
         Config.__init_self__()
-        self.int_size = 4
         self.n_attrib = len(Config.__all_attrs__)
-        self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
-        self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
+        self.buf = bytearray((self.n_attrib + n_bufs) * 4 +
+                              len(self.__i64_attrs__) * 8
+                             )
+        self.np_buf = np.ndarray(shape = (self.n_attrib), buffer = self.buf, dtype = np.int32)
+        self.np_i64buf = np.ndarray(shape = len(self.__i64_attrs__), buffer = self.buf, 
+                                    dtype = np.int64, offset = 4 * len(self.__all_attrs__))
         self.new_query = nq
         self.server_mode = mode.value 
         self.running = 1
-        self.backend_type = Backend_Type.BACKEND_AQuery.value
+        self.backend_type = Backend_Type.BACKEND_MonetDB.value
         self.has_dll = 0
         self.n_buffers = n_bufs
+        self.monetdb_time = 0
+        self.postproc_time = 0
         
     def getter (self, *, i):
         return self.np_buf[i]
     def setter(self, v, *, i):
         self.np_buf[i] = v
+    def i64_getter (self, *, i):  # property getter: i-th int64 slot of np_i64buf (a view over self.buf)
+        return self.np_i64buf[i]
+    def i64_setter(self, v, *, i):  # property setter: store v into the i-th int64 slot of np_i64buf
+        self.np_i64buf[i] = v
 
     def set_bufszs(self, buf_szs):
         for i in range(min(len(buf_szs), self.n_buffers)):
@@ -208,6 +227,8 @@ class PromptState():
     test_parser = True
     server_mode: RunType = RunType.Threaded
     server_bin = 'server.bin' if server_mode == RunType.IPC else 'server.so'
+    wait_engine = lambda: None
+    wake_engine = lambda: None
     set_ready = lambda: None
     get_ready = lambda: None
     server_status = lambda: False
@@ -298,12 +319,14 @@ def init_threaded(state : PromptState):
     if aquery_config.run_backend:    
         server_so = ctypes.CDLL('./'+state.server_bin)
         state.send = server_so['receive_args']
+        state.wait_engine = server_so['wait_engine']
+        state.wake_engine = server_so['wake_engine']
         aquery_config.have_hge = server_so['have_hge']()
         if aquery_config.have_hge != 0:
             from engine.types import get_int128_support
             get_int128_support()
         state.th = threading.Thread(target=server_so['main'], args=(-1, ctypes.POINTER(ctypes.c_char_p)(state.cfg.c)), daemon=True)
-        state.th.start()
+        state.th.start() 
 
 def init_prompt() -> PromptState:
     aquery_config.init_config()
@@ -336,6 +359,8 @@ def init_prompt() -> PromptState:
         rm = lambda: None
         def __set_ready():
             state.cfg.new_query = 1
+            state.wake_engine()
+            
         state.set_ready = __set_ready
         state.get_ready = lambda: aquery_config.run_backend and state.cfg.new_query
         if aquery_config.run_backend:
@@ -374,14 +399,23 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
     payload = None
     keep = True
     cxt = engine.initialize()
+    parser.parse('SELECT "**** WELCOME TO AQUERY++! ****";')
+    
     # state.currstats = QueryStats()
     # state.need_print = False
     while running():
         try:
             if state.server_status():
-                state.init()
+                state.init(state)
+            # *** busy waiting ***
+            # while state.get_ready():
+            #     time.sleep(.00001)
             while state.get_ready():
-                time.sleep(.00001)
+                state.wait_engine()
+                if state.need_print:
+                    print(f'MonetDB Time: {state.cfg.monetdb_time/10**9}, '
+                          f'PostProc Time: {state.cfg.postproc_time/10**9}')
+                    state.cfg.monetdb_time = state.cfg.postproc_time = 0
             state.currstats.print(state.stats, need_print=state.need_print)
             try:
                 og_q : str = next()
@@ -407,7 +441,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                     for t in cxt.tables:
                         lst_cols = []
                         for c in t.columns:
-                            lst_cols.append(f'{c.name} : {c.type}')
+                            lst_cols.append(f'{c.name} : {c.type.name}')
                         print(f'{t.table_name} ({", ".join(lst_cols)})')
                 continue
             elif q.startswith('help'):
@@ -498,17 +532,17 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                 rm(state)
                 exit()
             elif q.startswith('sh'):
-                from distutils.spawn import find_executable
+                from shutil import which
                 qs = re.split(r'[ \t]', q)
                 shells = ('zsh', 'bash', 'sh', 'fish', 'cmd', 'pwsh', 'powershell', 'csh', 'tcsh', 'ksh')
                 shell_path = ''
                 if len(qs) > 1 and qs[1] in shells:
-                    shell_path = find_executable(qs[1])
+                    shell_path = which(qs[1])
                     if shell_path:
                         os.system(shell_path)
                 else:
                     for sh in shells:
-                        shell_path = find_executable(sh)
+                        shell_path = which(sh)
                         if shell_path:
                             os.system(shell_path)
                             break
@@ -575,7 +609,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
                 state.stats.print(clear = False)
                 continue
             trimed = ws.sub(' ', og_q).split(' ') 
-            if trimed[0].lower().startswith('f'):
+            if len(trimed) > 1 and trimed[0].lower().startswith('fi') or trimed[0].lower() == 'f':
                 fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
                                 else trimed[1]
                 try:
@@ -605,7 +639,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
             print("\nBye.")
             raise
         except ValueError as e:
-            import code, traceback
+            import code
+            import traceback
             __stdin = os.dup(0)
             raise_exception = True
             sh = code.InteractiveConsole({**globals(), **locals()})
diff --git a/reconstruct/__init__.py b/reconstruct/__init__.py
index fd02f61..97afaba 100644
--- a/reconstruct/__init__.py
+++ b/reconstruct/__init__.py
@@ -1,4 +1,5 @@
 from reconstruct.ast import Context, ast_node
+
 saved_cxt = None
 
 def initialize(cxt = None, keep = False):
diff --git a/reconstruct/ast.py b/reconstruct/ast.py
index 270e671..04e5abc 100644
--- a/reconstruct/ast.py
+++ b/reconstruct/ast.py
@@ -1,12 +1,14 @@
+from binascii import Error
 from copy import deepcopy
 from dataclasses import dataclass
 from enum import Enum, auto
-from typing import Set, Tuple, Dict, Union, List, Optional
+from typing import Dict, List, Optional, Set, Tuple, Union
 
 from engine.types import *
-from engine.utils import enlist, base62uuid, base62alp, get_legal_name
-from reconstruct.storage import Context, TableInfo, ColRef
-    
+from engine.utils import (base62alp, base62uuid, enlist, get_innermost,
+                          get_legal_name)
+from reconstruct.storage import ColRef, Context, TableInfo
+
 class ast_node:
     header = []
     types = dict()
@@ -28,8 +30,8 @@ class ast_node:
     
     def emit(self, code):
         self.context.emit(code)
-    def add(self, code):
-        self.sql += code + ' '
+    def add(self, code, sp = ' '):
+        self.sql += code + sp
     def addc(self, code):
         self.ccode += code + '\n'
 
@@ -51,26 +53,60 @@ class ast_node:
             self.context.sql_end()
         
 from reconstruct.expr import expr, fastscan
-
-
+class SubqType(Enum):  # tag passed to projection(...) as subq_type
+    WITH = auto()  # used by projection.produce when building the query of a 'with' clause
+    FROM = auto()
+    PROJECTION = auto()
+    FILTER = auto()
+    GROUPBY = auto()
+    ORDERBY = auto()
+    NONE = auto()  # default subq_type of projection; presumably a top-level query -- confirm
 class projection(ast_node):
     name = 'projection'
     first_order = 'select'
-    
+
+        
+    def __init__(self, 
+                 parent : Optional["ast_node"],
+                 node, 
+                 context : Optional[Context] = None,
+                 force_use_spgb : bool = False,  # seeds context.special_gb here and use_sp_gb in groupby.produce
+                 subq_type: SubqType = SubqType.NONE  # result printing is only emitted when this is SubqType.NONE
+                ):
+        self.force_use_spgb = force_use_spgb
+        self.subq_type = subq_type
+        super().__init__(parent, node, context)  # NOTE(review): called last, presumably because the base constructor runs produce()/consume(), which read the two attributes above -- confirm
+        
+        
     def init(self, _):
         # skip default init
         pass
     
     def produce(self, node):
         self.add('SELECT')
-        self.has_postproc = False
+        self.has_postproc = 'into' in node
         if 'select' in node:
             p = node['select']
             self.distinct = False
         elif 'select_distinct' in node:
             p = node['select_distinct']
             self.distinct = True
-
+        if 'with' in node:
+            with_table = node['with']['name']
+            with_table_name = tuple(with_table.keys())[0]
+            with_table_cols = tuple(with_table.values())[0]
+            self.with_clause = projection(self, node['with']['value'], subq_type=SubqType.WITH)
+            self.with_clause.out_table.add_alias(with_table_name)
+            for new_name, col in zip(with_table_cols, self.with_clause.out_table.columns):
+                col.rename(new_name)
+            self.with_clause.out_table.contextname_cpp 
+            # in monetdb, in cxt 
+        else:
+            self.with_clause = None
+        
+        self.limit = None
+        if 'limit' in node:
+            self.limit = node['limit']
+            
         self.projections = p if type(p) is list else [p]
         if self.parent is None:
             self.context.sql_begin()
@@ -99,8 +135,9 @@ class projection(ast_node):
         if type(self.datasource) is join:
             self.datasource.process_join_conditions()
         
-        if 'groupby' in node:
-            self.context.special_gb = groupby.check_special(self, node['groupby'])
+        self.context.special_gb = self.force_use_spgb
+        if 'groupby' in node: # if groupby clause contains special stuff
+            self.context.special_gb |= groupby.check_special(self, node['groupby'])
 
     def consume(self, node):
         # deal with projections
@@ -158,6 +195,11 @@ class projection(ast_node):
                         this_type = [c.type for c in _datasource]
                         compound = [c.compound for c in _datasource]
                         proj_expr = [expr(self, c.name) for c in _datasource]
+                        for pe in proj_expr:
+                            if pe.is_ColExpr:
+                                pe.cols_mentioned = {pe.raw_col}
+                            else:
+                                pe.cols_mentioned = set()
                     else:
                         y = lambda x:x
                         count = lambda : 'count(*)'
@@ -203,8 +245,14 @@ class projection(ast_node):
         
         self.out_table.add_cols(cols, new = False)
         
+        self.proj_map = proj_map
+        
         if 'groupby' in node:
             self.group_node = groupby(self, node['groupby'])
+            if self.group_node.terminate:
+                self.context.abandon_query()
+                projection(self.parent, node, self.context, True, subq_type=self.subq_type)
+                return
             if self.group_node.use_sp_gb:
                 self.has_postproc = True
         else:
@@ -223,7 +271,11 @@ class projection(ast_node):
                 self.var_table[col.name] = offset
                 for n in (col.table.alias):
                     self.var_table[f'{n}.'+col.name] = offset
-    
+        # monetdb doesn't support select into table
+        # if 'into' in node:
+        #     self.into_stub = f'{{INTOSTUB{base62uuid(20)}}}'
+        #     self.add(self.into_stub, '')
+            
         def finialize(astnode:ast_node):
             if(astnode is not None):
                 self.add(astnode.sql)
@@ -235,6 +287,9 @@ class projection(ast_node):
         if self.col_ext or self.group_node and self.group_node.use_sp_gb:
             self.has_postproc = True
         
+        if self.group_node and self.group_node.use_sp_gb :
+            self.group_node.dedicated_glist
+            ...
         o = self.assumptions
         if 'orderby' in node:
             o.extend(enlist(node['orderby']))
@@ -258,7 +313,6 @@ class projection(ast_node):
         
         
         # cpp module codegen
-        self.context.has_dll = True
         # extract typed-columns from result-set
         vid2cname = [0]*len(self.var_table)
         self.pyname2cname = dict()
@@ -338,28 +392,36 @@ class projection(ast_node):
                     )
                 else:
                     # for funcs evaluate f_i(x, ...)
-                    self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>() = {val[1]};')
+                    self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>().initfrom({val[1]}, "{cols[i].name}");')
         # print out col_is
-        if 'into' not in node:
-            self.context.emitc(f'print(*{self.out_table.contextname_cpp});')
+        
+        if 'into' not in node and self.subq_type == SubqType.NONE:
+            if self.limit is None:
+                self.context.emitc(f'print(*{self.out_table.contextname_cpp});')
+            else:
+                self.context.emitc(f'{self.out_table.contextname_cpp}->printall(" ","\\n", nullptr, nullptr, {self.limit});')
         
         if self.outfile and self.has_postproc:
                 self.outfile.finalize()
 
         if 'into' in node: 
             self.context.emitc(select_into(self, node['into']).ccode)
+            self.has_postproc = True
         if not self.distinct:
             self.finalize()
-            
+                    
     def finalize(self):      
         self.context.emitc(f'puts("done.");')
 
         if self.parent is None:
             self.context.sql_end()
-            if self.outfile and not self.has_postproc:
-                self.context.abandon_postproc()
-            else:
+            if self.has_postproc:
+                self.context.has_dll = True
                 self.context.postproc_end(self.postproc_fname)
+            else:
+                self.context.ccode = ''
+                if self.limit != 0 and not self.outfile:
+                    self.context.direct_output()
         
 class select_distinct(projection):
     first_order = 'select_distinct'
@@ -367,18 +429,18 @@ class select_distinct(projection):
         super().consume(node)
         if self.has_postproc:
             self.context.emitc(
-                f'{self.out_table.table_name}->distinct();'
+                f'{self.out_table.contextname_cpp}->distinct();'
             )
         self.finalize()
         
 class select_into(ast_node):
-    def init(self, node):
+    def init(self, _):
         if isinstance(self.parent, projection):
-            if self.context.has_dll:
-                # has postproc put back to monetdb
-                self.produce = self.produce_cpp
-            else:
-                self.produce = self.produce_sql
+            # if self.parent.has_postproc:
+            #     # has postproc put back to monetdb
+            self.produce = self.produce_cpp
+            # else:
+            #     self.produce = self.produce_sql
         else:
             raise ValueError('parent must be projection')
         
@@ -390,7 +452,8 @@ class select_into(ast_node):
             self.ccode = f'{self.parent.out_table.contextname_cpp}->monetdb_append_table(cxt->alt_server, \"{node.lower()}\");'
             
     def produce_sql(self, node):
-        self.sql = f' INTO {node}'
+        self.context.sql = self.context.sql.replace(
+            self.parent.into_stub, f'INTO {node}', 1)
     
 
 class orderby(ast_node):
@@ -409,7 +472,7 @@ class orderby(ast_node):
                 o_str += ' ' + 'DESC'
             o_list.append(o_str)
         self.add(', '.join(o_list))
-            
+
 
 class scan(ast_node):
     class Position(Enum):
@@ -586,6 +649,10 @@ class groupby(ast_node):
                 return True
         return False
 
+    def init(self, _):
+        self.terminate = False  # produce() sets this to True when use_sp_gb turns on without parent.force_use_spgb
+        super().init(_)
+        
     def produce(self, node):
         if not isinstance(self.parent, projection):
             raise ValueError('groupby can only be used in projection')
@@ -593,8 +660,10 @@ class groupby(ast_node):
         node = enlist(node)
         o_list = []
         self.refs = set()
+        self.gb_cols = set()
+        # dedicated_glist -> cols populated for special group by
         self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = []
-        self.use_sp_gb = False
+        self.use_sp_gb = self.parent.force_use_spgb
         for g in node:
             self.datasource.rec = set()
             g_expr = expr(self, g['value'])
@@ -610,7 +679,24 @@ class groupby(ast_node):
             if 'sort' in g and f'{g["sort"]}'.lower() == 'desc':
                 g_str = g_str + ' ' + 'DESC'
             o_list.append(g_str)
-            
+            if g_expr.is_ColExpr:
+                self.gb_cols.add(g_expr.raw_col)
+            else:
+                self.gb_cols.add(g_expr.sql)
+                
+        for projs in self.parent.proj_map.values():
+            if self.use_sp_gb:
+                break
+            if (projs[2].is_compound and 
+                not ((projs[2].is_ColExpr and projs[2].raw_col in self.gb_cols) or
+                projs[2].sql in self.gb_cols)
+                ) and (not self.parent.force_use_spgb):
+                    self.use_sp_gb = True
+                    break
+                
+        if self.use_sp_gb and not self.parent.force_use_spgb:
+            self.terminate = True
+            return
         if not self.use_sp_gb:
             self.dedicated_gb = None
             self.add(', '.join(o_list))
@@ -916,38 +1002,64 @@ class insert(ast_node):
     name = 'insert'
     first_order = name
     def init(self, node):
-        values = node['query']
-        complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit']
-        if any([kw in values for kw in complex_query_kw]):
-            values['into'] = node['insert']
-            proj_cls = (select_distinct 
-            if 'select_distinct' in values 
-            else projection)
-            proj_cls(None, values, self.context)
-            self.produce = lambda*_:None
-            self.spawn = lambda*_:None
-            self.consume = lambda*_:None
+        if 'query' in node:
+            values = node['query']
+            complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit']
+            if any([kw in values for kw in complex_query_kw]):
+                values['into'] = node['insert']
+                proj_cls = (select_distinct 
+                if 'select_distinct' in values 
+                else projection)
+                proj_cls(None, values, self.context)
+                self.produce = lambda*_:None
+                self.spawn = lambda*_:None
+                self.consume = lambda*_:None
         else:
             super().init(node)
             
     def produce(self, node):
-        values = node['query']['select']
+        keys = []
+        if 'query' in node:
+            if 'select' in node['query']:
+                values = enlist(node['query']['select'])
+                if 'columns' in node:
+                    keys = node['columns']
+                values = [v['value'] for v in values]
+
+            elif 'union_all' in node['query']:
+                values = [[v['select']['value']] for v in node['query']['union_all']]
+                if 'columns' in node:
+                    keys = node['columns']
+        else:
+            values = enlist(node['values'])
+            _vals = []
+            for v in values:
+                if isinstance(v, dict):
+                    keys = v.keys()
+                    v = list(v.values())
+                v = [f"'{vv}'" if type(vv) is str else vv for vv in v]
+                _vals.append(v)
+            values = _vals
+            
+        keys = f'({", ".join(keys)})' if keys else ''
         tbl = node['insert']
-        self.sql = f'INSERT INTO {tbl} VALUES('
+        self.sql = f'INSERT INTO {tbl}{keys} VALUES'
         # if len(values) != table.n_cols:
         #     raise ValueError("Column Mismatch")
-
+        values = [values] if isinstance(values, list) and not isinstance(values[0], list) else values
         list_values = []
-        for i, s in enumerate(enlist(values)):
-            if 'value' in s:
-                list_values.append(f"{s['value']}")
-            else:
-                # subquery, dispatch to select astnode
-                pass
-        self.sql += ', '.join(list_values) + ')'
+        for l in values:
+            inner_list_values = []
+            for s in enlist(l):
+                if type(s) is dict and 'value' in s:
+                    s = s['value']
+                inner_list_values.append(f"{get_innermost(s)}")
+            list_values.append(f"({', '.join(inner_list_values)})")
+            
+        self.sql += ', '.join(list_values) 
         
 
-class delete_table(ast_node):
+class delete_from(ast_node):
     name = 'delete'
     first_order = name
     def init(self, node):
@@ -959,6 +1071,31 @@ class delete_table(ast_node):
         if 'where' in node:
             self.sql += filter(self, node['where']).sql
 
+class union_all(ast_node):
+    name = 'union_all'
+    first_order = name
+    sql_name = 'UNION ALL'
+    def produce(self, node):
+        """Join the operands' SQL with sql_name; raise NotImplementedError for any unsupported operand."""
+        unsupported = NotImplementedError(f"{self.sql_name} only support standard sql for now")
+        generated_queries : List[projection] = []
+        for q in node[self.name]:
+            # an operand without a plain 'select' key (e.g. 'select_distinct') cannot be generated here
+            if 'select' not in q:
+                raise unsupported
+            generated_queries.append(projection(self, q))
+        if any(q.has_postproc for q in generated_queries):
+            raise unsupported
+        self.sql = f' {self.sql_name} '.join([q.sql for q in generated_queries])
+    def consume(self, node):
+        super().consume(node)
+        self.context.direct_output()
+
+class except_clause(union_all):
+    name = 'except'
+    first_order = name
+    sql_name = 'EXCEPT'
+    
 class load(ast_node):
     name="load"
     first_order = name
@@ -967,6 +1104,9 @@ class load(ast_node):
         if node['load']['file_type'] == 'module':
             self.produce = self.produce_module
             self.module = True
+        elif 'complex' in node['load']:
+            self.produce = self.produce_cpp
+            self.consume = lambda *_: None
         elif self.context.dialect == 'MonetDB':
             self.produce = self.produce_monetdb
         else: 
@@ -998,7 +1138,7 @@ class load(ast_node):
                 self.context.queries.append(f'F{fname}')
                 ret_type = VoidT
                 if 'ret_type' in f:
-                    ret_type = Types.decode(f['ret_type'])
+                    ret_type = Types.decode(f['ret_type'], vector_type='vector_type')
                 nargs = 0
                 arglist = ''
                 if 'vars' in f:
@@ -1008,7 +1148,7 @@ class load(ast_node):
                     nargs = len(arglist)
                     arglist = ', '.join(arglist)
                 # create c++ stub 
-                cpp_stub = f'{ret_type.cname} (*{fname})({arglist}) = nullptr;'
+                cpp_stub = f'{"vectortype_cstorage" if isinstance(ret_type, VectorT) else ret_type.cname} (*{fname})({arglist}) = nullptr;'
                 self.context.module_stubs += cpp_stub + '\n'
                 self.context.module_map[fname] = cpp_stub
                 #registration for parser
@@ -1035,7 +1175,56 @@ class load(ast_node):
         self.sql = f'{s1} \'{p}\' {s2} '
         if 'term' in node:
             self.sql += f' {s3} \'{node["term"]["literal"]}\''
-                    
+            
+    def produce_cpp(self, node):  # emit C++ that reads a CSV file row by row and appends the rows to an existing table
+        self.context.has_dll = True
+        self.context.headers.add('"csv.h"')
+        node = node['load']
+        self.postproc_fname = 'ld_' + base62uuid(5)
+        self.context.postproc_begin(self.postproc_fname)
+        # emit a query selecting every column of the target table, then close the SQL part
+        table:TableInfo = self.context.tables_byname[node['table']]
+        self.sql = F"SELECT {', '.join([c.name for c in table.columns])} FROM {table.table_name};"
+        self.emit(self.sql+';\n')
+        self.context.sql_end()
+        length_name = 'len_' + base62uuid(6)
+        self.context.emitc(f'auto {length_name} = server->cnt;')
+        # generated C++ identifiers get a base62uuid suffix, presumably to keep them unique -- confirm
+        out_typenames = [t.type.cname for t in table.columns]
+        outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in table.columns])
+        
+        self.outtable_col_names = 'names_' + base62uuid(4)
+        self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};')
+        
+        self.out_table = 'tbl_' + base62uuid(4)
+        self.context.emitc(f'auto {self.out_table} = new TableInfo<{",".join(out_typenames)}>("{table.table_name}", {self.outtable_col_names});')
+        for i, c in enumerate(table.columns):
+            c.cxt_name = 'c_' + base62uuid(6) 
+            self.context.emitc(f'decltype(auto) {c.cxt_name} = {self.out_table}->get_col<{i}>();')
+            self.context.emitc(f'{c.cxt_name}.initfrom({length_name}, server->getCol({i}), "{table.columns[i].name}");')
+        csv_reader_name = 'csv_reader_' + base62uuid(6)
+        col_types = [c.type.cname for c in table.columns]
+        col_tmp_names = ['tmp_'+base62uuid(8) for _ in range(len(table.columns))]
+        # NOTE(review): only the first character after strip() of each terminator is used below; an all-whitespace terminator would raise IndexError
+        term_field = ',' if 'term' not in node else node['term']['literal']
+        term_ele = ';' if 'ele' not in node else node['ele']['literal']
+        self.context.emitc(f'AQCSVReader<{len(col_types)}, \'{term_field.strip()[0]}\', \'{term_ele.strip()[0]}\'> {csv_reader_name}("{node["file"]["literal"]}");')
+        # NOTE(review): next_line() is emitted once before the read loop, presumably to skip a header row -- confirm
+        self.context.emitc(f'{csv_reader_name}.next_line();')
+
+        for t, n in zip(col_types, col_tmp_names):
+            self.context.emitc(f'{t} {n};')
+        self.context.emitc(f'while({csv_reader_name}.read_row({",".join(col_tmp_names)})) {{ \n')
+        for i, c in enumerate(table.columns):
+            # inside the emitted loop: append each parsed field to its column
+            self.context.emitc(f'{c.cxt_name}.emplace_back({col_tmp_names[i]});')
+            
+        self.context.emitc('}')
+        # hand the populated table to monetdb_append_table under the same table name
+        self.context.emitc(f'{self.out_table}->monetdb_append_table(cxt->alt_server, "{table.table_name}");')
+        
+        self.context.postproc_end(self.postproc_fname)
+
 class outfile(ast_node):
     name="_outfile"
     def __init__(self, parent, node, context = None, *, sql = None):
@@ -1062,6 +1251,13 @@ class outfile(ast_node):
         filename = node['loc']['literal'] if 'loc' in node else node['literal']
         import os
         p =  os.path.abspath('.').replace('\\', '/') + '/' + filename
+        # warn only when the target really exists, then remove it before building COPY ... INTO
+        if os.path.exists(p):
+            print(f'Warning: file {p} exists and will be overwritten')
+            try:
+                os.remove(p)
+            except OSError:
+                print(f'Error: file {p} exists and cannot be removed')
         self.sql = f'COPY {self.parent.sql} INTO \'{p}\''
         d = ','
         e = '\\n'
@@ -1137,7 +1333,7 @@ class udf(ast_node):
                 
         
     def produce(self, node):
-        from engine.utils import get_legal_name, check_legal_name
+        from engine.utils import check_legal_name, get_legal_name
         node = node[self.name]
         # register udf
         self.agg = 'Agg' in node
@@ -1232,7 +1428,7 @@ class udf(ast_node):
                     
                     
     def consume(self, node):
-        from engine.utils import get_legal_name, check_legal_name
+        from engine.utils import check_legal_name, get_legal_name
         node = node[self.name]
                     
         if 'params' in node:
@@ -1339,7 +1535,25 @@ class udf(ast_node):
             return udf.ReturnPattern.elemental_return
         else:
             return udf.ReturnPattern.bulk_return
-            
+
+class passthru_sql(ast_node):
+    """Queue raw SQL text on context.queries, one 'Q<stmt>;' entry per non-empty statement."""
+    name = 'sql'
+    first_order = name
+    import re
+    # captures maximal runs of text containing no ';' outside single- or double-quoted strings
+    seprator = re.compile(r'''((?:[^;"']|"[^"]*"|'[^']*')+)''')
+    def __init__(self, _, node, context:Context):
+        # ast_node.__init__ is not invoked; the statements are appended to the queue directly
+        for piece in passthru_sql.seprator.split(node['sql']):
+            stmt = piece.strip(' \t\n\r;')
+            if not stmt:
+                continue
+            context.queries.append('Q' + stmt + ';')
+            if stmt.lower().startswith('select'):
+                context.queries.append('O')
+
+
 class user_module_function(OperatorBase):
     def __init__(self, name, nargs, ret_type, context : Context):
         super().__init__(name, nargs, lambda *_: ret_type, call=fn_behavior)
@@ -1355,4 +1569,5 @@ def include(objs):
             
             
 import sys
+
 include(sys.modules[__name__])
diff --git a/reconstruct/expr.py b/reconstruct/expr.py
index 4fd483b..af1f0cb 100644
--- a/reconstruct/expr.py
+++ b/reconstruct/expr.py
@@ -1,7 +1,8 @@
 from typing import Optional, Set
+
+from engine.types import *
 from reconstruct.ast import ast_node
 from reconstruct.storage import ColRef, Context
-from engine.types import *
 
 # TODO: Decouple expr and upgrade architecture
 # C_CODE : get ccode/sql code?
@@ -31,6 +32,7 @@ class expr(ast_node):
     
     def __init__(self, parent, node, *, c_code = None, supress_undefined = False):
         from reconstruct.ast import projection, udf
+
         # gen2 expr have multi-passes
         # first pass parse json into expr tree
         # generate target code in later passes upon need
@@ -78,7 +80,7 @@ class expr(ast_node):
         ast_node.__init__(self, parent, node, None)
 
     def init(self, _):
-        from reconstruct.ast import projection, _tmp_join_union
+        from reconstruct.ast import _tmp_join_union, projection
         parent = self.parent
         self.is_compound = parent.is_compound if type(parent) is expr else False
         if type(parent) in [projection, expr, _tmp_join_union]:
@@ -88,11 +90,13 @@ class expr(ast_node):
         self.udf_map = parent.context.udf_map
         self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
         self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
-        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first', 'prev', 'next']
+        self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 
+                             'last', 'first', 'prev', 'next', 'var', 
+                             'stddev']
         
     def produce(self, node):
         from engine.utils import enlist
-        from reconstruct.ast import udf
+        from reconstruct.ast import udf, projection
         
         if type(node) is dict:
             if 'literal' in node:
@@ -166,8 +170,17 @@ class expr(ast_node):
                             
                         special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), 
                                         "maxs", "mins", "avgs", "sums", "deltas", "last", "first", 
-                                        "ratios", "pack", "truncate"]
-                        if self.context.special_gb:
+                                        "stddevs", "vars", "ratios", "pack", "truncate"]
+                        
+                        if (
+                                self.context.special_gb 
+                                    or 
+                                (
+                                    type(self.root.parent) is projection 
+                                        and
+                                    self.root.parent.force_use_spgb
+                                )
+                           ):
                             special_func = [*special_func, *self.ext_aggfuncs]
                             
                         if key in special_func and not self.is_special:
@@ -333,7 +346,8 @@ class expr(ast_node):
                     self.type = ByteT
             elif type(node) is float:
                 self.type = DoubleT
-    
+                self.sql = f'{{"CAST({node} AS DOUBLE)" if not c_code else "{node}f"}}'
+                
     def finalize(self, override = False):
         from reconstruct.ast import udf
         if self.codebuf is None or override:
diff --git a/reconstruct/storage.py b/reconstruct/storage.py
index 0ba567a..c8f5e69 100644
--- a/reconstruct/storage.py
+++ b/reconstruct/storage.py
@@ -1,12 +1,14 @@
+from typing import Dict, List, Set
+
 from engine.types import *
 from engine.utils import CaseInsensitiveDict, base62uuid, enlist
-from typing import List, Dict, Set
+
 
 class ColRef:
     def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None):
         self.type : Types = AnyT
         if type(_ty) is str:
-            self.type = builtin_types[_ty.lower()]
+            self.type = Types.decode(_ty)
             if _ty_args:
                 self.type = self.type(enlist(_ty_args))
         elif type(_ty) is Types:
@@ -17,6 +19,7 @@ class ColRef:
         self.alias = set()
         self.id = id # position in table
         self.compound = compound # compound field (list as a field) 
+        self.cxt_name = ''
         # e.g. order by, group by, filter by expressions
         
         self.__arr__ = (_ty, cobj, table, name, id)
@@ -42,6 +45,14 @@ class ColRef:
             alias = table_name
         return f'{alias}.{self.get_name()}'
     
+    def rename(self, name):  # re-register this column under `name` in its table; returns self
+        self.alias.discard(self.name)  # the old name stops being an alias
+        self.table.columns_byname.pop(self.name, None)  # drop the old lookup entry if present
+        self.name = name
+        self.table.columns_byname[name] = self
+        
+        return self
+    
     def __getitem__(self, key):
         if type(key) is str:
             return getattr(self, key)
@@ -94,6 +105,17 @@ class TableInfo:
             return
         self.cxt.tables_byname[alias] = self
         self.alias.add(alias)
+    
+    def rename(self, name):  # rename this table and update the context's by-name lookup
+        if name in self.cxt.tables_byname.keys():
+            print(f"Error: table name {name} already exists")
+            return  # name collision: nothing is modified
+        
+        self.cxt.tables_byname.pop(self.table_name, None)  # drop the old lookup entry if present
+        self.alias.discard(self.table_name)
+        self.table_name = name
+        self.cxt.tables_byname[name] = self
+        self.alias.add(name)  # the new name is also recorded as an alias
         
     def parse_col_names(self, colExpr) -> ColRef:
         parsedColExpr = colExpr.split('.')
@@ -134,6 +156,7 @@ class Context:
         self.queries = []
         self.module_init_loc = 0
         self.special_gb = False
+        self.has_dll = False
          
     def __init__(self):
         self.tables_byname = dict()
@@ -147,7 +170,6 @@ class Context:
         self.udf_agg_map = dict()
         self.use_columnstore = False
         self.print = print
-        self.has_dll = False
         self.dialect = 'MonetDB'
         self.is_msvc = False
         self.have_hge = False
@@ -223,6 +245,14 @@ class Context:
         self.queries.append('P' + proc_name)    
         self.finalize_query()
         
+    def abandon_query(self):  # clear both the pending SQL and C code, then call finalize_query()
+        self.sql = ''
+        self.ccode = ''
+        self.finalize_query()
+    
+    def direct_output(self):  # append an 'O' entry to the query list
+        self.queries.append('O')
+    
     def abandon_postproc(self):
         self.ccode = ''
         self.finalize_query()
diff --git a/sdk/Evaluation.cpp b/sdk/Evaluation.cpp
index 3683597..8e347a7 100644
--- a/sdk/Evaluation.cpp
+++ b/sdk/Evaluation.cpp
@@ -5,14 +5,13 @@
 
 struct minEval{
         double value;
-        double values;
+        int* values;
 
 	double eval;
         long left; // how many on its left
         double* record;
         long max;
         long** count;
-        long* sorted; // sorted d
 };
 
 minEval giniSparse(double** data, long* result, long* d, long size, long col, long classes, long* totalT){
diff --git a/sdk/Makefile b/sdk/Makefile
index 7bd5c8c..b146a81 100644
--- a/sdk/Makefile
+++ b/sdk/Makefile
@@ -1,5 +1,11 @@
+OPT_FLAGS = 
+ifneq ($(DEBUG), 1)
+	OPT_FLAGS = -Ofast -march=native -flto -DNDEBUG 
+else 
+	OPT_FLAGS = -g3 -D_DEBUG -fsanitize=leak -fsanitize=address
+endif
 example:
 	$(CXX) -shared -fPIC example.cpp aquery_mem.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../test.so
 irf:
-	$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../libirf.so
+	$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition $(OPT_FLAGS) --std=c++1z -o ../libirf.so
 all: example
diff --git a/sdk/irf.cpp b/sdk/irf.cpp
index 8433c95..0f9aac1 100644
--- a/sdk/irf.cpp
+++ b/sdk/irf.cpp
@@ -4,9 +4,6 @@
 #include "../server/table.h"
 
 DecisionTree* dt = nullptr;
-long pt = 0;
-double** data = nullptr;
-long* result = nullptr;
 
 __AQEXPORT__(bool) newtree(int height, long f, ColRef<int> sparse, double forget, long maxf, long noclasses, Evaluation e, long r, long rb){
 	if(sparse.size!=f)return 0;
@@ -19,14 +16,13 @@ __AQEXPORT__(bool) newtree(int height, long f, ColRef<int> sparse, double forget
 	return 1;
 }
 
-__AQEXPORT__(bool) additem(ColRef<double>X, long y, long size){
-	long j = 0;
-	if(size>0){
-		free(data);
-		free(result);
-		pt = 0;
-		data=(double**)malloc(size*sizeof(double*));
-		result=(long*)malloc(size*sizeof(long));
+__AQEXPORT__(bool) fit(ColRef<ColRef<double>> X, ColRef<int> y){
+	if(X.size != y.size)return 0;
+	double** data = (double**)malloc(X.size*sizeof(double*));
+	long* result = (long*)malloc(y.size*sizeof(long));
+	for(long i=0; i<X.size; i++){
+		data[i] = X.container[i].container;
+		result[i] = y.container[i];
 	}
 	data[pt] = (double*)malloc(X.size*sizeof(double));
 	for(j=0; j<X.size; j++){
@@ -36,19 +32,32 @@ __AQEXPORT__(bool) additem(ColRef<double>X, long y, long size){
 	pt ++;
 	return 1;
 }
-__AQEXPORT__(bool) fit(){
-	if(pt<=0)return 0;
-	dt->fit(data, result, pt);
-	return 1;
+__AQEXPORT__(bool) fit(vector_type<vector_type<double>> v, vector_type<long> res){
+	double** data = (double**)malloc(v.size*sizeof(double*));
+	for(int i = 0; i < v.size; ++i)
+		data[i] = v.container[i].container;
+	dt->fit(data, res.container, v.size);
+	return true;
 }
 
-__AQEXPORT__(ColRef_storage) predict(){
-	int* result = (int*)malloc(pt*sizeof(int));
-	for(long i=0; i<pt; i++){
-		result[i]=dt->Test(data[i], dt->DTree);
-	}
+__AQEXPORT__(vectortype_cstorage) predict(vector_type<vector_type<double>> v){
+	int* result = (int*)malloc(v.size*sizeof(int));
 	
-	return ColRef_storage(new ColRef_storage(result, pt, 0, "prediction", 0), 1, 0, "prediction", 0);
+	for(long i=0; i<v.size; i++){
+		result[i]=dt->Test(v.container[i].container, dt->DTree);
+		//printf("%d ", result[i]);
+	}
+	auto container = (vector_type<int>*)malloc(sizeof(vector_type<int>));
+	container->size = v.size;
+	container->capacity = 0;
+	container->container = result;
+	// container->out(10);
+	// ColRef<vector_type<int>>* col = (ColRef<vector_type<int>>*)malloc(sizeof(ColRef<vector_type<int>>));
+	auto ret = vectortype_cstorage{.container = container, .size = 1, .capacity = 0};
+	// col->initfrom(ret, "sibal");
+	// print(*col);
+	return ret;
+	//return true;
 }
 
 
diff --git a/server/Makefile b/server/Makefile
index cb082c8..a2d4e44 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -1,6 +1,6 @@
 debug:
-	g++ -g3 -O0 server/server.cpp server/io.cpp  -o a.out -Wall -Wextra -Wpedantic -lpthread
+	g++ -g3 -O0 server/server.cpp server/libaquery.cpp  -o a.out -Wall -Wextra -Wpedantic -lpthread
 	
 test:
-	g++ --std=c++1z -g3 -O0 server.cpp io.cpp  -o a.out -Wall -Wextra -Wpedantic -lpthread
+	g++ --std=c++1z -g3 -O0 server.cpp libaquery.cpp  -o a.out -Wall -Wextra -Wpedantic -lpthread
 	
diff --git a/server/aggregations.h b/server/aggregations.h
index 5338e23..cb4bcbe 100644
--- a/server/aggregations.h
+++ b/server/aggregations.h
@@ -202,6 +202,102 @@ decayed_t<VT, types::GetFPType<types::GetLongType<T>>> avgw(uint32_t w, const VT
 	return ret;
 }
 
+template<class T, template<typename ...> class VT, bool sd = false>
+decayed_t<VT, types::GetFPType<types::GetLongType<T>>> varw(uint32_t w, const VT<T>& arr) {
+	// Windowed variance (std. deviation when sd): ret[i] spans the last w elements ending at i.
+	using FPType = types::GetFPType<types::GetLongType<T>>;
+	const uint32_t& len = arr.size;
+	decayed_t<VT, FPType> ret(len);
+	uint32_t i = 0;
+	types::GetLongType<T> s{};
+	w = w > len ? len : w;
+	FPType EnX {},  MnX{}; // running mean / running sum of squared deviations
+	if (len) {
+		s = arr[0];
+		if (!w) w = 1; // a zero-width window would divide by zero below
+		EnX = arr[0];
+		ret[i++] = 0;
+	}
+	// Warm-up while the window fills; bounded by w (it used to run to len, ignoring w).
+	for (; i < w; ++i){
+		s += arr[i];
+		FPType _EnX = s / (FPType)(i + 1);
+		MnX += (arr[i] - EnX) * (arr[i] - _EnX);
+		EnX = _EnX;
+		ret[i] = MnX / (FPType)(i + 1);
+		if constexpr(sd) ret[i-1] = sqrt(ret[i-1]); // deferred by one slot, finished after the loops
+	}
+	const FPType rw = 1 / (FPType)w;
+	// Slide: arr[i] enters, arr[i - w] leaves; var' = var - dex * (2 * mean + dex - (new + old)).
+	for (; i < len; ++i){
+		const FPType dw = (FPType)arr[i] - (FPType)arr[i - w];
+		const FPType sw = (FPType)arr[i] + (FPType)arr[i - w];
+		const FPType dex = dw * rw; // shift of the window mean
+		ret[i] = ret[i-1] - dex*(EnX + EnX + dex - sw);
+		if constexpr(sd) ret[i-1] = sqrt(ret[i-1]); // ret[i-1] is no longer needed as a variance
+		EnX += dex;
+	}
+	if constexpr(sd) 
+		if(i) ret[i-1] = sqrt(ret[i-1]);
+	return ret;
+}
+
+template<class T, template<typename ...> class VT>
+types::GetFPType<types::GetLongType<decays<T>>> var(const VT<T>& arr) {
+	// Population variance of arr, computed as (sum(x^2) - sum(x)^2 / n) / n.
+	typedef types::GetFPType<types::GetLongType<decays<T>>> FPType;
+	const uint32_t& len = arr.size;
+	if (!len) return 0; // empty input: nothing to average, avoid dividing by zero
+	types::GetLongType<T> s{0};   // sum of the elements
+	types::GetLongType<T> ssq{0}; // sum of the squared elements
+	// Each element is accumulated exactly once. The sums used to be seeded with
+	// arr[0] and the loop then restarted at index 0, counting arr[0] twice.
+	for (uint32_t i = 0; i < len; ++i){
+		s += arr[i];
+		ssq += arr[i] * arr[i];
+	}
+	const FPType n = (FPType)len; // true element count (previously len + 1)
+	return (ssq - s * s / n) / n;
+}
+
+template<class T, template<typename ...> class VT, bool sd = false>
+decayed_t<VT, types::GetFPType<types::GetLongType<T>>> vars(const VT<T>& arr) {
+	typedef types::GetFPType<types::GetLongType<T>> FPType;
+	const uint32_t& len = arr.size;
+	decayed_t<VT, FPType> ret(len);
+	uint32_t i = 0;
+	types::GetLongType<T> s{};
+	FPType MnX{};  // running sum of squared deviations from the mean
+	FPType EnX {}; // running mean of arr[0..i]
+	if (len) {
+		// Running variance: ret[i] is the variance (sqrt of it when sd) of arr[0..i].
+		s = arr[0];
+		EnX = arr[0];
+		ret[i++] = 0;
+	}
+	for (; i < len; ++i){
+		s += arr[i];
+		FPType _EnX = s / (FPType)(i + 1);
+		// Incremental update using the mean before and after adding arr[i].
+		MnX += (arr[i] - EnX) * (arr[i] - _EnX);
+		EnX = _EnX;
+		ret[i] = MnX / (FPType)(i + 1);
+		if constexpr(sd) ret[i] = sqrt(ret[i]);
+	}
+	return ret;
+}
+template<class T, template<typename ...> class VT>
+types::GetFPType<types::GetLongType<decays<T>>> stddev(const VT<T>& arr) { // square root of var(arr)
+	return sqrt(var(arr));
+}
+template<class T, template<typename ...> class VT>
+decayed_t<VT, types::GetFPType<types::GetLongType<T>>> stddevs(const VT<T>& arr) { // vars() with sd = true
+	return vars<T, VT, true>(arr);
+}
+template<class T, template<typename ...> class VT>
+decayed_t<VT, types::GetFPType<types::GetLongType<T>>> stddevw(uint32_t w, const VT<T>& arr) { // varw() with sd = true
+	return varw<T, VT, true>(w, arr);
+}
 // use getSignedType
 template<class T, template<typename ...> class VT>
 decayed_t<VT, T> deltas(const VT<T>& arr) {
@@ -251,26 +347,33 @@ T first(const VT<T>& arr) {
 }
 
 
+
 #define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \
-template <class T> constexpr inline T NAME(const T& v) { return RET; }
+template <class T> constexpr T NAME(const T& v) { return RET; }
 
 // non-aggreation count. E.g. SELECT COUNT(col) from table; 
-template <class T> constexpr inline T count(const T& v) { return 1; }
-template <class T> constexpr inline T max(const T& v) { return v; }
-template <class T> constexpr inline T min(const T& v) { return v; }
-template <class T> constexpr inline T avg(const T& v) { return v; }
-template <class T> constexpr inline T sum(const T& v) { return v; }
-template <class T> constexpr inline T maxw(uint32_t, const T& v) { return v; }
-template <class T> constexpr inline T minw(uint32_t, const T& v) { return v; }
-template <class T> constexpr inline T avgw(uint32_t, const T& v) { return v; }
-template <class T> constexpr inline T sumw(uint32_t, const T& v) { return v; }
-template <class T> constexpr inline T ratiow(uint32_t, const T& v) { return 1; }
-template <class T> constexpr inline T maxs(const T& v) { return v; }
-template <class T> constexpr inline T mins(const T& v) { return v; }
-template <class T> constexpr inline T avgs(const T& v) { return v; }
-template <class T> constexpr inline T sums(const T& v) { return v; }
-template <class T> constexpr inline T last(const T& v) { return v; }
-template <class T> constexpr inline T prev(const T& v) { return v; }
-template <class T> constexpr inline T aggnext(const T& v) { return v; }
-template <class T> constexpr inline T daltas(const T& v) { return 0; }
-template <class T> constexpr inline T ratios(const T& v) { return 1; }
+template <class T> constexpr T count(const T&) { return 1; }
+template <class T> constexpr T var(const T&) { return 0; }
+template <class T> constexpr T vars(const T&) { return 0; }
+template <class T> constexpr T varw(uint32_t, const T&) { return 0; }
+template <class T> constexpr T stddev(const T&) { return 0; }
+template <class T> constexpr T stddevs(const T&) { return 0; }
+template <class T> constexpr T stddevw(uint32_t, const T&) { return 0; }
+template <class T> constexpr T max(const T& v) { return v; }
+template <class T> constexpr T min(const T& v) { return v; }
+template <class T> constexpr T avg(const T& v) { return v; }
+template <class T> constexpr T sum(const T& v) { return v; }
+template <class T> constexpr T maxw(uint32_t, const T& v) { return v; }
+template <class T> constexpr T minw(uint32_t, const T& v) { return v; }
+template <class T> constexpr T avgw(uint32_t, const T& v) { return v; }
+template <class T> constexpr T sumw(uint32_t, const T& v) { return v; }
+template <class T> constexpr T ratiow(uint32_t, const T&) { return 1; }
+template <class T> constexpr T maxs(const T& v) { return v; }
+template <class T> constexpr T mins(const T& v) { return v; }
+template <class T> constexpr T avgs(const T& v) { return v; }
+template <class T> constexpr T sums(const T& v) { return v; }
+template <class T> constexpr T last(const T& v) { return v; }
+template <class T> constexpr T prev(const T& v) { return v; }
+template <class T> constexpr T aggnext(const T& v) { return v; }
+template <class T> constexpr T daltas(const T&) { return 0; }
+template <class T> constexpr T ratios(const T&) { return 1; }
diff --git a/server/dragonbox/dragonbox.h b/server/dragonbox/dragonbox.h
new file mode 100644
index 0000000..e4b954d
--- /dev/null
+++ b/server/dragonbox/dragonbox.h
@@ -0,0 +1,2658 @@
+// Copyright 2020-2022 Junekey Jeon
+//
+// The contents of this file may be used under the terms of
+// the Apache License v2.0 with LLVM Exceptions.
+//
+//    (See accompanying file LICENSE-Apache or copy at
+//     https://llvm.org/foundation/relicensing/LICENSE.txt)
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE-Boost or copy at
+//     https://www.boost.org/LICENSE_1_0.txt)
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+
+
+#ifndef JKJ_HEADER_DRAGONBOX
+#define JKJ_HEADER_DRAGONBOX
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+
+// Suppress additional buffer overrun check.
+// I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun
+// attacks. No, they aren't.
+#if defined(__GNUC__) || defined(__clang__)
+    #define JKJ_SAFEBUFFERS
+    #define JKJ_FORCEINLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+    #define JKJ_SAFEBUFFERS __declspec(safebuffers)
+    #define JKJ_FORCEINLINE __forceinline
+#else
+    #define JKJ_SAFEBUFFERS
+    #define JKJ_FORCEINLINE inline
+#endif
+
+#if defined(__has_builtin)
+    #define JKJ_DRAGONBOX_HAS_BUILTIN(x) __has_builtin(x)
+#else
+    #define JKJ_DRAGONBOX_HAS_BUILTIN(x) false
+#endif
+
+#if defined(_MSC_VER)
+    #include <intrin.h>
+#endif
+
+namespace jkj::dragonbox {
+    namespace detail {
+        template <class T>
+        constexpr std::size_t
+            physical_bits = sizeof(T) * std::numeric_limits<unsigned char>::digits;
+
+        template <class T>
+        constexpr std::size_t value_bits =
+            std::numeric_limits<std::enable_if_t<std::is_unsigned_v<T>, T>>::digits;
+    }
+
+    // These classes expose encoding specs of IEEE-754-like floating-point formats.
+    // Currently available formats are IEEE754-binary32 & IEEE754-binary64.
+
+    struct ieee754_binary32 {
+        static constexpr int significand_bits = 23;
+        static constexpr int exponent_bits = 8;
+        static constexpr int min_exponent = -126;
+        static constexpr int max_exponent = 127;
+        static constexpr int exponent_bias = -127;
+        static constexpr int decimal_digits = 9;
+    };
+    struct ieee754_binary64 {
+        static constexpr int significand_bits = 52;
+        static constexpr int exponent_bits = 11;
+        static constexpr int min_exponent = -1022;
+        static constexpr int max_exponent = 1023;
+        static constexpr int exponent_bias = -1023;
+        static constexpr int decimal_digits = 17;
+    };
+
+    // A floating-point traits class defines ways to interpret a bit pattern of given size as an
+    // encoding of floating-point number. This is a default implementation of such a traits class,
+    // supporting ways to interpret 32-bits into a binary32-encoded floating-point number and to
+    // interpret 64-bits into a binary64-encoded floating-point number. Users might specialize this
+    // class to change the default behavior for certain types.
+    template <class T>
+    struct default_float_traits {
+        // I don't know if there is a truly reliable way of detecting
+        // IEEE-754 binary32/binary64 formats; I just did my best here.
+        static_assert(std::numeric_limits<T>::is_iec559 && std::numeric_limits<T>::radix == 2 &&
+                          (detail::physical_bits<T> == 32 || detail::physical_bits<T> == 64),
+                      "default_ieee754_traits only works for 32-bits or 64-bits types "
+                      "supporting binary32 or binary64 formats!");
+
+        // The type that is being viewed.
+        using type = T;
+
+        // Refers to the format specification class.
+        using format =
+            std::conditional_t<detail::physical_bits<T> == 32, ieee754_binary32, ieee754_binary64>;
+
+        // Defines an unsigned integer type that is large enough to carry a variable of type T.
+        // Most of the operations will be done on this integer type.
+        using carrier_uint =
+            std::conditional_t<detail::physical_bits<T> == 32, std::uint32_t, std::uint64_t>;
+        static_assert(sizeof(carrier_uint) == sizeof(T));
+
+        // Number of bits in the above unsigned integer type.
+        static constexpr int carrier_bits = int(detail::physical_bits<carrier_uint>);
+
+        // Convert from carrier_uint into the original type.
+        // Depending on the floating-point encoding format, this operation might not be possible for
+        // some specific bit patterns. However, the contract is that u always denotes a
+        // valid bit pattern, so this function must be assumed to be noexcept.
+        static T carrier_to_float(carrier_uint u) noexcept {
+            T x;
+            std::memcpy(&x, &u, sizeof(carrier_uint));
+            return x;
+        }
+
+        // Same as above.
+        static carrier_uint float_to_carrier(T x) noexcept {
+            carrier_uint u;
+            std::memcpy(&u, &x, sizeof(carrier_uint));
+            return u;
+        }
+
+        // Extract exponent bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. This function does not do bias adjustment.
+        static constexpr unsigned int extract_exponent_bits(carrier_uint u) noexcept {
+            constexpr int significand_bits = format::significand_bits;
+            constexpr int exponent_bits = format::exponent_bits;
+            static_assert(detail::value_bits<unsigned int> > exponent_bits);
+            constexpr auto exponent_bits_mask =
+                (unsigned int)(((unsigned int)(1) << exponent_bits) - 1);
+            return (unsigned int)(u >> significand_bits) & exponent_bits_mask;
+        }
+
+        // Extract significand bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. The result does not contain the implicit bit.
+        static constexpr carrier_uint extract_significand_bits(carrier_uint u) noexcept {
+            constexpr auto mask = carrier_uint((carrier_uint(1) << format::significand_bits) - 1);
+            return carrier_uint(u & mask);
+        }
+
+        // Remove the exponent bits and extract significand bits together with the sign bit.
+        static constexpr carrier_uint remove_exponent_bits(carrier_uint u,
+                                                           unsigned int exponent_bits) noexcept {
+            return u ^ (carrier_uint(exponent_bits) << format::significand_bits);
+        }
+
+        // Shift the obtained signed significand bits to the left by 1 to remove the sign bit.
+        static constexpr carrier_uint remove_sign_bit_and_shift(carrier_uint u) noexcept {
+            return carrier_uint(carrier_uint(u) << 1);
+        }
+
+        // The actual value of exponent is obtained by adding this value to the extracted exponent
+        // bits.
+        static constexpr int exponent_bias =
+            1 - (1 << (carrier_bits - format::significand_bits - 2));
+
+        // Obtain the actual value of the binary exponent from the extracted exponent bits.
+        static constexpr int binary_exponent(unsigned int exponent_bits) noexcept {
+            if (exponent_bits == 0) {
+                return format::min_exponent;
+            }
+            else {
+                return int(exponent_bits) + format::exponent_bias;
+            }
+        }
+
+        // Obtain the actual value of the binary significand from the extracted significand bits
+        // and exponent bits.
+        static constexpr carrier_uint binary_significand(carrier_uint significand_bits,
+                                                         unsigned int exponent_bits) noexcept {
+            if (exponent_bits == 0) {
+                return significand_bits;
+            }
+            else {
+                return significand_bits | (carrier_uint(1) << format::significand_bits);
+            }
+        }
+
+
+        /* Various boolean observer functions */
+
+        static constexpr bool is_nonzero(carrier_uint u) noexcept { return (u << 1) != 0; }
+        static constexpr bool is_positive(carrier_uint u) noexcept {
+            constexpr auto sign_bit = carrier_uint(1)
+                                      << (format::significand_bits + format::exponent_bits);
+            return u < sign_bit;
+        }
+        static constexpr bool is_negative(carrier_uint u) noexcept { return !is_positive(u); }
+        static constexpr bool is_finite(unsigned int exponent_bits) noexcept {
+            constexpr unsigned int exponent_bits_all_set = (1u << format::exponent_bits) - 1;
+            return exponent_bits != exponent_bits_all_set;
+        }
+        static constexpr bool has_all_zero_significand_bits(carrier_uint u) noexcept {
+            return (u << 1) == 0;
+        }
+        static constexpr bool has_even_significand_bits(carrier_uint u) noexcept {
+            return u % 2 == 0;
+        }
+    };
+
+    // Convenient wrappers for floating-point traits classes.
+    // In order to reduce the argument passing overhead, these classes should be as simple as
+    // possible (e.g., no inheritance, no private non-static data member, etc.; this is an
+    // unfortunate fact about common ABI convention).
+
+    template <class T, class Traits = default_float_traits<T>>
+    struct float_bits;
+
+    template <class T, class Traits = default_float_traits<T>>
+    struct signed_significand_bits;
+
+    template <class T, class Traits>
+    struct float_bits {
+        using type = T;
+        using traits_type = Traits;
+        using carrier_uint = typename traits_type::carrier_uint;
+
+        carrier_uint u;
+
+        float_bits() = default;
+        constexpr explicit float_bits(carrier_uint bit_pattern) noexcept : u{bit_pattern} {}
+        constexpr explicit float_bits(T float_value) noexcept
+            : u{traits_type::float_to_carrier(float_value)} {}
+
+        constexpr T to_float() const noexcept { return traits_type::carrier_to_float(u); }
+
+        // Extract exponent bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. This function does not do bias adjustment.
+        constexpr unsigned int extract_exponent_bits() const noexcept {
+            return traits_type::extract_exponent_bits(u);
+        }
+
+        // Extract significand bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. The result does not contain the implicit bit.
+        constexpr carrier_uint extract_significand_bits() const noexcept {
+            return traits_type::extract_significand_bits(u);
+        }
+
+        // Remove the exponent bits and extract significand bits together with the sign bit.
+        constexpr auto remove_exponent_bits(unsigned int exponent_bits) const noexcept {
+            return signed_significand_bits<type, traits_type>(
+                traits_type::remove_exponent_bits(u, exponent_bits));
+        }
+
+        // Obtain the actual value of the binary exponent from the extracted exponent bits.
+        static constexpr int binary_exponent(unsigned int exponent_bits) noexcept {
+            return traits_type::binary_exponent(exponent_bits);
+        }
+        constexpr int binary_exponent() const noexcept {
+            return binary_exponent(extract_exponent_bits());
+        }
+
+        // Obtain the actual value of the binary significand from the extracted significand bits
+        // and exponent bits.
+        static constexpr carrier_uint binary_significand(carrier_uint significand_bits,
+                                                         unsigned int exponent_bits) noexcept {
+            return traits_type::binary_significand(significand_bits, exponent_bits);
+        }
+        constexpr carrier_uint binary_significand() const noexcept {
+            return binary_significand(extract_significand_bits(), extract_exponent_bits());
+        }
+
+        constexpr bool is_nonzero() const noexcept { return traits_type::is_nonzero(u); }
+        constexpr bool is_positive() const noexcept { return traits_type::is_positive(u); }
+        constexpr bool is_negative() const noexcept { return traits_type::is_negative(u); }
+        constexpr bool is_finite(unsigned int exponent_bits) const noexcept {
+            return traits_type::is_finite(exponent_bits);
+        }
+        constexpr bool is_finite() const noexcept {
+            return traits_type::is_finite(extract_exponent_bits());
+        }
+        constexpr bool has_even_significand_bits() const noexcept {
+            return traits_type::has_even_significand_bits(u);
+        }
+    };
+
+    template <class T, class Traits>
+    struct signed_significand_bits {
+        using type = T;
+        using traits_type = Traits;
+        using carrier_uint = typename traits_type::carrier_uint;
+
+        carrier_uint u;
+
+        signed_significand_bits() = default;
+        constexpr explicit signed_significand_bits(carrier_uint bit_pattern) noexcept
+            : u{bit_pattern} {}
+
+        // Shift the obtained signed significand bits to the left by 1 to remove the sign bit.
+        constexpr carrier_uint remove_sign_bit_and_shift() const noexcept {
+            return traits_type::remove_sign_bit_and_shift(u);
+        }
+
+        constexpr bool is_positive() const noexcept { return traits_type::is_positive(u); }
+        constexpr bool is_negative() const noexcept { return traits_type::is_negative(u); }
+        constexpr bool has_all_zero_significand_bits() const noexcept {
+            return traits_type::has_all_zero_significand_bits(u);
+        }
+        constexpr bool has_even_significand_bits() const noexcept {
+            return traits_type::has_even_significand_bits(u);
+        }
+    };
+
+    namespace detail {
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Bit operation intrinsics.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace bits {
+            // Rotate n right by r bits; most compilers should turn this into a ROR instruction.
+            inline std::uint32_t rotr(std::uint32_t n, std::uint32_t r) noexcept {
+                r &= 31;  // the rotation count is taken modulo the bit width
+                return (n >> r) | (n << ((32 - r) & 31));  // mask: r == 0 must not shift by 32 (UB)
+            }
+            inline std::uint64_t rotr(std::uint64_t n, std::uint32_t r) noexcept {
+                r &= 63;  // the rotation count is taken modulo the bit width
+                return (n >> r) | (n << ((64 - r) & 63));  // mask: r == 0 must not shift by 64 (UB)
+            }
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Utilities for wide unsigned integer arithmetic.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace wuint {
+            // Compilers might support built-in 128-bit integer types. However, it seems that
+            // emulating them with a pair of 64-bit integers actually produces better code,
+            // so we avoid using those built-ins. That said, they are still useful for
+            // implementing 64-bit x 64-bit -> 128-bit multiplication.
+
+            // clang-format off
+#if defined(__SIZEOF_INT128__)
+		// __extension__ silences "error: ISO C++ does not support '__int128' for 'type name'
+		// [-Wpedantic]"; it is only emitted when __GNUC__ is defined.
+#if defined(__GNUC__)
+			__extension__
+#endif // defined(__GNUC__)
+				using builtin_uint128_t = unsigned __int128;
+#endif // defined(__SIZEOF_INT128__)
+            // clang-format on
+
+            struct uint128 {  // Unsigned 128-bit integer emulated with two 64-bit halves.
+                uint128() = default;
+                // The defaulted constructor above does not initialize the two halves.
+                std::uint64_t high_;
+                std::uint64_t low_;
+
+                constexpr uint128(std::uint64_t high, std::uint64_t low) noexcept
+                    : high_{high}, low_{low} {}
+
+                constexpr std::uint64_t high() const noexcept { return high_; }
+                constexpr std::uint64_t low() const noexcept { return low_; }
+                // Adds n in place: a carry out of low_ goes into high_; a carry out of high_ is dropped.
+                uint128& operator+=(std::uint64_t n) & noexcept {
+#if JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_addcll)
+                    unsigned long long carry;
+                    low_ = __builtin_addcll(low_, n, 0, &carry);
+                    high_ = __builtin_addcll(high_, 0, carry, &carry);
+#elif JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_ia32_addcarryx_u64)
+                    unsigned long long result;
+                    auto carry = __builtin_ia32_addcarryx_u64(0, low_, n, &result);
+                    low_ = result;
+                    __builtin_ia32_addcarryx_u64(carry, high_, 0, &result);
+                    high_ = result;
+#elif defined(_MSC_VER) && defined(_M_X64)
+                    auto carry = _addcarry_u64(0, low_, n, &low_);
+                    _addcarry_u64(carry, high_, 0, &high_);
+#else // portable fallback without carry intrinsics
+                    auto sum = low_ + n;
+                    high_ += (sum < low_ ? 1 : 0);  // the unsigned sum wrapped iff it is below low_
+                    low_ = sum;
+#endif
+                    return *this;
+                }
+            };
+
+            static inline std::uint64_t umul64(std::uint32_t x, std::uint32_t y) noexcept {
+#if !defined(_MSC_VER) || !defined(_M_IX86)
+                return static_cast<std::uint64_t>(x) * y;  // widen first: full 64-bit product
+#else
+                return __emulu(x, y);  // MSVC targeting 32-bit x86: intrinsic for the same product
+#endif
+            }
+
+            // Full 128-bit product of two 64-bit unsigned integers, returned as (high, low) halves.
+            JKJ_SAFEBUFFERS inline uint128 umul128(std::uint64_t x, std::uint64_t y) noexcept {
+#if defined(__SIZEOF_INT128__)
+                const builtin_uint128_t product = builtin_uint128_t(x) * builtin_uint128_t(y);
+                return {std::uint64_t(product >> 64), std::uint64_t(product)};
+#elif defined(_MSC_VER) && defined(_M_X64)
+                uint128 product;
+                product.low_ = _umul128(x, y, &product.high_);
+                return product;
+#else
+                const auto x_hi = static_cast<std::uint32_t>(x >> 32);
+                const auto x_lo = static_cast<std::uint32_t>(x);
+                const auto y_hi = static_cast<std::uint32_t>(y >> 32);
+                const auto y_lo = static_cast<std::uint32_t>(y);
+                // Schoolbook multiplication on 32-bit limbs: four 32 x 32 -> 64-bit partial products.
+                const auto hi_hi = umul64(x_hi, y_hi);
+                const auto lo_hi = umul64(x_lo, y_hi);
+                const auto hi_lo = umul64(x_hi, y_lo);
+                const auto lo_lo = umul64(x_lo, y_lo);
+                // Column sum for bits 32..63; its upper half is the carry into the high word.
+                const auto middle = (lo_lo >> 32) + static_cast<std::uint32_t>(hi_lo) +
+                                    static_cast<std::uint32_t>(lo_hi);
+                return {hi_hi + (middle >> 32) + (hi_lo >> 32) + (lo_hi >> 32),
+                        (middle << 32) + static_cast<std::uint32_t>(lo_lo)};
+#endif
+            }
+
+            // Upper 64 bits of the 128-bit product of two 64-bit unsigned integers.
+            JKJ_SAFEBUFFERS inline std::uint64_t umul128_upper64(std::uint64_t x,
+                                                                 std::uint64_t y) noexcept {
+#if defined(__SIZEOF_INT128__)
+                return std::uint64_t((builtin_uint128_t(x) * builtin_uint128_t(y)) >> 64);
+#elif defined(_MSC_VER) && defined(_M_X64)
+                return __umulh(x, y);
+#else
+                const auto x_hi = static_cast<std::uint32_t>(x >> 32);
+                const auto x_lo = static_cast<std::uint32_t>(x);
+                const auto y_hi = static_cast<std::uint32_t>(y >> 32);
+                const auto y_lo = static_cast<std::uint32_t>(y);
+                // Schoolbook multiplication on 32-bit limbs: four 32 x 32 -> 64-bit partial products.
+                const auto hi_hi = umul64(x_hi, y_hi);
+                const auto lo_hi = umul64(x_lo, y_hi);
+                const auto hi_lo = umul64(x_hi, y_lo);
+                const auto lo_lo = umul64(x_lo, y_lo);
+                // Column sum for bits 32..63; only its carry (upper half) reaches the result.
+                const auto middle = (lo_lo >> 32) + static_cast<std::uint32_t>(hi_lo) +
+                                    static_cast<std::uint32_t>(lo_hi);
+                return hi_hi + (middle >> 32) + (hi_lo >> 32) + (lo_hi >> 32);
+#endif
+            }
+
+            // Upper 128 bits of the 192-bit product x * y: the full product x * y.high() plus the
+            // contribution of the low half, i.e. the upper 64 bits of x * y.low().
+            JKJ_SAFEBUFFERS inline uint128 umul192_upper128(std::uint64_t x, uint128 y) noexcept {
+                uint128 upper = umul128(x, y.high());
+                upper += umul128_upper64(x, y.low());
+                return upper;
+            }
+
+            // Upper 64 bits of the 96-bit product of a 32-bit and a 64-bit unsigned integer.
+            // Both branches evaluate floor(x * y / 2^32).
+            inline std::uint64_t umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept {
+#if defined(__SIZEOF_INT128__) || (defined(_MSC_VER) && defined(_M_X64))
+                const std::uint64_t x_shifted = static_cast<std::uint64_t>(x) << 32;
+                return umul128_upper64(x_shifted, y);
+#else
+                const auto y_hi = static_cast<std::uint32_t>(y >> 32);
+                const auto y_lo = static_cast<std::uint32_t>(y);
+                // x * y == (x * y_hi) * 2^32 + x * y_lo, so only the top half of x * y_lo counts.
+                const auto hi_part = umul64(x, y_hi);
+                const auto lo_part = umul64(x, y_lo);
+                return hi_part + (lo_part >> 32);
+#endif
+            }
+
+            // Lower 128 bits of the 192-bit product of a 64-bit and a 128-bit unsigned integer.
+            // x * y.high() only reaches the upper returned word, so its wrapped 64-bit value suffices.
+            JKJ_SAFEBUFFERS inline uint128 umul192_lower128(std::uint64_t x, uint128 y) noexcept {
+                const uint128 low_product = umul128(x, y.low());
+                const std::uint64_t upper_word = x * y.high() + low_product.high();
+                return {upper_word, low_product.low()};
+            }
+
+            // Lower 64 bits of the 96-bit product of a 32-bit and a 64-bit unsigned integer,
+            // i.e. the product reduced modulo 2^64.
+            inline std::uint64_t umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept {
+                return static_cast<std::uint64_t>(x) * y;  // unsigned multiplication wraps
+            }
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Some simple utilities for constexpr computation.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        template <int k, class Int>
+        constexpr Int compute_power(Int a) noexcept {
+            // Returns a^k (1 for k == 0) using k multiplications; overflow is not checked.
+            static_assert(k >= 0);
+            Int result = 1;
+            for (int remaining = k; remaining != 0; --remaining)
+                result *= a;
+            return result;
+        }
+
+        template <int a, class UInt>
+        constexpr int count_factors(UInt n) noexcept {
+            // Largest c with a^c dividing n. n == 0 yields 0: unguarded, 0 % a == 0 never ends.
+            static_assert(a > 1);
+            int c = 0;
+            for (; n != 0 && n % a == 0; n /= a) {
+                ++c;
+            }
+            return c;
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Utilities for fast/constexpr log computation.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace log {
+            static_assert((-1 >> 1) == -1, "right-shift for signed integers must be arithmetic");
+            // Strongly typed parameters for compute(), which returns floor((e * m - f) / 2^k).
+            enum class multiply : std::uint32_t {};
+            enum class subtract : std::uint32_t {};
+            enum class shift : std::size_t {};
+            enum class min_exponent : std::int32_t {};
+            enum class max_exponent : std::int32_t {};
+
+            template <multiply m, subtract f, shift k, min_exponent e_min, max_exponent e_max>
+            constexpr int compute(int e) noexcept {
+                assert(std::int32_t(e_min) <= e && e <= std::int32_t(e_max));
+                const std::int32_t scaled = std::int32_t(e) * std::int32_t(m);
+                return static_cast<int>((scaled - std::int32_t(f)) >> std::size_t(k));
+            }
+
+            // Index of the highest set bit of n, i.e. floor(log2(n)).
+            // Returns -1 when n = 0.
+            // Written as a plain shift loop so it can be used in constant expressions.
+            template <class UInt>
+            constexpr int floor_log2(UInt n) noexcept {
+                int highest_bit = -1;
+                for (; n != 0; n >>= 1) {
+                    ++highest_bit;
+                }
+                return highest_bit;
+            }
+
+            static constexpr int floor_log10_pow2_min_exponent = -2620;
+            static constexpr int floor_log10_pow2_max_exponent = 2620;
+            // Evaluates (e * 315653) >> 20; by its name, floor(e * log10(2)) for e in the range above.
+            constexpr int floor_log10_pow2(int e) noexcept {
+                constexpr auto lower = min_exponent(floor_log10_pow2_min_exponent);
+                constexpr auto upper = max_exponent(floor_log10_pow2_max_exponent);
+                return compute<multiply(315653), subtract(0), shift(20), lower, upper>(e);
+            }
+
+            static constexpr int floor_log2_pow10_min_exponent = -1233;
+            static constexpr int floor_log2_pow10_max_exponent = 1233;
+            // Evaluates (e * 1741647) >> 19; by its name, floor(e * log2(10)) for e in the range above.
+            constexpr int floor_log2_pow10(int e) noexcept {
+                constexpr auto lower = min_exponent(floor_log2_pow10_min_exponent);
+                constexpr auto upper = max_exponent(floor_log2_pow10_max_exponent);
+                return compute<multiply(1741647), subtract(0), shift(19), lower, upper>(e);
+            }
+
+            static constexpr int floor_log10_pow2_minus_log10_4_over_3_min_exponent = -2985;
+            static constexpr int floor_log10_pow2_minus_log10_4_over_3_max_exponent = 2936;
+            // Evaluates (e * 631305 - 261663) >> 21; by its name, floor(e * log10(2) - log10(4/3)).
+            constexpr int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept {
+                constexpr auto lower = min_exponent(floor_log10_pow2_minus_log10_4_over_3_min_exponent);
+                constexpr auto upper = max_exponent(floor_log10_pow2_minus_log10_4_over_3_max_exponent);
+                return compute<multiply(631305), subtract(261663), shift(21), lower, upper>(e);
+            }
+
+            static constexpr int floor_log5_pow2_min_exponent = -1831;
+            static constexpr int floor_log5_pow2_max_exponent = 1831;
+            // Evaluates (e * 225799) >> 19; by its name, floor(e * log5(2)) for e in the range above.
+            constexpr int floor_log5_pow2(int e) noexcept {
+                constexpr auto lower = min_exponent(floor_log5_pow2_min_exponent);
+                constexpr auto upper = max_exponent(floor_log5_pow2_max_exponent);
+                return compute<multiply(225799), subtract(0), shift(19), lower, upper>(e);
+            }
+
+            static constexpr int floor_log5_pow2_minus_log5_3_min_exponent = -3543;
+            static constexpr int floor_log5_pow2_minus_log5_3_max_exponent = 2427;
+            // Evaluates (e * 451597 - 715764) >> 20; by its name, floor(e * log5(2) - log5(3)).
+            constexpr int floor_log5_pow2_minus_log5_3(int e) noexcept {
+                constexpr auto lower = min_exponent(floor_log5_pow2_minus_log5_3_min_exponent);
+                constexpr auto upper = max_exponent(floor_log5_pow2_minus_log5_3_max_exponent);
+                return compute<multiply(451597), subtract(715764), shift(20), lower, upper>(e);
+            }
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Utilities for fast divisibility tests.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace div {
+            // Replace n by floor(n / 10^N).
+            // Returns true if and only if n is divisible by 10^N.
+            // Precondition: n <= 10^(N+1)
+            // !!It takes an in-out parameter!!
+            template <int N>
+            struct divide_by_pow10_info;  // constants for dividing by 10^N via multiply-and-shift
+
+            template <>
+            struct divide_by_pow10_info<1> {
+                static constexpr std::uint32_t magic_number{6554};  // ceil(2^16 / 10)
+                static constexpr int shift_amount{16};
+            };
+
+            template <>
+            struct divide_by_pow10_info<2> {
+                static constexpr std::uint32_t magic_number{656};  // ceil(2^16 / 100)
+                static constexpr int shift_amount{16};
+            };
+
+            template <int N>
+            constexpr bool check_divisibility_and_divide_by_pow10(std::uint32_t& n) noexcept {
+                // Guard: 10^(N+1), used in the precondition check below, must fit into 32 bits.
+                static_assert(N + 1 <= log::floor_log10_pow2(31));
+                assert(n <= compute_power<N + 1>(std::uint32_t(10)));
+
+                using info = divide_by_pow10_info<N>;
+                constexpr std::uint32_t low_mask = (std::uint32_t(1) << info::shift_amount) - 1;
+                // One multiplication yields both answers: the bits above shift_amount hold the
+                // quotient, and the low bits fall below magic_number exactly when 10^N divides n.
+                const std::uint32_t product = n * info::magic_number;
+                const bool divisible = (product & low_mask) < info::magic_number;
+                n = product >> info::shift_amount;  // in-out: n now holds floor(n / 10^N)
+                return divisible;
+            }
+
+            // Multiply-and-shift evaluation of floor(n / 10^N) for small n and N.
+            // Precondition: n <= 10^(N+1)
+            template <int N>
+            constexpr std::uint32_t small_division_by_pow10(std::uint32_t n) noexcept {
+                // Guard: 10^(N+1), used in the precondition check below, must fit into 32 bits.
+                static_assert(N + 1 <= log::floor_log10_pow2(31));
+                assert(n <= compute_power<N + 1>(std::uint32_t(10)));
+                using info = divide_by_pow10_info<N>;
+                const std::uint32_t product = n * info::magic_number;
+                return product >> info::shift_amount;
+            }
+
+            // Compute floor(n / 10^N) for small N.
+            // Precondition: n <= n_max
+            template <int N, class UInt, UInt n_max>
+            constexpr UInt divide_by_pow10(UInt n) noexcept {
+                static_assert(N >= 0);
+
+                // 32-bit division by 100 is spelled out as multiply-and-shift: compilers are
+                // expected to emit the same code for "n / 100", but MSVC was seen to produce a
+                // less efficient sequence (mul + mov instead of a single imul).
+                constexpr bool is_u32_div_100 = std::is_same_v<UInt, std::uint32_t> && N == 2;
+                // 64-bit division by 1000 takes the multiply-high shortcut only while n_max stays
+                // within the bound that meets the correctness condition of the magic constant.
+                constexpr bool is_u64_div_1000 = std::is_same_v<UInt, std::uint64_t> && N == 3 &&
+                                                 n_max <= std::uint64_t(15534100272597517998ull);
+                if constexpr (is_u32_div_100) {
+                    return std::uint32_t(wuint::umul64(n, std::uint32_t(1374389535)) >> 37);
+                }
+                else if constexpr (is_u64_div_1000) {
+                    return wuint::umul128_upper64(n, std::uint64_t(2361183241434822607ull)) >> 7;
+                }
+                else {
+                    constexpr auto divisor = compute_power<N>(UInt(10));
+                    return n / divisor;
+                }
+            }
+        }
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // Return types for the main interface function.
+    ////////////////////////////////////////////////////////////////////////////////////////
+
+    template <class UInt, bool is_signed, bool trailing_zero_flag>
+    struct decimal_fp;  // primary template; the four flag combinations are specialized below
+
+    template <class UInt>
+    struct decimal_fp<UInt, false, false> {  // neither flag set: significand and exponent only
+        using carrier_uint = UInt;
+
+        carrier_uint significand;
+        int exponent;
+    };
+
+    template <class UInt>
+    struct decimal_fp<UInt, true, false> {  // is_signed: adds the is_negative member
+        using carrier_uint = UInt;
+
+        carrier_uint significand;
+        int exponent;
+        bool is_negative;
+    };
+
+    template <class UInt>
+    struct decimal_fp<UInt, false, true> {  // trailing_zero_flag: adds may_have_trailing_zeros
+        using carrier_uint = UInt;
+
+        carrier_uint significand;
+        int exponent;
+        bool may_have_trailing_zeros;
+    };
+
+    template <class UInt>
+    struct decimal_fp<UInt, true, true> {  // both flags: is_negative, then may_have_trailing_zeros
+        using carrier_uint = UInt;
+
+        carrier_uint significand;
+        int exponent;
+        bool is_negative;
+        bool may_have_trailing_zeros;
+    };
+
+    template <class UInt>
+    using unsigned_decimal_fp = decimal_fp<UInt, false, false>;  // shorthand: no sign, no flag
+
+    template <class UInt>
+    using signed_decimal_fp = decimal_fp<UInt, true, false>;  // shorthand: sign, no flag
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // Computed cache entries.
+    ////////////////////////////////////////////////////////////////////////////////////////
+
+    namespace detail {
+        template <class FloatFormat>
+        struct cache_holder;  // primary template; specialized per floating-point format
+
+        template <>
+        struct cache_holder<ieee754_binary32> {
+            using cache_entry_type = std::uint64_t;
+            static constexpr int cache_bits = 64;
+            static constexpr int min_k = -31;
+            static constexpr int max_k = 46;  // the table below has max_k - min_k + 1 = 78 entries
+            static constexpr cache_entry_type cache[] = {  // presumably indexed by k - min_k (TODO confirm)
+                0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, 0xfd87b5f28300ca0e,
+                0x9e74d1b791e07e49, 0xc612062576589ddb, 0xf79687aed3eec552, 0x9abe14cd44753b53,
+                0xc16d9a0095928a28, 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb,
+                0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, 0xe69594bec44de15c,
+                0x901d7cf73ab0acda, 0xb424dc35095cd810, 0xe12e13424bb40e14, 0x8cbccc096f5088cc,
+                0xafebff0bcb24aaff, 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd,
+                0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, 0xd1b71758e219652c,
+                0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, 0xcccccccccccccccd, 0x8000000000000000,
+                0xa000000000000000, 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000,
+                0xc350000000000000, 0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
+                0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000, 0xe8d4a51000000000,
+                0x9184e72a00000000, 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000,
+                0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
+                0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400, 0xd3c21bcecceda100,
+                0x84595161401484a0, 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940985,
+                0xa18f07d736b90be6, 0xc9f2c9cd04674edf, 0xfc6f7c4045812297, 0x9dc5ada82b70b59e,
+                0xc5371912364ce306, 0xf684df56c3e01bc7, 0x9a130b963a6c115d, 0xc097ce7bc90715b4,
+                0xf0bdc21abb48db21, 0x96769950b50d88f5, 0xbc143fa4e250eb32, 0xeb194f8e1ae525fe,
+                0x92efd1b8d0cf37bf, 0xb7abc627050305ae, 0xe596b7b0c643c71a, 0x8f7e32ce7bea5c70,
+                0xb35dbf821ae4f38c, 0xe0352f62a19e306f};
+        };
+
+        template <>
+        struct cache_holder<ieee754_binary64> {
+            using cache_entry_type = wuint::uint128;
+            static constexpr int cache_bits = 128;
+            static constexpr int min_k = -292;
+            static constexpr int max_k = 326;
+            static constexpr cache_entry_type cache[] = {
+                {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0x9faacf3df73609b1, 0x77b191618c54e9ad},
+                {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e},
+                {0x9becce62836ac577, 0x4ee367f9430aec33}, {0xc2e801fb244576d5, 0x229c41f793cda740},
+                {0xf3a20279ed56d48a, 0x6b43527578c11110}, {0x9845418c345644d6, 0x830a13896b78aaaa},
+                {0xbe5691ef416bd60c, 0x23cc986bc656d554}, {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9},
+                {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54},
+                {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, {0x91376c36d99995be, 0x23100809b9c21fa2},
+                {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, {0xe2e69915b3fff9f9, 0x16c90c8f323f516d},
+                {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, {0xb1442798f49ffb4a, 0x99cd11cfdf41779d},
+                {0xdd95317f31c7fa1d, 0x40405643d711d584}, {0x8a7d3eef7f1cfc52, 0x482835ea666b2573},
+                {0xad1c8eab5ee43b66, 0xda3243650005eed0}, {0xd863b256369d4a40, 0x90bed43e40076a83},
+                {0x873e4f75e2224e68, 0x5a7744a6e804a292}, {0xa90de3535aaae202, 0x711515d0a205cb37},
+                {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, {0x8412d9991ed58091, 0xe858790afe9486c3},
+                {0xa5178fff668ae0b6, 0x626e974dbe39a873}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+                {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, {0xa139029f6a239f72, 0x1c1fffc1ebc44e81},
+                {0xc987434744ac874e, 0xa327ffb266b56221}, {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9},
+                {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, {0xc4ce17b399107c22, 0xcb550fb4384d21d4},
+                {0xf6019da07f549b2b, 0x7e2a53a146606a49}, {0x99c102844f94e0fb, 0x2eda7444cbfc426e},
+                {0xc0314325637a1939, 0xfa911155fefb5309}, {0xf03d93eebc589f88, 0x793555ab7eba27cb},
+                {0x96267c7535b763b5, 0x4bc1558b2f3458df}, {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17},
+                {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, {0x92a1958a7675175f, 0x0bfacd89ec191eca},
+                {0xb749faed14125d36, 0xcef980ec671f667c}, {0xe51c79a85916f484, 0x82b7e12780e7401b},
+                {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16},
+                {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1},
+                {0xaecc49914078536d, 0x58fae9f773886e19}, {0xda7f5bf590966848, 0xaf39a475506a899f},
+                {0x888f99797a5e012d, 0x6d8406c952429604}, {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84},
+                {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, {0x855c3be0a17fcd26, 0x5cf2eea09a550680},
+                {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, {0xd0601d8efc57b08b, 0xf13b94daf124da27},
+                {0x823c12795db6ce57, 0x76c53d08d6b70859}, {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f},
+                {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, {0xfe5d54150b090b02, 0xd3f93b35435d7c4d},
+                {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, {0xc6b8e9b0709f109a, 0x359ab6419ca1091c},
+                {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, {0x9b407691d7fc44f8, 0x79e0de63425dcf1e},
+                {0xc21094364dfb5636, 0x985915fc12f542e5}, {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e},
+                {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, {0xbd8430bd08277231, 0x50c6ff782a838354},
+                {0xece53cec4a314ebd, 0xa4f8bf5635246429}, {0x940f4613ae5ed136, 0x871b7795e136be9a},
+                {0xb913179899f68584, 0x28e2557b59846e40}, {0xe757dd7ec07426e5, 0x331aeada2fe589d0},
+                {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, {0xb4bca50b065abe63, 0x0fed077a756b53aa},
+                {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d},
+                {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, {0xdca04777f541c567, 0xecf0d7a0fc5583a1},
+                {0x89e42caaf9491b60, 0xf41686c49db57245}, {0xac5d37d5b79b6239, 0x311c2875c522ced6},
+                {0xd77485cb25823ac7, 0x7d633293366b828c}, {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+                {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, {0xd267caa862a12d66, 0xd072df63c324fd7c},
+                {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, {0xa46116538d0deb78, 0x52d9be85f074e609},
+                {0xcd795be870516656, 0x67902e276c921f8c}, {0x806bd9714632dff6, 0x00ba1cd8a3db53b7},
+                {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce},
+                {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, {0x9cc3a6eec6311a63, 0xcbe3303674053bb1},
+                {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, {0xf4f1b4d515acb93b, 0xee92fb5515482d45},
+                {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, {0xbf5cd54678eef0b6, 0xd262d45a78a0635e},
+                {0xef340a98172aace4, 0x86fb897116c87c35}, {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1},
+                {0xbae0a846d2195712, 0x8974836059cca10a}, {0xe998d258869facd7, 0x2bd1a438703fc94c},
+                {0x91ff83775423cc06, 0x7b6306a34627ddd0}, {0xb67f6455292cbf08, 0x1a3bc84c17b1d543},
+                {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, {0x8e938662882af53e, 0x547eb47b7282ee9d},
+                {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5},
+                {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, {0xae0b158b4738705e, 0x9624ab50b148d446},
+                {0xd98ddaee19068c76, 0x3badd624dd9b0958}, {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7},
+                {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, {0xd47487cc8470652b, 0x7647c32000696720},
+                {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, {0xa5fb0a17c777cf09, 0xf468107100525891},
+                {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, {0x81ac1fe293d599bf, 0xc6f14cd848405531},
+                {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d},
+                {0xfd442e4688bd304a, 0x908f4a166d1da664}, {0x9e4a9cec15763e2e, 0x9a598e4e043287ff},
+                {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, {0xf7549530e188c128, 0xd12bee59e68ef47d},
+                {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, {0xc13a148e3032d6e7, 0xe36a52363c1faf02},
+                {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba},
+                {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, {0xebdf661791d60f56, 0x111b495b3464ad22},
+                {0x936b9fcebb25c995, 0xcab10dd900beec35}, {0xb84687c269ef3bfb, 0x3d5d514f40eea743},
+                {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac},
+                {0xb3f4e093db73a093, 0x59ed216765690f57}, {0xe0f218b8d25088b8, 0x306869c13ec3532d},
+                {0x8c974f7383725573, 0x1e414218c73a13fc}, {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+                {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, {0x894bc396ce5da772, 0x6b8bba8c328eb784},
+                {0xab9eb47c81f5114f, 0x066ea92f3f326565}, {0xd686619ba27255a2, 0xc80a537b0efefebe},
+                {0x8613fd0145877585, 0xbd06742ce95f5f37}, {0xa798fc4196e952e7, 0x2c48113823b73705},
+                {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, {0x82ef85133de648c4, 0x9a984d73dbe722fc},
+                {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, {0xcc963fee10b7d1b3, 0x318df905079926a9},
+                {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634},
+                {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1},
+                {0x9c1661a651213e2d, 0x06bea10ca65c084f}, {0xc31bfa0fe5698db8, 0x486e494fcff30a63},
+                {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, {0x986ddb5c6b3a76b7, 0xf89629465a75e01d},
+                {0xbe89523386091465, 0xf6bbb397f1135824}, {0xee2ba6c0678b597f, 0x746aa07ded582e2d},
+                {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, {0xba121a4650e4ddeb, 0x92f34d62616ce414},
+                {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, {0x915e2486ef32cd60, 0x0ace1474dc1d122f},
+                {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a},
+                {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3},
+                {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c},
+                {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, {0xd89d64d57a607744, 0xe871c7bf077ba8b8},
+                {0x87625f056c7c4a8b, 0x11471cd764ad4973}, {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0},
+                {0xd389b47879823479, 0x4aff1d108d4ec2c4}, {0x843610cb4bf160cb, 0xcedf722a585139bb},
+                {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, {0xce947a3da6a9273e, 0x733d226229feea33},
+                {0x811ccc668829b887, 0x0806357d5a3f5260}, {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8},
+                {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, {0xfc2c3f3841f17c67, 0xbbac2078d443ace3},
+                {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, {0xc5029163f384a931, 0x0a9e795e65d4df12},
+                {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, {0x99ea0196163fa42e, 0x504bced1bf8e4e46},
+                {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, {0xf07da27a82c37088, 0x5d767327bb4e5a4d},
+                {0x964e858c91ba2655, 0x3a6a07f8d510f870}, {0xbbe226efb628afea, 0x890489f70a55368c},
+                {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e},
+                {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+                {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, {0xb32df8e9f3546564, 0x47939822dc96abfa},
+                {0xdff9772470297ebd, 0x59787e2b93bc56f8}, {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b},
+                {0xaefae51477a06b03, 0xede622920b6b23f2}, {0xdab99e59958885c4, 0xe95fab368e45ecee},
+                {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, {0xaae103b5fcd2a881, 0xd652bdc29f26a11a},
+                {0xd59944a37c0752a2, 0x4be76d3346f04960}, {0x857fcae62d8493a5, 0x6f70a4400c562ddc},
+                {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, {0xd097ad07a71f26b2, 0x7e2000a41346a7a8},
+                {0x825ecc24c873782f, 0x8ed400668c0c28c9}, {0xa2f67f2dfa90563b, 0x728900802f0f32fb},
+                {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, {0xfea126b7d78186bc, 0xe2f610c84987bfa9},
+                {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, {0xc6ede63fa05d3143, 0x91503d1c79720dbc},
+                {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb},
+                {0xc24452da229b021b, 0xfbe85badce996169}, {0xf2d56790ab41c2a2, 0xfae27299423fb9c4},
+                {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, {0xbdb6b8e905cb600f, 0x5400e987bbc1c921},
+                {0xed246723473e3813, 0x290123e9aab23b69}, {0x9436c0760c86e30b, 0xf9a0b6720aaf6522},
+                {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, {0xe7958cb87392c2c2, 0xb60b1d1230b20e05},
+                {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4},
+                {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, {0x8d590723948a535f, 0x579c487e5a38ad0f},
+                {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, {0xdcdb1b2798182244, 0xf8e431456cf88e66},
+                {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, {0xac8b2d36eed2dac5, 0xe272467e3d222f40},
+                {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, {0x86ccbb52ea94baea, 0x98e947129fc2b4ea},
+                {0xa87fea27a539e9a5, 0x3f2398d747b36225}, {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae},
+                {0x83a3eeeef9153e89, 0x1953cf68300424ad}, {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8},
+                {0xcdb02555653131b6, 0x3792f412cb06794e}, {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1},
+                {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, {0xc8de047564d20a8b, 0xf245825a5a445276},
+                {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, {0x9ced737bb6c4183d, 0x55464dd69685606c},
+                {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, {0xf53304714d9265df, 0xd53dd99f4b3066a9},
+                {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, {0xbf8fdb78849a5f96, 0xde98520472bdd034},
+                {0xef73d256a5c0f77c, 0x963e66858f6d4441}, {0x95a8637627989aad, 0xdde7001379a44aa9},
+                {0xbb127c53b17ec159, 0x5560c018580d5d53}, {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7},
+                {0x9226712162ab070d, 0xcab3961304ca70e9}, {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23},
+                {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243},
+                {0xb267ed1940f1c61c, 0x55f038b237591ed4}, {0xdf01e85f912e37a3, 0x6b6c46dec52f6689},
+                {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, {0xae397d8aa96c1b77, 0xabec975e0a0d081b},
+                {0xd9c7dced53c72255, 0x96e7bd358c904a22}, {0x881cea14545c7575, 0x7e50d64177da2e55},
+                {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865},
+                {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f},
+                {0xcfb11ead453994ba, 0x67de18eda5814af3}, {0x81ceb32c4b43fcf4, 0x80eacf948770ced8},
+                {0xa2425ff75e14fc31, 0xa1258379a94d028e}, {0xcad2f7f5359a3b3e, 0x096ee45813a04331},
+                {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, {0x9e74d1b791e07e48, 0x775ea264cf55347e},
+                {0xc612062576589dda, 0x95364afe032a819e}, {0xf79687aed3eec551, 0x3a83ddbd83f52205},
+                {0x9abe14cd44753b52, 0xc4926a9672793543}, {0xc16d9a0095928a27, 0x75b7053c0f178294},
+                {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, {0x971da05074da7bee, 0xd3f6fc16ebca5e04},
+                {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6},
+                {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, {0xb877aa3236a4b449, 0x09befeb9fad487c3},
+                {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11},
+                {0xb424dc35095cd80f, 0x538484c19ef38c95}, {0xe12e13424bb40e13, 0x2865a5f206b06fba},
+                {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, {0xafebff0bcb24aafe, 0xf78f69a51539d749},
+                {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, {0x89705f4136b4a597, 0x31680a88f8953031},
+                {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, {0xd6bf94d5e57a42bc, 0x3d32907604691b4d},
+                {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, {0xa7c5ac471b478423, 0x0fcf80dc33721d54},
+                {0xd1b71758e219652b, 0xd3c36113404ea4a9}, {0x83126e978d4fdf3b, 0x645a1cac083126ea},
+                {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, {0xcccccccccccccccc, 0xcccccccccccccccd},
+                {0x8000000000000000, 0x0000000000000000}, {0xa000000000000000, 0x0000000000000000},
+                {0xc800000000000000, 0x0000000000000000}, {0xfa00000000000000, 0x0000000000000000},
+                {0x9c40000000000000, 0x0000000000000000}, {0xc350000000000000, 0x0000000000000000},
+                {0xf424000000000000, 0x0000000000000000}, {0x9896800000000000, 0x0000000000000000},
+                {0xbebc200000000000, 0x0000000000000000}, {0xee6b280000000000, 0x0000000000000000},
+                {0x9502f90000000000, 0x0000000000000000}, {0xba43b74000000000, 0x0000000000000000},
+                {0xe8d4a51000000000, 0x0000000000000000}, {0x9184e72a00000000, 0x0000000000000000},
+                {0xb5e620f480000000, 0x0000000000000000}, {0xe35fa931a0000000, 0x0000000000000000},
+                {0x8e1bc9bf04000000, 0x0000000000000000}, {0xb1a2bc2ec5000000, 0x0000000000000000},
+                {0xde0b6b3a76400000, 0x0000000000000000}, {0x8ac7230489e80000, 0x0000000000000000},
+                {0xad78ebc5ac620000, 0x0000000000000000}, {0xd8d726b7177a8000, 0x0000000000000000},
+                {0x878678326eac9000, 0x0000000000000000}, {0xa968163f0a57b400, 0x0000000000000000},
+                {0xd3c21bcecceda100, 0x0000000000000000}, {0x84595161401484a0, 0x0000000000000000},
+                {0xa56fa5b99019a5c8, 0x0000000000000000}, {0xcecb8f27f4200f3a, 0x0000000000000000},
+                {0x813f3978f8940984, 0x4000000000000000}, {0xa18f07d736b90be5, 0x5000000000000000},
+                {0xc9f2c9cd04674ede, 0xa400000000000000}, {0xfc6f7c4045812296, 0x4d00000000000000},
+                {0x9dc5ada82b70b59d, 0xf020000000000000}, {0xc5371912364ce305, 0x6c28000000000000},
+                {0xf684df56c3e01bc6, 0xc732000000000000}, {0x9a130b963a6c115c, 0x3c7f400000000000},
+                {0xc097ce7bc90715b3, 0x4b9f100000000000}, {0xf0bdc21abb48db20, 0x1e86d40000000000},
+                {0x96769950b50d88f4, 0x1314448000000000}, {0xbc143fa4e250eb31, 0x17d955a000000000},
+                {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, {0x92efd1b8d0cf37be, 0x5aa1cae500000000},
+                {0xb7abc627050305ad, 0xf14a3d9e40000000}, {0xe596b7b0c643c719, 0x6d9ccd05d0000000},
+                {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, {0xb35dbf821ae4f38b, 0xdda2802c8a800000},
+                {0xe0352f62a19e306e, 0xd50b2037ad200000}, {0x8c213d9da502de45, 0x4526f422cc340000},
+                {0xaf298d050e4395d6, 0x9670b12b7f410000}, {0xdaf3f04651d47b4c, 0x3c0cdd765f114000},
+                {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, {0xab0e93b6efee0053, 0x8eea0d047a457a00},
+                {0xd5d238a4abe98068, 0x72a4904598d6d880}, {0x85a36366eb71f041, 0x47a6da2b7f864750},
+                {0xa70c3c40a64e6c51, 0x999090b65f67d924}, {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d},
+                {0x82818f1281ed449f, 0xbff8f10e7a8921a5}, {0xa321f2d7226895c7, 0xaff72d52192b6a0e},
+                {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764491}, {0xfee50b7025c36a08, 0x02f236d04753d5b5},
+                {0x9f4f2726179a2245, 0x01d762422c946591}, {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6},
+                {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb3}, {0x9b934c3b330c8577, 0x63cc55f49f88eb30},
+                {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fc}, {0xf316271c7fc3908a, 0x8bef464e3945ef7b},
+                {0x97edd871cfda3a56, 0x97758bf0e3cbb5ad}, {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea318},
+                {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bde}, {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6b},
+                {0xb975d6b6ee39e436, 0xb3e2fd538e122b45}, {0xe7d34c64a9c85d44, 0x60dbbca87196b617},
+                {0x90e40fbeea1d3a4a, 0xbc8955e946fe31ce}, {0xb51d13aea4a488dd, 0x6babab6398bdbe42},
+                {0xe264589a4dcdab14, 0xc696963c7eed2dd2}, {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca3},
+                {0xb0de65388cc8ada8, 0x3b25a55f43294bcc}, {0xdd15fe86affad912, 0x49ef0eb713f39ebf},
+                {0x8a2dbf142dfcc7ab, 0x6e3569326c784338}, {0xacb92ed9397bf996, 0x49c2c37f07965405},
+                {0xd7e77a8f87daf7fb, 0xdc33745ec97be907}, {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a4},
+                {0xa8acd7c0222311bc, 0xc40832ea0d68ce0d}, {0xd2d80db02aabd62b, 0xf50a3fa490c30191},
+                {0x83c7088e1aab65db, 0x792667c6da79e0fb}, {0xa4b8cab1a1563f52, 0x577001b891185939},
+                {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87}, {0x80b05e5ac60b6178, 0x544f8158315b05b5},
+                {0xa0dc75f1778e39d6, 0x696361ae3db1c722}, {0xc913936dd571c84c, 0x03bc3a19cd1e38ea},
+                {0xfb5878494ace3a5f, 0x04ab48a04065c724}, {0x9d174b2dcec0e47b, 0x62eb0d64283f9c77},
+                {0xc45d1df942711d9a, 0x3ba5d0bd324f8395}, {0xf5746577930d6500, 0xca8f44ec7ee3647a},
+                {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecc}, {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67f},
+                {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101f}, {0x95d04aee3b80ece5, 0xbba1f1d158724a13},
+                {0xbb445da9ca61281f, 0x2a8a6e45ae8edc98}, {0xea1575143cf97226, 0xf52d09d71a3293be},
+                {0x924d692ca61be758, 0x593c2626705f9c57}, {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836d},
+                {0xe498f455c38b997a, 0x0b6dfb9c0f956448}, {0x8edf98b59a373fec, 0x4724bd4189bd5ead},
+                {0xb2977ee300c50fe7, 0x58edec91ec2cb658}, {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ee},
+                {0x8b865b215899f46c, 0xbd79e0d20082ee75}, {0xae67f1e9aec07187, 0xecd8590680a3aa12},
+                {0xda01ee641a708de9, 0xe80e6f4820cc9496}, {0x884134fe908658b2, 0x3109058d147fdcde},
+                {0xaa51823e34a7eede, 0xbd4b46f0599fd416}, {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91b},
+                {0x850fadc09923329e, 0x03e2cf6bc604ddb1}, {0xa6539930bf6bff45, 0x84db8346b786151d},
+                {0xcfe87f7cef46ff16, 0xe612641865679a64}, {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07f},
+                {0xa26da3999aef7749, 0xe3be5e330f38f09e}, {0xcb090c8001ab551c, 0x5cadf5bfd3072cc6},
+                {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f7}, {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afb},
+                {0xc646d63501a1511d, 0xb281e1fd541501b9}, {0xf7d88bc24209a565, 0x1f225a7ca91a4227},
+                {0x9ae757596946075f, 0x3375788de9b06959}, {0xc1a12d2fc3978937, 0x0052d6b1641c83af},
+                {0xf209787bb47d6b84, 0xc0678c5dbd23a49b}, {0x9745eb4d50ce6332, 0xf840b7ba963646e1},
+                {0xbd176620a501fbff, 0xb650e5a93bc3d899}, {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebf},
+                {0x93ba47c980e98cdf, 0xc66f336c36b10138}, {0xb8a8d9bbe123f017, 0xb80b0047445d4185},
+                {0xe6d3102ad96cec1d, 0xa60dc059157491e6}, {0x9043ea1ac7e41392, 0x87c89837ad68db30},
+                {0xb454e4a179dd1877, 0x29babe4598c311fc}, {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67b},
+                {0x8ce2529e2734bb1d, 0x1899e4a65f58660d}, {0xb01ae745b101e9e4, 0x5ec05dcff72e7f90},
+                {0xdc21a1171d42645d, 0x76707543f4fa1f74}, {0x899504ae72497eba, 0x6a06494a791c53a9},
+                {0xabfa45da0edbde69, 0x0487db9d17636893}, {0xd6f8d7509292d603, 0x45a9d2845d3c42b7},
+                {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3}, {0xa7f26836f282b732, 0x8e6cac7768d7141f},
+                {0xd1ef0244af2364ff, 0x3207d795430cd927}, {0x8335616aed761f1f, 0x7f44e6bd49e807b9},
+                {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a7}, {0xcd036837130890a1, 0x36dba887c37a8c10},
+                {0x802221226be55a64, 0xc2494954da2c978a}, {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6d},
+                {0xc83553c5c8965d3d, 0x6f92829494e5acc8}, {0xfa42a8b73abbf48c, 0xcb772339ba1f17fa},
+                {0x9c69a97284b578d7, 0xff2a760414536efc}, {0xc38413cf25e2d70d, 0xfef5138519684abb},
+                {0xf46518c2ef5b8cd1, 0x7eb258665fc25d6a}, {0x98bf2f79d5993802, 0xef2f773ffbd97a62},
+                {0xbeeefb584aff8603, 0xaafb550ffacfd8fb}, {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf39},
+                {0x952ab45cfa97a0b2, 0xdd945a747bf26184}, {0xba756174393d88df, 0x94f971119aeef9e5},
+                {0xe912b9d1478ceb17, 0x7a37cd5601aab85e}, {0x91abb422ccb812ee, 0xac62e055c10ab33b},
+                {0xb616a12b7fe617aa, 0x577b986b314d600a}, {0xe39c49765fdf9d94, 0xed5a7e85fda0b80c},
+                {0x8e41ade9fbebc27d, 0x14588f13be847308}, {0xb1d219647ae6b31c, 0x596eb2d8ae258fc9},
+                {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bc}, {0x8aec23d680043bee, 0x25de7bb9480d5855},
+                {0xada72ccc20054ae9, 0xaf561aa79a10ae6b}, {0xd910f7ff28069da4, 0x1b2ba1518094da05},
+                {0x87aa9aff79042286, 0x90fb44d2f05d0843}, {0xa99541bf57452b28, 0x353a1607ac744a54},
+                {0xd3fa922f2d1675f2, 0x42889b8997915ce9}, {0x847c9b5d7c2e09b7, 0x69956135febada12},
+                {0xa59bc234db398c25, 0x43fab9837e699096}, {0xcf02b2c21207ef2e, 0x94f967e45e03f4bc},
+                {0x8161afb94b44f57d, 0x1d1be0eebac278f6}, {0xa1ba1ba79e1632dc, 0x6462d92a69731733},
+                {0xca28a291859bbf93, 0x7d7b8f7503cfdcff}, {0xfcb2cb35e702af78, 0x5cda735244c3d43f},
+                {0x9defbf01b061adab, 0x3a0888136afa64a8}, {0xc56baec21c7a1916, 0x088aaa1845b8fdd1},
+                {0xf6c69a72a3989f5b, 0x8aad549e57273d46}, {0x9a3c2087a63f6399, 0x36ac54e2f678864c},
+                {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7de}, {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d6},
+                {0x969eb7c47859e743, 0x9f644ae5a4b1b326}, {0xbc4665b596706114, 0x873d5d9f0dde1fef},
+                {0xeb57ff22fc0c7959, 0xa90cb506d155a7eb}, {0x9316ff75dd87cbd8, 0x09a7f12442d588f3},
+                {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb30}, {0xe5d3ef282a242e81, 0x8f1668c8a86da5fb},
+                {0x8fa475791a569d10, 0xf96e017d694487bd}, {0xb38d92d760ec4455, 0x37c981dcc395a9ad},
+                {0xe070f78d3927556a, 0x85bbe253f47b1418}, {0x8c469ab843b89562, 0x93956d7478ccec8f},
+                {0xaf58416654a6babb, 0x387ac8d1970027b3}, {0xdb2e51bfe9d0696a, 0x06997b05fcc0319f},
+                {0x88fcf317f22241e2, 0x441fece3bdf81f04}, {0xab3c2fddeeaad25a, 0xd527e81cad7626c4},
+                {0xd60b3bd56a5586f1, 0x8a71e223d8d3b075}, {0x85c7056562757456, 0xf6872d5667844e4a},
+                {0xa738c6bebb12d16c, 0xb428f8ac016561dc}, {0xd106f86e69d785c7, 0xe13336d701beba53},
+                {0x82a45b450226b39c, 0xecc0024661173474}, {0xa34d721642b06084, 0x27f002d7f95d0191},
+                {0xcc20ce9bd35c78a5, 0x31ec038df7b441f5}, {0xff290242c83396ce, 0x7e67047175a15272},
+                {0x9f79a169bd203e41, 0x0f0062c6e984d387}, {0xc75809c42c684dd1, 0x52c07b78a3e60869},
+                {0xf92e0c3537826145, 0xa7709a56ccdf8a83}, {0x9bbcc7a142b17ccb, 0x88a66076400bb692},
+                {0xc2abf989935ddbfe, 0x6acff893d00ea436}, {0xf356f7ebf83552fe, 0x0583f6b8c4124d44},
+                {0x98165af37b2153de, 0xc3727a337a8b704b}, {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5d},
+                {0xeda2ee1c7064130c, 0x1162def06f79df74}, {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba9},
+                {0xb9a74a0637ce2ee1, 0x6d953e2bd7173693}, {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0438},
+                {0x910ab1d4db9914a0, 0x1d9c9892400a22a3}, {0xb54d5e4a127f59c8, 0x2503beb6d00cab4c},
+                {0xe2a0b5dc971f303a, 0x2e44ae64840fd61e}, {0x8da471a9de737e24, 0x5ceaecfed289e5d3},
+                {0xb10d8e1456105dad, 0x7425a83e872c5f48}, {0xdd50f1996b947518, 0xd12f124e28f7771a},
+                {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa70}, {0xace73cbfdc0bfb7b, 0x636cc64d1001550c},
+                {0xd8210befd30efa5a, 0x3c47f7e05401aa4f}, {0x8714a775e3e95c78, 0x65acfaec34810a72},
+                {0xa8d9d1535ce3b396, 0x7f1839a741a14d0e}, {0xd31045a8341ca07c, 0x1ede48111209a051},
+                {0x83ea2b892091e44d, 0x934aed0aab460433}, {0xa4e4b66b68b65d60, 0xf81da84d56178540},
+                {0xce1de40642e3f4b9, 0x36251260ab9d668f}, {0x80d2ae83e9ce78f3, 0xc1d72b7c6b42601a},
+                {0xa1075a24e4421730, 0xb24cf65b8612f820}, {0xc94930ae1d529cfc, 0xdee033f26797b628},
+                {0xfb9b7cd9a4a7443c, 0x169840ef017da3b2}, {0x9d412e0806e88aa5, 0x8e1f289560ee864f},
+                {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e3}, {0xf5b5d7ec8acb58a2, 0xae10af696774b1dc},
+                {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef2a}, {0xbff610b0cc6edd3f, 0x17fd090a58d32af4},
+                {0xeff394dcff8a948e, 0xddfc4b4cef07f5b1}, {0x95f83d0a1fb69cd9, 0x4abdaf101564f98f},
+                {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f2}, {0xea53df5fd18d5513, 0x84c86189216dc5ee},
+                {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb5}, {0xb7118682dbb66a77, 0x3fbc8c33221dc2a2},
+                {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, {0x8f05b1163ba6832d, 0x29cb4d87f2a7400f},
+                {0xb2c71d5bca9023f8, 0x743e20e9ef511013}, {0xdf78e4b2bd342cf6, 0x914da9246b255417},
+                {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548f}, {0xae9672aba3d0c320, 0xa184ac2473b529b2},
+                {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741f}, {0x8865899617fb1871, 0x7e2fa67c7a658893},
+                {0xaa7eebfb9df9de8d, 0xddbb901b98feeab8}, {0xd51ea6fa85785631, 0x552a74227f3ea566},
+                {0x8533285c936b35de, 0xd53a88958f872760}, {0xa67ff273b8460356, 0x8a892abaf368f138},
+                {0xd01fef10a657842c, 0x2d2b7569b0432d86}, {0x8213f56a67f6b29b, 0x9c3b29620e29fc74},
+                {0xa298f2c501f45f42, 0x8349f3ba91b47b90}, {0xcb3f2f7642717713, 0x241c70a936219a74},
+                {0xfe0efb53d30dd4d7, 0xed238cd383aa0111}, {0x9ec95d1463e8a506, 0xf4363804324a40ab},
+                {0xc67bb4597ce2ce48, 0xb143c6053edcd0d6}, {0xf81aa16fdc1b81da, 0xdd94b7868e94050b},
+                {0x9b10a4e5e9913128, 0xca7cf2b4191c8327}, {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f1},
+                {0xf24a01a73cf2dccf, 0xbc633b39673c8ced}, {0x976e41088617ca01, 0xd5be0503e085d814},
+                {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e19}, {0xec9c459d51852ba2, 0xddf8e7d60ed1219f},
+                {0x93e1ab8252f33b45, 0xcabb90e5c942b504}, {0xb8da1662e7b00a17, 0x3d6a751f3b936244},
+                {0xe7109bfba19c0c9d, 0x0cc512670a783ad5}, {0x906a617d450187e2, 0x27fb2b80668b24c6},
+                {0xb484f9dc9641e9da, 0xb1f9f660802dedf7}, {0xe1a63853bbd26451, 0x5e7873f8a0396974},
+                {0x8d07e33455637eb2, 0xdb0b487b6423e1e9}, {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda63},
+                {0xdc5c5301c56b75f7, 0x7641a140cc7810fc}, {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9e},
+                {0xac2820d9623bf429, 0x546345fa9fbdcd45}, {0xd732290fbacaf133, 0xa97c177947ad4096},
+                {0x867f59a9d4bed6c0, 0x49ed8eabcccc485e}, {0xa81f301449ee8c70, 0x5c68f256bfff5a75},
+                {0xd226fc195c6a2f8c, 0x73832eec6fff3112}, {0x83585d8fd9c25db7, 0xc831fd53c5ff7eac},
+                {0xa42e74f3d032f525, 0xba3e7ca8b77f5e56}, {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35ec},
+                {0x80444b5e7aa7cf85, 0x7980d163cf5b81b4}, {0xa0555e361951c366, 0xd7e105bcc3326220},
+                {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa8}, {0xfa856334878fc150, 0xb14f98f6f0feb952},
+                {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d4}, {0xc3b8358109e84f07, 0x0a862f80ec4700c9},
+                {0xf4a642e14c6262c8, 0xcd27bb612758c0fb}, {0x98e7e9cccfbd7dbd, 0x8038d51cb897789d},
+                {0xbf21e44003acdd2c, 0xe0470a63e6bd56c4}, {0xeeea5d5004981478, 0x1858ccfce06cac75},
+                {0x95527a5202df0ccb, 0x0f37801e0c43ebc9}, {0xbaa718e68396cffd, 0xd30560258f54e6bb},
+                {0xe950df20247c83fd, 0x47c6b82ef32a206a}, {0x91d28b7416cdd27e, 0x4cdc331d57fa5442},
+                {0xb6472e511c81471d, 0xe0133fe4adf8e953}, {0xe3d8f9e563a198e5, 0x58180fddd97723a7},
+                {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7649}, {0xb201833b35d63f73, 0x2cd2cc6551e513db},
+                {0xde81e40a034bcf4f, 0xf8077f7ea65e58d2}, {0x8b112e86420f6191, 0xfb04afaf27faf783},
+                {0xadd57a27d29339f6, 0x79c5db9af1f9b564}, {0xd94ad8b1c7380874, 0x18375281ae7822bd},
+                {0x87cec76f1c830548, 0x8f2293910d0b15b6}, {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb23},
+                {0xd433179d9c8cb841, 0x5fa60692a46151ec}, {0x849feec281d7f328, 0xdbc7c41ba6bcd334},
+                {0xa5c7ea73224deff3, 0x12b9b522906c0801}, {0xcf39e50feae16bef, 0xd768226b34870a01},
+                {0x81842f29f2cce375, 0xe6a1158300d46641}, {0xa1e53af46f801c53, 0x60495ae3c1097fd1},
+                {0xca5e89b18b602368, 0x385bb19cb14bdfc5}, {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6},
+                {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2}, {0xc5a05277621be293, 0xc7098b7305241886},
+                {0xf70867153aa2db38, 0xb8cbee4fc66d1ea8}};
+        };
+
+        // Compressed cache for double: keeps every compression_ratio-th entry of the full table.
+        struct compressed_cache_detail {
+            static constexpr int compression_ratio = 27;
+            static constexpr std::size_t compressed_table_size =
+                (cache_holder<ieee754_binary64>::max_k - cache_holder<ieee754_binary64>::min_k +
+                 compression_ratio) / compression_ratio; // ceil((max_k - min_k + 1) / ratio)

+            struct cache_holder_t {
+                wuint::uint128 table[compressed_table_size];
+            };
+            static constexpr cache_holder_t cache = [] {
+                cache_holder_t sampled{};
+                for (std::size_t dst = 0, src = 0; dst < compressed_table_size;
+                     ++dst, src += compression_ratio) {
+                    sampled.table[dst] = cache_holder<ieee754_binary64>::cache[src];
+                }
+                return sampled;
+            }();

+            struct pow5_holder_t {
+                std::uint64_t table[compression_ratio];
+            };
+            static constexpr pow5_holder_t pow5 = [] {
+                pow5_holder_t powers{};
+                std::uint64_t next = 1;
+                for (auto& slot : powers.table) {
+                    slot = next; // 5^0, 5^1, ... in index order
+                    next *= 5;
+                }
+                return powers;
+            }();
+        };
+    }
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // Policies.
+    ////////////////////////////////////////////////////////////////////////////////////////
+
+    namespace detail {
+        // Forward declaration of the implementation class template; no definition is given here.
+        template <class Float, class FloatTraits = default_float_traits<Float>>
+        struct impl; // FloatTraits falls back to default_float_traits<Float> when omitted.
+
+        namespace policy_impl {
+            // Sign policies: each exposes `return_has_sign` and a static `handle_sign` hook.
+            namespace sign {
+                struct base {}; // Empty base shared by the two sign policies below.
+
+                struct ignore : base { // Leaves the result untouched.
+                    using sign_policy = ignore; // Self-alias naming the concrete policy.
+                    static constexpr bool return_has_sign = false;
+                    // No-op hook: both arguments are unnamed and unused.
+                    template <class SignedSignificandBits, class ReturnType>
+                    static constexpr void handle_sign(SignedSignificandBits, ReturnType&) noexcept {
+                    }
+                };
+
+                struct return_sign : base { // Records the sign of the input in the result.
+                    using sign_policy = return_sign; // Self-alias naming the concrete policy.
+                    static constexpr bool return_has_sign = true;
+                    // Requires ReturnType to have an assignable `is_negative` member.
+                    template <class SignedSignificandBits, class ReturnType>
+                    static constexpr void handle_sign(SignedSignificandBits s,
+                                                      ReturnType& r) noexcept {
+                        r.is_negative = s.is_negative(); // Copy the sign query result into r.
+                    }
+                };
+            }
+
+            // Trailing zero policies: each provides on_trailing_zeros / no_trailing_zeros hooks.
+            namespace trailing_zero {
+                struct base {}; // Empty base shared by the three policies below.
+
+                struct ignore : base { // Both hooks are no-ops; the result is left as computed.
+                    using trailing_zero_policy = ignore; // Self-alias naming the concrete policy.
+                    static constexpr bool report_trailing_zeros = false;
+                    // Impl is not a function parameter, so callers must name it explicitly.
+                    template <class Impl, class ReturnType>
+                    static constexpr void on_trailing_zeros(ReturnType&) noexcept {}
+
+                    template <class Impl, class ReturnType>
+                    static constexpr void no_trailing_zeros(ReturnType&) noexcept {}
+                };
+
+                struct remove : base { // Folds Impl::remove_trailing_zeros' result into the exponent.
+                    using trailing_zero_policy = remove; // Self-alias naming the concrete policy.
+                    static constexpr bool report_trailing_zeros = false;
+                    // NOTE(review): presumably the helper strips zeros from r.significand in place.
+                    template <class Impl, class ReturnType>
+                    JKJ_FORCEINLINE static constexpr void
+                    on_trailing_zeros(ReturnType& r) noexcept {
+                        r.exponent += Impl::remove_trailing_zeros(r.significand);
+                    }
+                    // Nothing to strip in this case, so the result is left unchanged.
+                    template <class Impl, class ReturnType>
+                    static constexpr void no_trailing_zeros(ReturnType&) noexcept {}
+                };
+
+                struct report : base { // Only flags the result; significand/exponent untouched.
+                    using trailing_zero_policy = report; // Self-alias naming the concrete policy.
+                    static constexpr bool report_trailing_zeros = true;
+                    // Requires ReturnType to have an assignable `may_have_trailing_zeros` member.
+                    template <class Impl, class ReturnType>
+                    static constexpr void on_trailing_zeros(ReturnType& r) noexcept {
+                        r.may_have_trailing_zeros = true;
+                    }
+
+                    template <class Impl, class ReturnType>
+                    static constexpr void no_trailing_zeros(ReturnType& r) noexcept {
+                        r.may_have_trailing_zeros = false;
+                    }
+                };
+            }
+
+            // Decimal-to-binary rounding mode policies.
+            namespace decimal_to_binary_rounding {
+                struct base {}; // Empty base shared by the rounding policies below.
+                // Category tag that each policy exposes through its static `tag` member.
+                enum class tag_t { to_nearest, left_closed_directed, right_closed_directed };
+                namespace interval_type { // Endpoint-inclusion descriptors for rounding intervals.
+                    struct symmetric_boundary { // One runtime flag governs both endpoints.
+                        static constexpr bool is_symmetric = true;
+                        bool is_closed; // true: both endpoints included; false: both excluded.
+                        constexpr bool include_left_endpoint() const noexcept { return is_closed; }
+                        constexpr bool include_right_endpoint() const noexcept { return is_closed; }
+                    };
+                    struct asymmetric_boundary { // Exactly one endpoint is included, chosen at run time.
+                        static constexpr bool is_symmetric = false;
+                        bool is_left_closed; // true: left included, right excluded; false: reverse.
+                        constexpr bool include_left_endpoint() const noexcept {
+                            return is_left_closed;
+                        }
+                        constexpr bool include_right_endpoint() const noexcept {
+                            return !is_left_closed;
+                        }
+                    };
+                    struct closed { // Stateless: both endpoints always included.
+                        static constexpr bool is_symmetric = true;
+                        static constexpr bool include_left_endpoint() noexcept { return true; }
+                        static constexpr bool include_right_endpoint() noexcept { return true; }
+                    };
+                    struct open { // Stateless: neither endpoint included.
+                        static constexpr bool is_symmetric = true;
+                        static constexpr bool include_left_endpoint() noexcept { return false; }
+                        static constexpr bool include_right_endpoint() noexcept { return false; }
+                    };
+                    struct left_closed_right_open { // Stateless: left included, right excluded.
+                        static constexpr bool is_symmetric = false;
+                        static constexpr bool include_left_endpoint() noexcept { return true; }
+                        static constexpr bool include_right_endpoint() noexcept { return false; }
+                    };
+                    struct right_closed_left_open { // Stateless: left excluded, right included.
+                        static constexpr bool is_symmetric = false;
+                        static constexpr bool include_left_endpoint() noexcept { return false; }
+                        static constexpr bool include_right_endpoint() noexcept { return true; }
+                    };
+                }
+
+                struct nearest_to_even : base { // to_nearest policy keyed on significand evenness.
+                    using decimal_to_binary_rounding_policy = nearest_to_even;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::symmetric_boundary;
+                    using shorter_interval_type = interval_type::closed;
+                    // Calls f with an instance of this policy; the bits argument is ignored.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept {
+                        return f(nearest_to_even{});
+                    }
+                    // f receives s.has_even_significand_bits(); presumably used as is_closed - confirm.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(s.has_even_significand_bits());
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f(); // No argument: shorter_interval_type (closed) has no state.
+                    }
+                };
+                struct nearest_to_odd : base { // to_nearest policy keyed on significand oddness.
+                    using decimal_to_binary_rounding_policy = nearest_to_odd;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::symmetric_boundary;
+                    using shorter_interval_type = interval_type::open;
+                    // Calls f with an instance of this policy; the bits argument is ignored.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept {
+                        return f(nearest_to_odd{});
+                    }
+                    // f receives !s.has_even_significand_bits(); presumably used as is_closed - confirm.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(!s.has_even_significand_bits());
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f(); // No argument: shorter_interval_type (open) has no state.
+                    }
+                };
+                struct nearest_toward_plus_infinity : base { // to_nearest policy keyed on the sign.
+                    using decimal_to_binary_rounding_policy = nearest_toward_plus_infinity;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::asymmetric_boundary;
+                    using shorter_interval_type = interval_type::asymmetric_boundary;
+                    // Calls f with an instance of this policy; the bits argument is ignored.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept {
+                        return f(nearest_toward_plus_infinity{});
+                    }
+                    // f receives !s.is_negative(); presumably used as is_left_closed - confirm.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(!s.is_negative());
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(!s.is_negative()); // Same flag as the normal-interval case.
+                    }
+                };
+                struct nearest_toward_minus_infinity : base { // to_nearest policy keyed on the sign.
+                    using decimal_to_binary_rounding_policy = nearest_toward_minus_infinity;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::asymmetric_boundary;
+                    using shorter_interval_type = interval_type::asymmetric_boundary;
+                    // Calls f with an instance of this policy; the bits argument is ignored.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept {
+                        return f(nearest_toward_minus_infinity{});
+                    }
+                    // f receives s.is_negative(); presumably used as is_left_closed - confirm.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(s.is_negative());
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits s, Func&& f) noexcept {
+                        return f(s.is_negative()); // Same flag as the normal-interval case.
+                    }
+                };
+                struct nearest_toward_zero : base { // to_nearest policy with fixed interval types.
+                    using decimal_to_binary_rounding_policy = nearest_toward_zero;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::right_closed_left_open;
+                    using shorter_interval_type = interval_type::right_closed_left_open;
+                    // Calls f with an instance of this policy; the bits argument is ignored.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept {
+                        return f(nearest_toward_zero{});
+                    }
+                    // Both invoke_* helpers ignore the bits and call f with no arguments.
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f(); // No argument: right_closed_left_open has no state.
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f();
+                    }
+                };
+                struct nearest_away_from_zero : base { // to_nearest policy with a fixed left-closed, right-open interval for every input.
+                    using decimal_to_binary_rounding_policy = nearest_away_from_zero;
+                    static constexpr auto tag = tag_t::to_nearest;
+                    using normal_interval_type = interval_type::left_closed_right_open;
+                    using shorter_interval_type = interval_type::left_closed_right_open;
+
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { // The bits are ignored: f always receives this policy itself.
+                        return f(nearest_away_from_zero{});
+                    }
+
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f(); // No extra argument: the interval type does not depend on the input.
+                    }
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static constexpr auto
+                    invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                        return f(); // Same as the normal-interval case.
+                    }
+                };
+
+                namespace detail { // Helper policies handed to the callback by the *_static_boundary policies' delegate().
+                    struct nearest_always_closed { // to_nearest with a closed interval in both the normal and the shorter case.
+                        static constexpr auto tag = tag_t::to_nearest;
+                        using normal_interval_type = interval_type::closed;
+                        using shorter_interval_type = interval_type::closed;
+
+                        template <class SignedSignificandBits, class Func>
+                        JKJ_FORCEINLINE static constexpr auto
+                        invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                            return f(); // The bits are ignored; f is called without arguments.
+                        }
+                        template <class SignedSignificandBits, class Func>
+                        JKJ_FORCEINLINE static constexpr auto
+                        invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                            return f(); // The bits are ignored; f is called without arguments.
+                        }
+                    };
+                    struct nearest_always_open { // to_nearest with an open interval in both the normal and the shorter case.
+                        static constexpr auto tag = tag_t::to_nearest;
+                        using normal_interval_type = interval_type::open;
+                        using shorter_interval_type = interval_type::open;
+
+                        template <class SignedSignificandBits, class Func>
+                        JKJ_FORCEINLINE static constexpr auto
+                        invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                            return f(); // The bits are ignored; f is called without arguments.
+                        }
+                        template <class SignedSignificandBits, class Func>
+                        JKJ_FORCEINLINE static constexpr auto
+                        invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept {
+                            return f(); // The bits are ignored; f is called without arguments.
+                        }
+                    };
+                }
+
+                struct nearest_to_even_static_boundary : base { // Picks a fixed-interval helper policy at run time from the significand's parity.
+                    using decimal_to_binary_rounding_policy = nearest_to_even_static_boundary;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.has_even_significand_bits()) {
+                            return f(detail::nearest_always_closed{}); // Even significand bits: closed interval.
+                        }
+                        else {
+                            return f(detail::nearest_always_open{}); // Odd significand bits: open interval.
+                        }
+                    }
+                };
+                struct nearest_to_odd_static_boundary : base { // Picks a fixed-interval helper policy at run time from the significand's parity.
+                    using decimal_to_binary_rounding_policy = nearest_to_odd_static_boundary;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.has_even_significand_bits()) {
+                            return f(detail::nearest_always_open{}); // Even significand bits: open interval.
+                        }
+                        else {
+                            return f(detail::nearest_always_closed{}); // Odd significand bits: closed interval.
+                        }
+                    }
+                };
+                struct nearest_toward_plus_infinity_static_boundary : base { // Picks a sign-independent nearest policy at run time from the input's sign.
+                    using decimal_to_binary_rounding_policy =
+                        nearest_toward_plus_infinity_static_boundary;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.is_negative()) {
+                            return f(nearest_toward_zero{}); // Negative input: dispatch to nearest_toward_zero.
+                        }
+                        else {
+                            return f(nearest_away_from_zero{}); // Non-negative input: dispatch to nearest_away_from_zero.
+                        }
+                    }
+                };
+                struct nearest_toward_minus_infinity_static_boundary : base { // Picks a sign-independent nearest policy at run time from the input's sign.
+                    using decimal_to_binary_rounding_policy =
+                        nearest_toward_minus_infinity_static_boundary;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.is_negative()) {
+                            return f(nearest_away_from_zero{}); // Negative input: dispatch to nearest_away_from_zero.
+                        }
+                        else {
+                            return f(nearest_toward_zero{}); // Non-negative input: dispatch to nearest_toward_zero.
+                        }
+                    }
+                };
+
+                namespace detail { // Tag-only helper policies handed to the callback by the directed rounding policies' delegate().
+                    struct left_closed_directed { // Carries only the left_closed_directed tag; no interval types or callbacks.
+                        static constexpr auto tag = tag_t::left_closed_directed;
+                    };
+                    struct right_closed_directed { // Carries only the right_closed_directed tag; no interval types or callbacks.
+                        static constexpr auto tag = tag_t::right_closed_directed;
+                    };
+                }
+
+                struct toward_plus_infinity : base { // Directed policy: the helper tag handed to f depends on the input's sign.
+                    using decimal_to_binary_rounding_policy = toward_plus_infinity;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.is_negative()) {
+                            return f(detail::left_closed_directed{}); // Negative input: left-closed directed helper.
+                        }
+                        else {
+                            return f(detail::right_closed_directed{}); // Non-negative input: right-closed directed helper.
+                        }
+                    }
+                };
+                struct toward_minus_infinity : base { // Directed policy: the helper tag handed to f depends on the input's sign.
+                    using decimal_to_binary_rounding_policy = toward_minus_infinity;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s,
+                                                         Func&& f) noexcept {
+                        if (s.is_negative()) {
+                            return f(detail::right_closed_directed{}); // Negative input: right-closed directed helper.
+                        }
+                        else {
+                            return f(detail::left_closed_directed{}); // Non-negative input: left-closed directed helper.
+                        }
+                    }
+                };
+                struct toward_zero : base { // Directed policy that uses the left-closed directed helper for every input.
+                    using decimal_to_binary_rounding_policy = toward_zero;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { // The bits are ignored.
+                        return f(detail::left_closed_directed{});
+                    }
+                };
+                struct away_from_zero : base { // Directed policy that uses the right-closed directed helper for every input.
+                    using decimal_to_binary_rounding_policy = away_from_zero;
+                    template <class SignedSignificandBits, class Func>
+                    JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { // The bits are ignored.
+                        return f(detail::right_closed_directed{});
+                    }
+                };
+            }
+
+            // Binary-to-decimal rounding policies.
+            // (Always assumes nearest rounding modes.)
+            namespace binary_to_decimal_rounding { // Tie-breaking policies; compute_nearest_normal decrements the significand on a tie when prefer_round_down() is true.
+                struct base {}; // Common empty base of every policy in this namespace.
+
+                enum class tag_t { do_not_care, to_even, to_odd, away_from_zero, toward_zero }; // One enumerator per policy struct below.
+
+                struct do_not_care : base { // Never asks for the smaller candidate.
+                    using binary_to_decimal_rounding_policy = do_not_care;
+                    static constexpr auto tag = tag_t::do_not_care;
+
+                    template <class ReturnType>
+                    static constexpr bool prefer_round_down(ReturnType const&) noexcept {
+                        return false;
+                    }
+                };
+
+                struct to_even : base { // Asks for the smaller candidate when the current significand is odd.
+                    using binary_to_decimal_rounding_policy = to_even;
+                    static constexpr auto tag = tag_t::to_even;
+
+                    template <class ReturnType>
+                    static constexpr bool prefer_round_down(ReturnType const& r) noexcept {
+                        return r.significand % 2 != 0; // Odd now, so one less is even.
+                    }
+                };
+
+                struct to_odd : base { // Asks for the smaller candidate when the current significand is even.
+                    using binary_to_decimal_rounding_policy = to_odd;
+                    static constexpr auto tag = tag_t::to_odd;
+
+                    template <class ReturnType>
+                    static constexpr bool prefer_round_down(ReturnType const& r) noexcept {
+                        return r.significand % 2 == 0; // Even now, so one less is odd.
+                    }
+                };
+
+                struct away_from_zero : base { // Never asks for the smaller candidate.
+                    using binary_to_decimal_rounding_policy = away_from_zero;
+                    static constexpr auto tag = tag_t::away_from_zero;
+
+                    template <class ReturnType>
+                    static constexpr bool prefer_round_down(ReturnType const&) noexcept {
+                        return false;
+                    }
+                };
+
+                struct toward_zero : base { // Always asks for the smaller candidate.
+                    using binary_to_decimal_rounding_policy = toward_zero;
+                    static constexpr auto tag = tag_t::toward_zero;
+
+                    template <class ReturnType>
+                    static constexpr bool prefer_round_down(ReturnType const&) noexcept {
+                        return true;
+                    }
+                };
+            }
+
+            // Cache policies.
+            namespace cache { // Policies that return the cache entry for a decimal exponent k.
+                struct base {}; // Common empty base of both cache policies.
+
+                struct full : base { // Reads the entry straight from the full per-format table.
+                    using cache_policy = full;
+                    template <class FloatFormat>
+                    static constexpr typename cache_holder<FloatFormat>::cache_entry_type
+                    get_cache(int k) noexcept {
+                        assert(k >= cache_holder<FloatFormat>::min_k &&
+                               k <= cache_holder<FloatFormat>::max_k);
+                        return cache_holder<FloatFormat>::cache[std::size_t(
+                            k - cache_holder<FloatFormat>::min_k)]; // The table is indexed by k - min_k.
+                    }
+                };
+
+                struct compact : base { // For ieee754_binary64, rebuilds the entry from a compressed table; other formats use the full table.
+                    using cache_policy = compact;
+                    template <class FloatFormat>
+                    static constexpr typename cache_holder<FloatFormat>::cache_entry_type
+                    get_cache(int k) noexcept {
+                        assert(k >= cache_holder<FloatFormat>::min_k &&
+                               k <= cache_holder<FloatFormat>::max_k);
+
+                        if constexpr (std::is_same_v<FloatFormat, ieee754_binary64>) {
+                            // Compute the base index: one stored entry per compression_ratio consecutive values of k.
+                            auto const cache_index =
+                                int(std::uint32_t(k - cache_holder<FloatFormat>::min_k) /
+                                    compressed_cache_detail::compression_ratio);
+                            auto const kb =
+                                cache_index * compressed_cache_detail::compression_ratio +
+                                cache_holder<FloatFormat>::min_k; // kb is the exponent of the stored base entry.
+                            auto const offset = k - kb; // Distance from the base exponent to the requested one.
+
+                            // Get the base cache.
+                            auto const base_cache =
+                                compressed_cache_detail::cache.table[cache_index];
+
+                            if (offset == 0) {
+                                return base_cache; // The requested entry is the stored one.
+                            }
+                            else {
+                                // Compute the required amount of bit-shift.
+                                auto const alpha = log::floor_log2_pow10(kb + offset) -
+                                                   log::floor_log2_pow10(kb) - offset;
+                                assert(alpha > 0 && alpha < 64);
+
+                                // Try to recover the real cache: multiply both 64-bit halves of the base entry by 5^offset.
+                                auto const pow5 = compressed_cache_detail::pow5.table[offset];
+                                auto recovered_cache = wuint::umul128(base_cache.high(), pow5);
+                                auto const middle_low = wuint::umul128(base_cache.low(), pow5);
+
+                                recovered_cache += middle_low.high(); // Fold the upper half of the low product into the high product.
+
+                                auto const high_to_middle = recovered_cache.high() << (64 - alpha); // Bits that move from the high word into the middle word.
+                                auto const middle_to_low = recovered_cache.low() << (64 - alpha); // Bits that move from the middle word into the low word.
+
+                                recovered_cache = wuint::uint128{
+                                    (recovered_cache.low() >> alpha) | high_to_middle,
+                                    ((middle_low.low() >> alpha) | middle_to_low)}; // Keep the low 128 bits of the product shifted right by alpha.
+
+                                assert(recovered_cache.low() + 1 != 0); // The increment below must not carry out of the low word.
+                                recovered_cache = {recovered_cache.high(),
+                                                   recovered_cache.low() + 1};
+
+                                return recovered_cache;
+                            }
+                        }
+                        else {
+                            // Just use the full cache for anything other than binary64
+                            return cache_holder<FloatFormat>::cache[std::size_t(
+                                k - cache_holder<FloatFormat>::min_k)];
+                        }
+                    }
+                };
+            }
+        }
+    }
+
+    namespace policy { // Ready-made constant instances, one per policy type in detail::policy_impl.
+        namespace sign { // Instances of the sign policies.
+            inline constexpr auto ignore = detail::policy_impl::sign::ignore{};
+            inline constexpr auto return_sign = detail::policy_impl::sign::return_sign{};
+        }
+
+        namespace trailing_zero { // Instances of the trailing-zero policies.
+            inline constexpr auto ignore = detail::policy_impl::trailing_zero::ignore{};
+            inline constexpr auto remove = detail::policy_impl::trailing_zero::remove{};
+            inline constexpr auto report = detail::policy_impl::trailing_zero::report{};
+        }
+
+        namespace decimal_to_binary_rounding { // Instances of the decimal-to-binary rounding policies.
+            inline constexpr auto nearest_to_even =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_to_even{};
+            inline constexpr auto nearest_to_odd =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_to_odd{};
+            inline constexpr auto nearest_toward_plus_infinity =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_toward_plus_infinity{};
+            inline constexpr auto nearest_toward_minus_infinity =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_toward_minus_infinity{};
+            inline constexpr auto nearest_toward_zero =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_toward_zero{};
+            inline constexpr auto nearest_away_from_zero =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_away_from_zero{};
+
+            inline constexpr auto nearest_to_even_static_boundary =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_to_even_static_boundary{};
+            inline constexpr auto nearest_to_odd_static_boundary =
+                detail::policy_impl::decimal_to_binary_rounding::nearest_to_odd_static_boundary{};
+            inline constexpr auto nearest_toward_plus_infinity_static_boundary =
+                detail::policy_impl::decimal_to_binary_rounding::
+                    nearest_toward_plus_infinity_static_boundary{};
+            inline constexpr auto nearest_toward_minus_infinity_static_boundary =
+                detail::policy_impl::decimal_to_binary_rounding::
+                    nearest_toward_minus_infinity_static_boundary{};
+
+            inline constexpr auto toward_plus_infinity =
+                detail::policy_impl::decimal_to_binary_rounding::toward_plus_infinity{};
+            inline constexpr auto toward_minus_infinity =
+                detail::policy_impl::decimal_to_binary_rounding::toward_minus_infinity{};
+            inline constexpr auto toward_zero =
+                detail::policy_impl::decimal_to_binary_rounding::toward_zero{};
+            inline constexpr auto away_from_zero =
+                detail::policy_impl::decimal_to_binary_rounding::away_from_zero{};
+        }
+
+        namespace binary_to_decimal_rounding { // Instances of the binary-to-decimal (tie-breaking) policies.
+            inline constexpr auto do_not_care =
+                detail::policy_impl::binary_to_decimal_rounding::do_not_care{};
+            inline constexpr auto to_even =
+                detail::policy_impl::binary_to_decimal_rounding::to_even{};
+            inline constexpr auto to_odd =
+                detail::policy_impl::binary_to_decimal_rounding::to_odd{};
+            inline constexpr auto away_from_zero =
+                detail::policy_impl::binary_to_decimal_rounding::away_from_zero{};
+            inline constexpr auto toward_zero =
+                detail::policy_impl::binary_to_decimal_rounding::toward_zero{};
+        }
+
+        namespace cache { // Instances of the cache policies.
+            inline constexpr auto full = detail::policy_impl::cache::full{};
+            inline constexpr auto compact = detail::policy_impl::cache::compact{};
+        }
+    }
+
+    namespace detail {
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // The main algorithm.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        template <class Float, class FloatTraits>
+        struct impl : private FloatTraits, private FloatTraits::format {
+            using format = typename FloatTraits::format; // Format description supplied by the traits class.
+            using carrier_uint = typename FloatTraits::carrier_uint; // Unsigned integer type used for significand arithmetic in this struct.
+
+            using FloatTraits::carrier_bits; // Re-export the constants inherited from the private bases.
+            using format::significand_bits;
+            using format::min_exponent;
+            using format::max_exponent;
+            using format::exponent_bias;
+            using format::decimal_digits;
+
+            static constexpr int kappa = std::is_same_v<format, ieee754_binary32> ? 1 : 2; // 1 for binary32, 2 for every other format.
+            static_assert(kappa >= 1);
+            static_assert(carrier_bits >= significand_bits + 2 + log::floor_log2_pow10(kappa + 1)); // The carrier type must be wide enough for the scaled significand.
+
+            static constexpr int min_k = [] { // Smaller of two candidate cache exponents, both derived from max_exponent - significand_bits.
+                constexpr auto a = -log::floor_log10_pow2_minus_log10_4_over_3(
+                    int(max_exponent - significand_bits));
+                constexpr auto b =
+                    -log::floor_log10_pow2(int(max_exponent - significand_bits)) + kappa;
+                return a < b ? a : b;
+            }();
+            static_assert(min_k >= cache_holder<format>::min_k); // The cache table must reach down to min_k.
+
+            static constexpr int max_k = [] { // Larger of two candidate cache exponents, both derived from min_exponent - significand_bits.
+                // We do invoke shorter_interval_case for exponent == min_exponent case,
+                // so we should not add 1 here.
+                constexpr auto a = -log::floor_log10_pow2_minus_log10_4_over_3(
+                    int(min_exponent - significand_bits /*+ 1*/));
+                constexpr auto b =
+                    -log::floor_log10_pow2(int(min_exponent - significand_bits)) + kappa;
+                return a > b ? a : b;
+            }();
+            static_assert(max_k <= cache_holder<format>::max_k); // The cache table must reach up to max_k.
+
+            using cache_entry_type = typename cache_holder<format>::cache_entry_type;
+            static constexpr auto cache_bits = cache_holder<format>::cache_bits;
+
+            static constexpr int case_shorter_interval_left_endpoint_lower_threshold = 2; // NOTE(review): the code that reads these thresholds is outside this view - presumably the shorter-interval endpoint checks; confirm.
+            static constexpr int case_shorter_interval_left_endpoint_upper_threshold =
+                2 +
+                log::floor_log2(
+                    compute_power<
+                        count_factors<5>((carrier_uint(1) << (significand_bits + 2)) - 1) + 1>(10) /
+                    3);
+
+            static constexpr int case_shorter_interval_right_endpoint_lower_threshold = 0;
+            static constexpr int case_shorter_interval_right_endpoint_upper_threshold =
+                2 +
+                log::floor_log2(
+                    compute_power<
+                        count_factors<5>((carrier_uint(1) << (significand_bits + 1)) + 1) + 1>(10) /
+                    3);
+
+            static constexpr int shorter_interval_tie_lower_threshold =
+                -log::floor_log5_pow2_minus_log5_3(significand_bits + 4) - 2 - significand_bits;
+            static constexpr int shorter_interval_tie_upper_threshold =
+                -log::floor_log5_pow2(significand_bits + 2) - 2 - significand_bits;
+
+            struct compute_mul_result { // NOTE(review): presumably the return type of compute_mul(), which is destructured as [zi, is_z_integer] - confirm.
+                carrier_uint result; // Integer part of the product.
+                bool is_integer; // Whether the product was judged to be an exact integer.
+            };
+            struct compute_mul_parity_result { // NOTE(review): presumably the return type of compute_mul_parity(), which is destructured as [parity, is_integer] - confirm.
+                bool parity; // Parity bit of the product's integer part.
+                bool is_integer; // Whether the product was judged to be an exact integer.
+            };
+
+            //// The main algorithm assumes the input is a normal/subnormal finite number
+
+            template <class ReturnType, class IntervalType, class TrailingZeroPolicy,
+                      class BinaryToDecimalRoundingPolicy, class CachePolicy,
+                      class... AdditionalArgs> // AdditionalArgs are forwarded to IntervalType's constructor.
+            JKJ_SAFEBUFFERS static ReturnType
+            compute_nearest_normal(carrier_uint const two_fc, int const exponent,
+                                   AdditionalArgs... additional_args) noexcept { // Fills and returns ret_value.significand / ret_value.exponent for the normal-interval case.
+                //////////////////////////////////////////////////////////////////////
+                // Step 1: Schubfach multiplier calculation
+                //////////////////////////////////////////////////////////////////////
+
+                ReturnType ret_value;
+                IntervalType interval_type{additional_args...};
+
+                // Compute k and beta.
+                int const minus_k = log::floor_log10_pow2(exponent) - kappa;
+                auto const cache = CachePolicy::template get_cache<format>(-minus_k);
+                int const beta = exponent + log::floor_log2_pow10(-minus_k);
+
+                // Compute zi and deltai.
+                // 10^kappa <= deltai < 10^(kappa + 1)
+                auto const deltai = compute_delta(cache, beta);
+                // For the case of binary32, the result of integer check is not correct for
+                // 29711844 * 2^-82
+                // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18
+                // and 29711844 * 2^-81
+                // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17,
+                // and they are the unique counterexamples. However, since 29711844 is even,
+                // this does not cause any problem for the endpoints calculations; it can only
+                // cause a problem when we need to perform integer check for the center.
+                // Fortunately, with these inputs, that branch is never executed, so we are fine.
+                auto const [zi, is_z_integer] = compute_mul((two_fc | 1) << beta, cache);
+
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 2: Try larger divisor; remove trailing zeros if necessary
+                //////////////////////////////////////////////////////////////////////
+
+                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
+                constexpr auto small_divisor = compute_power<kappa>(std::uint32_t(10));
+
+                // Using an upper bound on zi, we might be able to optimize the division
+                // better than the compiler; we are computing zi / big_divisor here.
+                ret_value.significand =
+                    div::divide_by_pow10<kappa + 1, carrier_uint,
+                                         (carrier_uint(1) << (significand_bits + 1)) * big_divisor -
+                                             1>(zi);
+                auto r = std::uint32_t(zi - big_divisor * ret_value.significand); // Remainder of zi modulo big_divisor.
+
+                if (r < deltai) {
+                    // Exclude the right endpoint if necessary.
+                    if (r == 0 && (is_z_integer & !interval_type.include_right_endpoint())) {
+                        if constexpr (BinaryToDecimalRoundingPolicy::tag ==
+                                      policy_impl::binary_to_decimal_rounding::tag_t::do_not_care) {
+                            ret_value.significand *= 10;
+                            ret_value.exponent = minus_k + kappa;
+                            --ret_value.significand;
+                            TrailingZeroPolicy::template no_trailing_zeros<impl>(ret_value);
+                            return ret_value;
+                        }
+                        else {
+                            --ret_value.significand;
+                            r = big_divisor;
+                            goto small_divisor_case_label;
+                        }
+                    }
+                }
+                else if (r > deltai) {
+                    goto small_divisor_case_label;
+                }
+                else {
+                    // r == deltai; compare fractional parts.
+                    auto const [xi_parity, x_is_integer] =
+                        compute_mul_parity(two_fc - 1, cache, beta);
+
+                    if (!(xi_parity | (x_is_integer & interval_type.include_left_endpoint()))) {
+                        goto small_divisor_case_label;
+                    }
+                }
+                ret_value.exponent = minus_k + kappa + 1;
+
+                // We may need to remove trailing zeros.
+                TrailingZeroPolicy::template on_trailing_zeros<impl>(ret_value);
+                return ret_value;
+
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 3: Find the significand with the smaller divisor
+                //////////////////////////////////////////////////////////////////////
+
+            small_divisor_case_label:
+                TrailingZeroPolicy::template no_trailing_zeros<impl>(ret_value);
+                ret_value.significand *= 10;
+                ret_value.exponent = minus_k + kappa;
+
+                if constexpr (BinaryToDecimalRoundingPolicy::tag ==
+                              policy_impl::binary_to_decimal_rounding::tag_t::do_not_care) {
+                    // Normally, we want to compute
+                    // ret_value.significand += r / small_divisor
+                    // and return, but we need to take care of the case that the resulting
+                    // value is exactly the right endpoint, while that is not included in the
+                    // interval.
+                    if (!interval_type.include_right_endpoint()) {
+                        // Is r divisible by 10^kappa? Run the check first: it also divides r in place, which both branches need.
+                        if (div::check_divisibility_and_divide_by_pow10<kappa>(r) && is_z_integer) {
+                            // This should be in the interval.
+                            ret_value.significand += r - 1;
+                        }
+                        else {
+                            ret_value.significand += r;
+                        }
+                    }
+                    else {
+                        ret_value.significand += div::small_division_by_pow10<kappa>(r);
+                    }
+                }
+                else {
+                    auto dist = r - (deltai / 2) + (small_divisor / 2);
+                    bool const approx_y_parity = ((dist ^ (small_divisor / 2)) & 1) != 0;
+
+                    // Is dist divisible by 10^kappa?
+                    bool const divisible_by_small_divisor =
+                        div::check_divisibility_and_divide_by_pow10<kappa>(dist);
+
+                    // Add dist / 10^kappa to the significand.
+                    ret_value.significand += dist;
+
+                    if (divisible_by_small_divisor) {
+                        // Check z^(f) >= epsilon^(f).
+                        // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1,
+                        // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f).
+                        // Since there are only 2 possibilities, we only need to care about the
+                        // parity. Also, zi and r should have the same parity since the divisor is
+                        // an even number.
+                        auto const [yi_parity, is_y_integer] =
+                            compute_mul_parity(two_fc, cache, beta);
+                        if (yi_parity != approx_y_parity) {
+                            --ret_value.significand;
+                        }
+                        else {
+                            // If z^(f) >= epsilon^(f), we might have a tie
+                            // when z^(f) == epsilon^(f), or equivalently, when y is an integer.
+                            // For tie-to-up case, we can just choose the upper one.
+                            if (BinaryToDecimalRoundingPolicy::prefer_round_down(ret_value) &
+                                is_y_integer) {
+                                --ret_value.significand;
+                            }
+                        }
+                    }
+                }
+                return ret_value;
+            }
+
+            template <class ReturnType, class IntervalType, class TrailingZeroPolicy,
+                      class BinaryToDecimalRoundingPolicy, class CachePolicy,
+                      class... AdditionalArgs>
+            JKJ_SAFEBUFFERS static ReturnType // Decimal result for the nearest-rounding, shorter-interval case.
+            compute_nearest_shorter(int const exponent, // The only numeric input used by this case.
+                                    AdditionalArgs... additional_args) noexcept {
+                ReturnType ret_value;
+                IntervalType interval_type{additional_args...}; // Queried below for endpoint inclusion.
+
+                // Compute k and beta; -minus_k selects the cache entry, beta feeds the shifts below.
+                int const minus_k = log::floor_log10_pow2_minus_log10_4_over_3(exponent);
+                int const beta = exponent + log::floor_log2_pow10(-minus_k);
+
+                // Compute xi and zi (left and right endpoints of the shorter interval).
+                auto const cache = CachePolicy::template get_cache<format>(-minus_k);
+
+                auto xi = compute_left_endpoint_for_shorter_interval_case(cache, beta);
+                auto zi = compute_right_endpoint_for_shorter_interval_case(cache, beta);
+
+                // If we don't accept the right endpoint and
+                // if the right endpoint is an integer, decrease it.
+                if (!interval_type.include_right_endpoint() &&
+                    is_right_endpoint_integer_shorter_interval(exponent)) {
+                    --zi;
+                }
+                // If we don't accept the left endpoint or
+                // if the left endpoint is not an integer, increase it.
+                if (!interval_type.include_left_endpoint() ||
+                    !is_left_endpoint_integer_shorter_interval(exponent)) {
+                    ++xi;
+                }
+
+                // Try the bigger divisor (10) first: take floor(zi / 10) as the candidate.
+                ret_value.significand = zi / 10;
+
+                // If it succeeds (10 * candidate is still >= xi), hand over to the trailing zero policy and return.
+                if (ret_value.significand * 10 >= xi) {
+                    ret_value.exponent = minus_k + 1;
+                    TrailingZeroPolicy::template on_trailing_zeros<impl>(ret_value);
+                    return ret_value;
+                }
+
+                // Otherwise, compute the round-up of y.
+                TrailingZeroPolicy::template no_trailing_zeros<impl>(ret_value);
+                ret_value.significand = compute_round_up_for_shorter_interval_case(cache, beta);
+                ret_value.exponent = minus_k;
+
+                // When a tie occurs (only checked within the tie threshold range), choose according to the policy.
+                if (BinaryToDecimalRoundingPolicy::prefer_round_down(ret_value) &&
+                    exponent >= shorter_interval_tie_lower_threshold &&
+                    exponent <= shorter_interval_tie_upper_threshold) {
+                    --ret_value.significand;
+                }
+                else if (ret_value.significand < xi) { // Candidate is below the left endpoint; step up by one.
+                    ++ret_value.significand;
+                }
+                return ret_value;
+            }
+
+            template <class ReturnType, class TrailingZeroPolicy, class CachePolicy>
+            JKJ_SAFEBUFFERS static ReturnType // Directed-rounding search that starts from the left endpoint xi.
+            compute_left_closed_directed(carrier_uint const two_fc, int exponent) noexcept {
+                //////////////////////////////////////////////////////////////////////
+                // Step 1: Schubfach multiplier calculation
+                //////////////////////////////////////////////////////////////////////
+
+                ReturnType ret_value;
+
+                // Compute k and beta; -minus_k selects the cache entry.
+                int const minus_k = log::floor_log10_pow2(exponent) - kappa;
+                auto const cache = CachePolicy::template get_cache<format>(-minus_k);
+                int const beta = exponent + log::floor_log2_pow10(-minus_k);
+
+                // Compute xi and deltai.
+                // 10^kappa <= deltai < 10^(kappa + 1)
+                auto const deltai = compute_delta(cache, beta);
+                auto [xi, is_x_integer] = compute_mul(two_fc << beta, cache);
+
+                // Deal with the unique exceptional cases
+                // 29711844 * 2^-82
+                // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18
+                // and 29711844 * 2^-81
+                // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17
+                // for binary32.
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    if (exponent <= -80) {
+                        is_x_integer = false;
+                    }
+                }
+
+                if (!is_x_integer) { // x has a fractional part: use the next integer up.
+                    ++xi;
+                }
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 2: Try larger divisor; remove trailing zeros if necessary
+                //////////////////////////////////////////////////////////////////////
+
+                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
+
+                // Using an upper bound on xi, we might be able to optimize the division
+                // better than the compiler; we are computing xi / big_divisor here.
+                ret_value.significand =
+                    div::divide_by_pow10<kappa + 1, carrier_uint,
+                                         (carrier_uint(1) << (significand_bits + 1)) * big_divisor -
+                                             1>(xi);
+                auto r = std::uint32_t(xi - big_divisor * ret_value.significand);
+
+                if (r != 0) { // Not exact: round the quotient up; r becomes the distance from xi to that ceiling.
+                    ++ret_value.significand;
+                    r = big_divisor - r;
+                }
+
+                if (r > deltai) { // The ceiling lies more than deltai above xi: use the small divisor.
+                    goto small_divisor_case_label;
+                }
+                else if (r == deltai) {
+                    // Compare the fractional parts.
+                    // This branch is never taken for the exceptional cases
+                    // 2f_c = 29711482, e = -81
+                    // (6.1442649164096937243516663440523473127541365101933479309082... * 10^-18)
+                    // and 2f_c = 29711482, e = -80
+                    // (1.2288529832819387448703332688104694625508273020386695861816... * 10^-17).
+                    auto const [zi_parity, is_z_integer] =
+                        compute_mul_parity(two_fc + 2, cache, beta);
+                    if (zi_parity || is_z_integer) {
+                        goto small_divisor_case_label;
+                    }
+                }
+
+                // The ceiling is inside, so we are done.
+                ret_value.exponent = minus_k + kappa + 1;
+                TrailingZeroPolicy::template on_trailing_zeros<impl>(ret_value);
+                return ret_value;
+
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 3: Find the significand with the smaller divisor
+                //////////////////////////////////////////////////////////////////////
+
+            small_divisor_case_label:
+                ret_value.significand *= 10; // Make room for one more decimal digit.
+                ret_value.significand -= div::small_division_by_pow10<kappa>(r); // r is the overshoot of the ceiling, hence the subtraction.
+                ret_value.exponent = minus_k + kappa;
+                TrailingZeroPolicy::template no_trailing_zeros<impl>(ret_value);
+                return ret_value;
+            }
+
+            template <class ReturnType, class TrailingZeroPolicy, class CachePolicy>
+            JKJ_SAFEBUFFERS static ReturnType // Directed-rounding search that ends at the right endpoint zi.
+            compute_right_closed_directed(carrier_uint const two_fc, int const exponent,
+                                          bool shorter_interval) noexcept {
+                //////////////////////////////////////////////////////////////////////
+                // Step 1: Schubfach multiplier calculation
+                //////////////////////////////////////////////////////////////////////
+
+                ReturnType ret_value;
+
+                // Compute k and beta; the shorter interval uses exponent - 1 when computing k.
+                int const minus_k =
+                    log::floor_log10_pow2(exponent - (shorter_interval ? 1 : 0)) - kappa;
+                auto const cache = CachePolicy::template get_cache<format>(-minus_k);
+                int const beta = exponent + log::floor_log2_pow10(-minus_k);
+
+                // Compute zi and deltai (deltai is computed with beta - 1 for the shorter interval).
+                // 10^kappa <= deltai < 10^(kappa + 1)
+                auto const deltai =
+                    shorter_interval ? compute_delta(cache, beta - 1) : compute_delta(cache, beta);
+                carrier_uint const zi = compute_mul(two_fc << beta, cache).result;
+
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 2: Try larger divisor; remove trailing zeros if necessary
+                //////////////////////////////////////////////////////////////////////
+
+                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
+
+                // Using an upper bound on zi, we might be able to optimize the division better than
+                // the compiler; we are computing zi / big_divisor here.
+                ret_value.significand =
+                    div::divide_by_pow10<kappa + 1, carrier_uint,
+                                         (carrier_uint(1) << (significand_bits + 1)) * big_divisor -
+                                             1>(zi);
+                auto const r = std::uint32_t(zi - big_divisor * ret_value.significand); // Remainder of zi above the floor.
+
+                if (r > deltai) { // The floor lies more than deltai below zi: use the small divisor.
+                    goto small_divisor_case_label;
+                }
+                else if (r == deltai) {
+                    // Compare the fractional parts.
+                    if (!compute_mul_parity(two_fc - (shorter_interval ? 1 : 2), cache, beta)
+                             .parity) {
+                        goto small_divisor_case_label;
+                    }
+                }
+
+                // The floor is inside, so we are done.
+                ret_value.exponent = minus_k + kappa + 1;
+                TrailingZeroPolicy::template on_trailing_zeros<impl>(ret_value);
+                return ret_value;
+
+
+                //////////////////////////////////////////////////////////////////////
+                // Step 3: Find the significand with the small divisor
+                //////////////////////////////////////////////////////////////////////
+
+            small_divisor_case_label:
+                ret_value.significand *= 10; // Make room for one more decimal digit.
+                ret_value.significand += div::small_division_by_pow10<kappa>(r); // r is the remainder above the floor, hence the addition.
+                ret_value.exponent = minus_k + kappa;
+                TrailingZeroPolicy::template no_trailing_zeros<impl>(ret_value);
+                return ret_value;
+            }
+
+            // Remove trailing decimal zeros from n in place and return the number of zeros removed (n != 0).
+            JKJ_FORCEINLINE static int remove_trailing_zeros(carrier_uint& n) noexcept {
+                assert(n != 0);
+
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    constexpr auto mod_inv_5 = std::uint32_t(0xcccc'cccd); // 5 * mod_inv_5 == 1 (mod 2^32).
+                    constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; // Hence 25 * mod_inv_25 == 1 (mod 2^32).
+
+                    int s = 0;
+                    while (true) { // Strip two zeros (a factor of 100) per iteration.
+                        auto q = bits::rotr(n * mod_inv_25, 2);
+                        if (q <= std::numeric_limits<std::uint32_t>::max() / 100) { // Accepted: q replaces n as n / 100.
+                            n = q;
+                            s += 2;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    auto q = bits::rotr(n * mod_inv_5, 1); // One last attempt for a single factor of 10.
+                    if (q <= std::numeric_limits<std::uint32_t>::max() / 10) {
+                        n = q;
+                        s |= 1; // s is even here, so this adds exactly one.
+                    }
+
+                    return s;
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+
+                    // Divide by 10^8 and reduce to 32-bits if divisible.
+                    // Since ret_value.significand <= (2^53 * 1000 - 1) / 1000 < 10^16,
+                    // n has at most 16 digits.
+
+                    // This magic number is ceil(2^90 / 10^8).
+                    constexpr auto magic_number = std::uint64_t(12379400392853802749ull);
+                    auto nm = wuint::umul128(n, magic_number);
+
+                    // Is n divisible by 10^8?
+                    if ((nm.high() & ((std::uint64_t(1) << (90 - 64)) - 1)) == 0 &&
+                        nm.low() < magic_number) {
+                        // If yes, work with the quotient.
+                        auto n32 = std::uint32_t(nm.high() >> (90 - 64));
+
+                        constexpr auto mod_inv_5 = std::uint32_t(0xcccc'cccd); // 5 * mod_inv_5 == 1 (mod 2^32).
+                        constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; // Hence 25 * mod_inv_25 == 1 (mod 2^32).
+
+                        int s = 8; // Eight zeros are already accounted for by the division by 10^8.
+                        while (true) { // Strip two zeros (a factor of 100) per iteration.
+                            auto q = bits::rotr(n32 * mod_inv_25, 2);
+                            if (q <= std::numeric_limits<std::uint32_t>::max() / 100) {
+                                n32 = q;
+                                s += 2;
+                            }
+                            else {
+                                break;
+                            }
+                        }
+                        auto q = bits::rotr(n32 * mod_inv_5, 1); // One last attempt for a single factor of 10.
+                        if (q <= std::numeric_limits<std::uint32_t>::max() / 10) {
+                            n32 = q;
+                            s |= 1; // s is even here, so this adds exactly one.
+                        }
+
+                        n = n32;
+                        return s;
+                    }
+
+                    // If n is not divisible by 10^8, work with n itself.
+                    constexpr auto mod_inv_5 = std::uint64_t(0xcccc'cccc'cccc'cccd); // 5 * mod_inv_5 == 1 (mod 2^64).
+                    constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; // Hence 25 * mod_inv_25 == 1 (mod 2^64).
+
+                    int s = 0;
+                    while (true) { // Strip two zeros (a factor of 100) per iteration.
+                        auto q = bits::rotr(n * mod_inv_25, 2);
+                        if (q <= std::numeric_limits<std::uint64_t>::max() / 100) {
+                            n = q;
+                            s += 2;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    auto q = bits::rotr(n * mod_inv_5, 1); // One last attempt for a single factor of 10.
+                    if (q <= std::numeric_limits<std::uint64_t>::max() / 10) {
+                        n = q;
+                        s |= 1; // s is even here, so this adds exactly one.
+                    }
+
+                    return s;
+                }
+            }
+
+            static compute_mul_result compute_mul(carrier_uint u, // Multiplies u by the cache entry; callers read .result and an is-integer flag.
+                                                  cache_entry_type const& cache) noexcept {
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    auto r = wuint::umul96_upper64(u, cache);
+                    return {carrier_uint(r >> 32), carrier_uint(r) == 0}; // {r >> 32, whether r truncated to carrier_uint is zero}.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    auto r = wuint::umul192_upper128(u, cache);
+                    return {r.high(), r.low() == 0}; // {high word, whether the low word is zero}.
+                }
+            }
+
+            static constexpr std::uint32_t compute_delta(cache_entry_type const& cache, // Yields the deltai value used by the compute_* routines.
+                                                         int beta) noexcept {
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    return std::uint32_t(cache >> (cache_bits - 1 - beta)); // Cache shifted right by (cache_bits - 1 - beta), truncated to 32 bits.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    return std::uint32_t(cache.high() >> (carrier_bits - 1 - beta)); // Same idea, using only the high word of the cache.
+                }
+            }
+
+            static compute_mul_parity_result compute_mul_parity(carrier_uint two_f, // Returns {parity bit, is-integer flag} of two_f times the cache entry.
+                                                                cache_entry_type const& cache,
+                                                                int beta) noexcept {
+                assert(beta >= 1);
+                assert(beta < 64);
+
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    auto r = wuint::umul96_lower64(two_f, cache);
+                    return {((r >> (64 - beta)) & 1) != 0, std::uint32_t(r >> (32 - beta)) == 0}; // Bit (64 - beta) of r; then whether the 32 bits directly below it are all zero.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    auto r = wuint::umul192_lower128(two_f, cache);
+                    return {((r.high() >> (64 - beta)) & 1) != 0, // Bit (64 - beta) of the high word.
+                            ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; // Whether the 64 bits directly below it (spanning high and low) are all zero.
+                }
+            }
+
+            static constexpr carrier_uint // Left endpoint (xi) used by compute_nearest_shorter.
+            compute_left_endpoint_for_shorter_interval_case(cache_entry_type const& cache,
+                                                            int beta) noexcept {
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    return carrier_uint((cache - (cache >> (significand_bits + 2))) >> // cache minus cache / 2^(significand_bits + 2) ...
+                                        (cache_bits - significand_bits - 1 - beta)); // ... shifted down by a beta-dependent amount.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    return (cache.high() - (cache.high() >> (significand_bits + 2))) >> // Same formula on the high word of the cache.
+                           (carrier_bits - significand_bits - 1 - beta);
+                }
+            }
+
+            static constexpr carrier_uint // Right endpoint (zi) used by compute_nearest_shorter.
+            compute_right_endpoint_for_shorter_interval_case(cache_entry_type const& cache,
+                                                             int beta) noexcept {
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    return carrier_uint((cache + (cache >> (significand_bits + 1))) >> // cache plus cache / 2^(significand_bits + 1) ...
+                                        (cache_bits - significand_bits - 1 - beta)); // ... shifted down by a beta-dependent amount.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    return (cache.high() + (cache.high() >> (significand_bits + 1))) >> // Same formula on the high word of the cache.
+                           (carrier_bits - significand_bits - 1 - beta);
+                }
+            }
+
+            static constexpr carrier_uint // Rounded value of y used by compute_nearest_shorter when the bigger divisor fails.
+            compute_round_up_for_shorter_interval_case(cache_entry_type const& cache,
+                                                       int beta) noexcept {
+                if constexpr (std::is_same_v<format, ieee754_binary32>) {
+                    return (carrier_uint(cache >> (cache_bits - significand_bits - 2 - beta)) + 1) / // Keep one extra low bit, add 1 ...
+                           2; // ... then halve: this rounds to nearest with halves going up.
+                }
+                else {
+                    static_assert(std::is_same_v<format, ieee754_binary64>);
+                    return ((cache.high() >> (carrier_bits - significand_bits - 2 - beta)) + 1) / 2; // Same rounding on the high word of the cache.
+                }
+            }
+
+            static constexpr bool // True when exponent lies in the inclusive right-endpoint threshold range.
+            is_right_endpoint_integer_shorter_interval(int exponent) noexcept {
+                return exponent >= case_shorter_interval_right_endpoint_lower_threshold &&
+                       exponent <= case_shorter_interval_right_endpoint_upper_threshold;
+            }
+
+            static constexpr bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { // True when exponent lies in the inclusive left-endpoint threshold range.
+                return exponent >= case_shorter_interval_left_endpoint_lower_threshold &&
+                       exponent <= case_shorter_interval_left_endpoint_upper_threshold;
+            }
+        };
+
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Policy holder.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace policy_impl {
+            // The library will specify a list of accepted kinds of policies and their defaults, and
+            // the user will pass a list of policies. The aim of helper classes/functions here is to
+            // do the following:
+            //   1. Check if the policy parameters given by the user are all valid; that means,
+            //      each of them should be of the kinds specified by the library.
+            //      If that's not the case, then the compilation fails.
+            //   2. Check if multiple policy parameters for the same kind are specified by the user.
+            //      If that's the case, then the compilation fails.
+            //   3. Build a class deriving from all policies the user has given, and also from
+            //      the default policies if the user did not specify one for some kinds.
+            // A policy belongs to a certain kind if it is deriving from a base class.
+
+            // For a given kind, find a policy belonging to that kind.
+            // Check if there is more than one such policy.
+            enum class policy_found_info { not_found, unique, repeated };
+            template <class Policy, policy_found_info info>
+            struct found_policy_pair { // Pairs a policy type with how it was found.
+                using policy = Policy;
+                static constexpr auto found_info = info;
+            };
+
+            template <class Base, class DefaultPolicy>
+            struct base_default_pair { // Pairs a kind (its base class) with the default policy of that kind.
+                using base = Base;
+
+                template <class FoundPolicyInfo>
+                static constexpr FoundPolicyInfo get_policy_impl(FoundPolicyInfo) { // Base case: no policies left to inspect.
+                    return {};
+                }
+                template <class FoundPolicyInfo, class FirstPolicy, class... RemainingPolicies>
+                static constexpr auto get_policy_impl(FoundPolicyInfo, FirstPolicy,
+                                                      RemainingPolicies... remainings) {
+                    if constexpr (std::is_base_of_v<Base, FirstPolicy>) { // FirstPolicy is of this kind.
+                        if constexpr (FoundPolicyInfo::found_info == policy_found_info::not_found) {
+                            return get_policy_impl(
+                                found_policy_pair<FirstPolicy, policy_found_info::unique>{},
+                                remainings...);
+                        }
+                        else { // One was already found earlier: mark as repeated.
+                            return get_policy_impl(
+                                found_policy_pair<FirstPolicy, policy_found_info::repeated>{},
+                                remainings...);
+                        }
+                    }
+                    else { // Not of this kind: carry the current state forward.
+                        return get_policy_impl(FoundPolicyInfo{}, remainings...);
+                    }
+                }
+
+                template <class... Policies>
+                static constexpr auto get_policy(Policies... policies) {
+                    return get_policy_impl(
+                        found_policy_pair<DefaultPolicy, policy_found_info::not_found>{}, // Start from the default, marked not_found.
+                        policies...);
+                }
+            };
+            template <class... BaseDefaultPairs>
+            struct base_default_pair_list {};
+
+            // Check if a given policy belongs to one of the kinds specified by the library.
+            template <class Policy>
+            constexpr bool check_policy_validity(Policy, base_default_pair_list<>) {
+                return false; // No kinds left to match: the policy is invalid.
+            }
+            template <class Policy, class FirstBaseDefaultPair, class... RemainingBaseDefaultPairs>
+            constexpr bool check_policy_validity(
+                Policy,
+                base_default_pair_list<FirstBaseDefaultPair, RemainingBaseDefaultPairs...>) {
+                return std::is_base_of_v<typename FirstBaseDefaultPair::base, Policy> ||
+                       check_policy_validity(
+                           Policy{}, base_default_pair_list<RemainingBaseDefaultPairs...>{});
+            }
+
+            template <class BaseDefaultPairList>
+            constexpr bool check_policy_list_validity(BaseDefaultPairList) {
+                return true; // An empty policy list is trivially valid.
+            }
+
+            template <class BaseDefaultPairList, class FirstPolicy, class... RemainingPolicies>
+            constexpr bool check_policy_list_validity(BaseDefaultPairList, FirstPolicy,
+                                                      RemainingPolicies... remaining_policies) {
+                return check_policy_validity(FirstPolicy{}, BaseDefaultPairList{}) &&
+                       check_policy_list_validity(BaseDefaultPairList{}, remaining_policies...);
+            }
+
+            // Build policy_holder.
+            template <bool repeated_, class... FoundPolicyPairs>
+            struct found_policy_pair_list {
+                static constexpr bool repeated = repeated_; // True if any kind was specified more than once.
+            };
+
+            template <class... Policies>
+            struct policy_holder : Policies... {}; // Derives from every selected policy.
+
+            template <bool repeated, class... FoundPolicyPairs, class... Policies>
+            constexpr auto
+            make_policy_holder_impl(base_default_pair_list<>,
+                                    found_policy_pair_list<repeated, FoundPolicyPairs...>,
+                                    Policies...) {
+                return found_policy_pair_list<repeated, FoundPolicyPairs...>{}; // Base case: every kind has been processed.
+            }
+
+            template <class FirstBaseDefaultPair, class... RemainingBaseDefaultPairs, bool repeated,
+                      class... FoundPolicyPairs, class... Policies>
+            constexpr auto make_policy_holder_impl(
+                base_default_pair_list<FirstBaseDefaultPair, RemainingBaseDefaultPairs...>,
+                found_policy_pair_list<repeated, FoundPolicyPairs...>, Policies... policies) {
+                using new_found_policy_pair =
+                    decltype(FirstBaseDefaultPair::get_policy(policies...));
+
+                return make_policy_holder_impl( // Recurse on the remaining kinds, accumulating the found pair and the repeated flag.
+                    base_default_pair_list<RemainingBaseDefaultPairs...>{},
+                    found_policy_pair_list < repeated ||
+                        new_found_policy_pair::found_info == policy_found_info::repeated,
+                    new_found_policy_pair, FoundPolicyPairs... > {}, policies...);
+            }
+
+            template <bool repeated, class... RawPolicies>
+            constexpr auto convert_to_policy_holder(found_policy_pair_list<repeated>,
+                                                    RawPolicies...) {
+                return policy_holder<RawPolicies...>{}; // Base case: all pairs have been unpacked into RawPolicies.
+            }
+
+            template <bool repeated, class FirstFoundPolicyPair, class... RemainingFoundPolicyPairs,
+                      class... RawPolicies>
+            constexpr auto
+            convert_to_policy_holder(found_policy_pair_list<repeated, FirstFoundPolicyPair,
+                                                            RemainingFoundPolicyPairs...>,
+                                     RawPolicies... policies) {
+                return convert_to_policy_holder(
+                    found_policy_pair_list<repeated, RemainingFoundPolicyPairs...>{},
+                    typename FirstFoundPolicyPair::policy{}, policies...); // Prepend this pair's policy type.
+            }
+
+            template <class BaseDefaultPairList, class... Policies>
+            constexpr auto make_policy_holder(BaseDefaultPairList, Policies... policies) { // Entry point: validate, detect repeats, then build the holder.
+                static_assert(check_policy_list_validity(BaseDefaultPairList{}, Policies{}...),
+                              "jkj::dragonbox: an invalid policy is specified");
+
+                using policy_pair_list = decltype(make_policy_holder_impl(
+                    BaseDefaultPairList{}, found_policy_pair_list<false>{}, policies...));
+
+                static_assert(!policy_pair_list::repeated,
+                              "jkj::dragonbox: each policy should be specified at most once");
+
+                return convert_to_policy_holder(policy_pair_list{});
+            }
+        }
+    }
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // The interface function.
+    ////////////////////////////////////////////////////////////////////////////////////////
+
+    template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
+    JKJ_FORCEINLINE JKJ_SAFEBUFFERS auto
+    to_decimal(signed_significand_bits<Float, FloatTraits> signed_significand_bits,
+               unsigned int exponent_bits, Policies... policies) noexcept {
+        // Build policy holder type.
+        using namespace detail::policy_impl;
+        using policy_holder = decltype(make_policy_holder(
+            base_default_pair_list<base_default_pair<sign::base, sign::return_sign>,
+                                   base_default_pair<trailing_zero::base, trailing_zero::remove>,
+                                   base_default_pair<decimal_to_binary_rounding::base,
+                                                     decimal_to_binary_rounding::nearest_to_even>,
+                                   base_default_pair<binary_to_decimal_rounding::base,
+                                                     binary_to_decimal_rounding::to_even>,
+                                   base_default_pair<cache::base, cache::full>>{},
+            policies...));
+
+        using return_type =
+            decimal_fp<typename FloatTraits::carrier_uint, policy_holder::return_has_sign,
+                       policy_holder::report_trailing_zeros>;
+
+        return_type ret = policy_holder::delegate(
+            signed_significand_bits,
+            [exponent_bits, signed_significand_bits](auto interval_type_provider) {
+                using format = typename FloatTraits::format;
+                constexpr auto tag = decltype(interval_type_provider)::tag;
+
+                auto two_fc = signed_significand_bits.remove_sign_bit_and_shift();
+                auto exponent = int(exponent_bits);
+
+                if constexpr (tag == decimal_to_binary_rounding::tag_t::to_nearest) {
+                    // Is the input a normal number?
+                    if (exponent != 0) {
+                        exponent += format::exponent_bias - format::significand_bits;
+
+                        // Shorter interval case; proceed like Schubfach.
+                        // One might think this condition is wrong, since when exponent_bits == 1
+                        // and two_fc == 0, the interval is actually regular. However, it turns out
+                        // that this seemingly wrong condition is actually fine, because the end
+                        // result is anyway the same.
+                        //
+                        // [binary32]
+                        // (fc-1/2) * 2^e = 1.175'494'28... * 10^-38
+                        // (fc-1/4) * 2^e = 1.175'494'31... * 10^-38
+                        //    fc    * 2^e = 1.175'494'35... * 10^-38
+                        // (fc+1/2) * 2^e = 1.175'494'42... * 10^-38
+                        //
+                        // Hence, shorter_interval_case will return 1.175'494'4 * 10^-38.
+                        // 1.175'494'3 * 10^-38 is also a correct shortest representation that will
+                        // be rejected if we assume shorter interval, but 1.175'494'4 * 10^-38 is
+                        // closer to the true value so it doesn't matter.
+                        //
+                        // [binary64]
+                        // (fc-1/2) * 2^e = 2.225'073'858'507'201'13... * 10^-308
+                        // (fc-1/4) * 2^e = 2.225'073'858'507'201'25... * 10^-308
+                        //    fc    * 2^e = 2.225'073'858'507'201'38... * 10^-308
+                        // (fc+1/2) * 2^e = 2.225'073'858'507'201'63... * 10^-308
+                        //
+                        // Hence, shorter_interval_case will return 2.225'073'858'507'201'4 *
+                        // 10^-308. This is indeed of the shortest length, and it is the unique one
+                        // closest to the true value among valid representations of the same length.
+                        static_assert(std::is_same_v<format, ieee754_binary32> ||
+                                      std::is_same_v<format, ieee754_binary64>);
+
+                        if (two_fc == 0) {
+                            return decltype(interval_type_provider)::invoke_shorter_interval_case(
+                                signed_significand_bits, [exponent](auto... additional_args) {
+                                    return detail::impl<Float, FloatTraits>::
+                                        template compute_nearest_shorter<
+                                            return_type,
+                                            typename decltype(interval_type_provider)::
+                                                shorter_interval_type,
+                                            typename policy_holder::trailing_zero_policy,
+                                            typename policy_holder::
+                                                binary_to_decimal_rounding_policy,
+                                            typename policy_holder::cache_policy>(
+                                            exponent, additional_args...);
+                                });
+                        }
+
+                        two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1));
+                    }
+                    // Is the input a subnormal number?
+                    else {
+                        exponent = format::min_exponent - format::significand_bits;
+                    }
+
+                    return decltype(interval_type_provider)::invoke_normal_interval_case(
+                        signed_significand_bits, [two_fc, exponent](auto... additional_args) {
+                            return detail::impl<Float, FloatTraits>::
+                                template compute_nearest_normal<
+                                    return_type,
+                                    typename decltype(interval_type_provider)::normal_interval_type,
+                                    typename policy_holder::trailing_zero_policy,
+                                    typename policy_holder::binary_to_decimal_rounding_policy,
+                                    typename policy_holder::cache_policy>(two_fc, exponent,
+                                                                          additional_args...);
+                        });
+                }
+                else if constexpr (tag == decimal_to_binary_rounding::tag_t::left_closed_directed) {
+                    // Is the input a normal number?
+                    if (exponent != 0) {
+                        exponent += format::exponent_bias - format::significand_bits;
+                        two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1));
+                    }
+                    // Is the input a subnormal number?
+                    else {
+                        exponent = format::min_exponent - format::significand_bits;
+                    }
+
+                    return detail::impl<Float>::template compute_left_closed_directed<
+                        return_type, typename policy_holder::trailing_zero_policy,
+                        typename policy_holder::cache_policy>(two_fc, exponent);
+                }
+                else {
+                    static_assert(tag == decimal_to_binary_rounding::tag_t::right_closed_directed);
+
+                    bool shorter_interval = false;
+
+                    // Is the input a normal number?
+                    if (exponent != 0) {
+                        if (two_fc == 0 && exponent != 1) {
+                            shorter_interval = true;
+                        }
+                        exponent += format::exponent_bias - format::significand_bits;
+                        two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1));
+                    }
+                    // Is the input a subnormal number?
+                    else {
+                        exponent = format::min_exponent - format::significand_bits;
+                    }
+
+                    return detail::impl<Float>::template compute_right_closed_directed<
+                        return_type, typename policy_holder::trailing_zero_policy,
+                        typename policy_holder::cache_policy>(two_fc, exponent, shorter_interval);
+                }
+            });
+
+        policy_holder::handle_sign(signed_significand_bits, ret);
+        return ret;
+    }
+
+    template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
+    JKJ_FORCEINLINE JKJ_SAFEBUFFERS auto to_decimal(Float x, Policies... policies) noexcept {
+        // Decompose x into its raw fields and defer to the bit-level overload; x must be finite.
+        auto const bits = float_bits<Float, FloatTraits>(x);
+        assert(bits.is_finite());
+        auto const exp_field = bits.extract_exponent_bits();
+        auto const sign_and_significand = bits.remove_exponent_bits(exp_field);
+        return to_decimal<Float, FloatTraits>(sign_and_significand, exp_field, policies...);
+    }
+}
+
+#undef JKJ_FORCEINLINE
+#undef JKJ_SAFEBUFFERS
+#undef JKJ_DRAGONBOX_HAS_BUILTIN
+
+#endif
diff --git a/server/dragonbox/dragonbox_to_chars.h b/server/dragonbox/dragonbox_to_chars.h
new file mode 100644
index 0000000..ca5384f
--- /dev/null
+++ b/server/dragonbox/dragonbox_to_chars.h
@@ -0,0 +1,108 @@
+// Copyright 2020-2022 Junekey Jeon
+//
+// The contents of this file may be used under the terms of
+// the Apache License v2.0 with LLVM Exceptions.
+//
+//    (See accompanying file LICENSE-Apache or copy at
+//     https://llvm.org/foundation/relicensing/LICENSE.txt)
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE-Boost or copy at
+//     https://www.boost.org/LICENSE_1_0.txt)
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+
+#ifndef JKJ_HEADER_DRAGONBOX_TO_CHARS
+#define JKJ_HEADER_DRAGONBOX_TO_CHARS
+
+#include "dragonbox.h"
+
+namespace jkj::dragonbox {
+    namespace to_chars_detail {
+        template <class Float, class FloatTraits> // declaration only; float/double specializations are in dragonbox_to_chars.hpp
+        extern char* to_chars(typename FloatTraits::carrier_uint significand, int exponent,
+                              char* buffer) noexcept;
+
+        // Core of to_chars_n; a plain function template so that no tag dispatch (and the ABI
+        // overhead it incurs) is needed. Returns the position just past the last character written.
+        template <class PolicyHolder, class Float, class FloatTraits>
+        char* to_chars_n_impl(float_bits<Float, FloatTraits> br, char* buffer) noexcept {
+            auto const exp_field = br.extract_exponent_bits();
+            auto const sig = br.remove_exponent_bits(exp_field);
+
+            // Non-finite input: NaN is written without a sign, infinity with its sign.
+            if (!br.is_finite(exp_field)) {
+                if (!sig.has_all_zero_significand_bits()) {
+                    std::memcpy(buffer, "NaN", 3);
+                    return buffer + 3;
+                }
+                if (sig.is_negative()) {
+                    *buffer++ = '-';
+                }
+                std::memcpy(buffer, "Infinity", 8);
+                return buffer + 8;
+            }
+
+            // Finite input: the sign is written here; to_decimal gets policy::sign::ignore.
+            if (sig.is_negative()) {
+                *buffer++ = '-';
+            }
+
+            // Zero is written as a single '0' (after the '-' for negative zero).
+            if (!br.is_nonzero()) {
+                *buffer = '0';
+                return buffer + 1;
+            }
+
+            // Compute the decimal significand/exponent, then hand them to the digit printer.
+            auto const decimal = to_decimal<Float, FloatTraits>(
+                sig, exp_field, policy::sign::ignore, policy::trailing_zero::ignore,
+                typename PolicyHolder::decimal_to_binary_rounding_policy{},
+                typename PolicyHolder::binary_to_decimal_rounding_policy{},
+                typename PolicyHolder::cache_policy{});
+            return to_chars_detail::to_chars<Float, FloatTraits>(decimal.significand,
+                                                                 decimal.exponent, buffer);
+        }
+    }
+
+    // Writes x to buffer without a null terminator; returns the next-to-end position.
+    template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
+    char* to_chars_n(Float x, char* buffer, Policies... policies) noexcept {
+        using namespace jkj::dragonbox::detail::policy_impl;
+        // (base, default) pairs handed to make_policy_holder together with the caller's policies.
+        using d2b_default = base_default_pair<decimal_to_binary_rounding::base,
+                                              decimal_to_binary_rounding::nearest_to_even>;
+        using b2d_default = base_default_pair<binary_to_decimal_rounding::base,
+                                              binary_to_decimal_rounding::to_even>;
+        using cache_default = base_default_pair<cache::base, cache::full>;
+        using default_list = base_default_pair_list<d2b_default, b2d_default, cache_default>;
+        using policy_holder = decltype(make_policy_holder(default_list{}, policies...));
+        return to_chars_detail::to_chars_n_impl<policy_holder>(float_bits<Float, FloatTraits>(x),
+                                                               buffer);
+    }
+
+    // Same as to_chars_n, but also null-terminates; returns a pointer to the terminator.
+    template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
+    char* to_chars(Float x, char* buffer, Policies... policies) noexcept {
+        char* const terminator = to_chars_n<Float, FloatTraits>(x, buffer, policies...);
+        *terminator = '\0';
+        return terminator;
+    }
+
+    // Maximum required buffer size (excluding null-terminator).
+    // Common to both formats: sign(1) + decimal_point(1) + exp_marker(1) + exp_sign(1).
+    // Format-dependent part:
+    //   ieee754_binary32: significand(9) + exp(2)
+    //   any other format (treated as ieee754_binary64): significand(17) + exp(3)
+    template <class FloatFormat>
+    inline constexpr std::size_t max_output_string_length =
+        (1 + 1 + 1 + 1) +
+        (std::is_same_v<FloatFormat, ieee754_binary32>
+             ? (9 + 2)
+             : (17 + 3));
+}
+
+#endif
diff --git a/server/dragonbox/dragonbox_to_chars.hpp b/server/dragonbox/dragonbox_to_chars.hpp
new file mode 100644
index 0000000..7199d74
--- /dev/null
+++ b/server/dragonbox/dragonbox_to_chars.hpp
@@ -0,0 +1,521 @@
+// Copyright 2020-2022 Junekey Jeon
+//
+// The contents of this file may be used under the terms of
+// the Apache License v2.0 with LLVM Exceptions.
+//
+//    (See accompanying file LICENSE-Apache or copy at
+//     https://llvm.org/foundation/relicensing/LICENSE.txt)
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE-Boost or copy at
+//     https://www.boost.org/LICENSE_1_0.txt)
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+
+#pragma once 
+
+#include "dragonbox_to_chars.h"
+
+#if defined(__GNUC__) || defined(__clang__) // GCC/Clang: inline plus the always_inline attribute
+    #define JKJ_FORCEINLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER) // MSVC: __forceinline keyword
+    #define JKJ_FORCEINLINE __forceinline
+#else // any other compiler: plain inline
+    #define JKJ_FORCEINLINE inline
+#endif
+
+namespace jkj::dragonbox {
+    namespace to_chars_detail {
+        // Two-digit lookup: entries [2*i] and [2*i + 1] are the ASCII digits of i, for i in 0..99.
+        // The trailing "//"s keep clang-format from ruining this alignment; thanks to reddit user u/mcmcc:
+        // https://www.reddit.com/r/cpp/comments/so3wx9/dragonbox_110_is_released_a_fast_floattostring/hw8z26r/?context=3
+        static constexpr char radix_100_table[] = {
+            '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', //
+            '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', //
+            '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', //
+            '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', //
+            '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', //
+            '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', //
+            '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', //
+            '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', //
+            '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', //
+            '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', //
+            '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', //
+            '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', //
+            '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', //
+            '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', //
+            '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', //
+            '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', //
+            '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', //
+            '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', //
+            '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', //
+            '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'  //
+        };
+        static constexpr char radix_100_head_table[] = { // pair i (0..99): first decimal digit of i, then '.'
+            '0', '.', '1', '.', '2', '.', '3', '.', '4', '.', //
+            '5', '.', '6', '.', '7', '.', '8', '.', '9', '.', //
+            '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', //
+            '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', //
+            '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', //
+            '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', //
+            '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', //
+            '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', //
+            '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', //
+            '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', //
+            '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', //
+            '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', //
+            '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', //
+            '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', //
+            '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', //
+            '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', //
+            '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', //
+            '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', //
+            '9', '.', '9', '.', '9', '.', '9', '.', '9', '.', //
+            '9', '.', '9', '.', '9', '.', '9', '.', '9', '.'  //
+        };
+
+        // These digit generation routines are inspired by James Anhalt's itoa algorithm:
+        // https://github.com/jeaiii/itoa
+        // The main idea is for given n, find y such that floor(10^k * y / 2^32) = n holds,
+        // where k is an appropriate integer depending on the length of n.
+        // For example, if n = 1234567, we set k = 6. In this case, we have
+        // floor(y / 2^32) = 1,
+        // floor(10^2 * ((10^0 * y) mod 2^32) / 2^32) = 23,
+        // floor(10^2 * ((10^2 * y) mod 2^32) / 2^32) = 45, and
+        // floor(10^2 * ((10^4 * y) mod 2^32) / 2^32) = 67.
+        // See https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ for more explanation.
+
+        JKJ_FORCEINLINE static void print_9_digits(std::uint32_t s32, int& exponent,
+                                                   char*& buffer) noexcept {
+            // Writes s32 as "d[.ddd...]" at buffer with trailing zeros dropped, advances buffer past
+            // the output, and adds (digit count of s32 - 1) to exponent. Bytes past the new buffer may be dirtied.
+            // -- IEEE-754 binary32: since we do not cut trailing zeros in advance, s32 must be of
+            // 6~9 digits unless the original input was subnormal. In particular, when it is of 9
+            // digits it shouldn't have any trailing zeros.
+            // -- IEEE-754 binary64: s32 must be of 7~9 digits unless the input is subnormal, and
+            // it shouldn't have any trailing zeros if it is of 9 digits.
+            if (s32 >= 1'0000'0000) {
+                // 9 digits.
+                // 1441151882 = ceil(2^57 / 1'0000'0000) + 1
+                auto prod = s32 * std::uint64_t(1441151882);
+                prod >>= 25;
+                std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                prod = std::uint32_t(prod) * std::uint64_t(100);
+                std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                prod = std::uint32_t(prod) * std::uint64_t(100);
+                std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                prod = std::uint32_t(prod) * std::uint64_t(100);
+                std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                prod = std::uint32_t(prod) * std::uint64_t(100);
+                std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                exponent += 8;
+                buffer += 10;
+            }
+            else if (s32 >= 100'0000) {
+                // 7 or 8 digits.
+                // 281474978 = ceil(2^48 / 100'0000) + 1
+                auto prod = s32 * std::uint64_t(281474978);
+                prod >>= 16;
+                auto two_digits = std::uint32_t(prod >> 32);
+                // If s32 is of 8 digits, increase the exponent by 7.
+                // Otherwise, increase it by 6.
+                exponent += (6 + unsigned(two_digits >= 10));
+
+                // Write the first digit and the decimal point.
+                std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                // This third character may be overwritten later but we don't care.
+                buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                // Remaining 6 digits are all zero?
+                if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) {
+                    // The number of characters actually written is:
+                    //   1, if only the first digit is nonzero, which means that either s32 is of 7
+                    //   digits or it is of 8 digits but the second digit is zero, or
+                    //   3, otherwise.
+                    // Note that buffer[2] is never zero if s32 is of 7 digits, because the input is
+                    // never zero.
+                    buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
+                }
+                else {
+                    // At least one of the remaining 6 digits is nonzero.
+                    // After this adjustment, now the first destination becomes buffer + 2.
+                    buffer += unsigned(two_digits >= 10);
+
+                    // Obtain the next two digits.
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    two_digits = std::uint32_t(prod >> 32);
+                    std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
+
+                    // Remaining 4 digits are all zero?
+                    if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
+                        buffer += (3 + unsigned(buffer[3] > '0'));
+                    }
+                    else {
+                        // At least one of the remaining 4 digits is nonzero.
+
+                        // Obtain the next two digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        two_digits = std::uint32_t(prod >> 32);
+                        std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
+
+                        // Remaining 2 digits are all zero?
+                        if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
+                            buffer += (5 + unsigned(buffer[5] > '0'));
+                        }
+                        else {
+                            // Obtain the last two digits.
+                            prod = std::uint32_t(prod) * std::uint64_t(100);
+                            two_digits = std::uint32_t(prod >> 32);
+                            std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2);
+
+                            buffer += (7 + unsigned(buffer[7] > '0'));
+                        }
+                    }
+                }
+            }
+            else if (s32 >= 1'0000) {
+                // 5 or 6 digits.
+                // 429497 = ceil(2^32 / 1'0000)
+                auto prod = s32 * std::uint64_t(429497);
+                auto two_digits = std::uint32_t(prod >> 32);
+
+                // If s32 is of 6 digits, increase the exponent by 5.
+                // Otherwise, increase it by 4.
+                exponent += (4 + unsigned(two_digits >= 10));
+
+                // Write the first digit and the decimal point.
+                std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                // This third character may be overwritten later but we don't care.
+                buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                // Remaining 4 digits are all zero?
+                if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
+                    // The number of characters actually written is 1 or 3, similarly to the case of
+                    // 7 or 8 digits.
+                    buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
+                }
+                else {
+                    // At least one of the remaining 4 digits is nonzero.
+                    // After this adjustment, now the first destination becomes buffer + 2.
+                    buffer += unsigned(two_digits >= 10);
+
+                    // Obtain the next two digits.
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    two_digits = std::uint32_t(prod >> 32);
+                    std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
+
+                    // Remaining 2 digits are all zero?
+                    if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
+                        buffer += (3 + unsigned(buffer[3] > '0'));
+                    }
+                    else {
+                        // Obtain the last two digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        two_digits = std::uint32_t(prod >> 32);
+                        std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
+
+                        buffer += (5 + unsigned(buffer[5] > '0'));
+                    }
+                }
+            }
+            else if (s32 >= 100) {
+                // 3 or 4 digits.
+                // 42949673 = ceil(2^32 / 100)
+                auto prod = s32 * std::uint64_t(42949673);
+                auto two_digits = std::uint32_t(prod >> 32);
+
+                // If s32 is of 4 digits, increase the exponent by 3.
+                // Otherwise, increase it by 2.
+                exponent += (2 + int(two_digits >= 10));
+
+                // Write the first digit and the decimal point.
+                std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                // This third character may be overwritten later but we don't care.
+                buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                // Remaining 2 digits are all zero?
+                if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
+                    // The number of characters actually written is 1 or 3, similarly to the case of
+                    // 7 or 8 digits.
+                    buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
+                }
+                else {
+                    // At least one of the remaining 2 digits is nonzero.
+                    // After this adjustment, now the first destination becomes buffer + 2.
+                    buffer += unsigned(two_digits >= 10);
+
+                    // Obtain the last two digits.
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    two_digits = std::uint32_t(prod >> 32);
+                    std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
+
+                    buffer += (3 + unsigned(buffer[3] > '0'));
+                }
+            }
+            else {
+                // 1 or 2 digits.
+                // If s32 is of 2 digits, increase the exponent by 1.
+                exponent += int(s32 >= 10);
+
+                // Write the first digit and the decimal point.
+                std::memcpy(buffer, radix_100_head_table + s32 * 2, 2);
+                // This third character may be overwritten later but we don't care.
+                buffer[2] = radix_100_table[s32 * 2 + 1];
+
+                // The number of characters actually written is 1 or 3, similarly to the case of
+                // 7 or 8 digits.
+                buffer += (1 + (unsigned(s32 >= 10) & unsigned(buffer[2] > '0')) * 2);
+            }
+        }
+
+        // float specialization: prints the decimal significand s32 via print_9_digits, then an
+        // exponent suffix "E[-]d" or "E[-]dd"; no suffix is written when the exponent is zero.
+        // NOTE(review): non-inline definition in a header; confirm only one TU includes this file.
+        template <>
+        char* to_chars<float, default_float_traits<float>>(std::uint32_t s32, int exponent,
+                                                           char* buffer) noexcept {
+            // Emits the digits, advances buffer and adjusts exponent for the "d.ddd" layout.
+            print_9_digits(s32, exponent, buffer);
+
+            // A zero exponent gets no suffix at all.
+            if (exponent == 0) {
+                return buffer;
+            }
+
+            // Exponent marker, followed by '-' and the magnitude for negative exponents.
+            *buffer++ = 'E';
+            if (exponent < 0) {
+                *buffer++ = '-';
+                exponent = -exponent;
+            }
+
+            // One digit is written directly; two digits come from the radix-100 table.
+            if (exponent < 10) {
+                *buffer++ = char('0' + exponent);
+            }
+            else {
+                std::memcpy(buffer, radix_100_table + exponent * 2, 2);
+                buffer += 2;
+            }
+            return buffer;
+        }
+
+        template <>
+        char* to_chars<double, default_float_traits<double>>(std::uint64_t const significand,
+                                                             int exponent, char* buffer) noexcept {
+            // Print significand by decomposing it into a 9-digit block and a 8-digit block.
+            std::uint32_t first_block, second_block;
+            bool no_second_block;
+
+            if (significand >= 1'0000'0000) {
+                first_block = std::uint32_t(significand / 1'0000'0000);
+                second_block = std::uint32_t(significand) - first_block * 1'0000'0000;
+                exponent += 8;
+                no_second_block = (second_block == 0);
+            }
+            else {
+                first_block = std::uint32_t(significand);
+                no_second_block = true;
+            }
+
+            if (no_second_block) {
+                print_9_digits(first_block, exponent, buffer);
+            }
+            else {
+                // We proceed similarly to print_9_digits(), but since we do not need to remove
+                // trailing zeros, the procedure is a bit simpler.
+                if (first_block >= 1'0000'0000) {
+                    // The input is of 17 digits, thus there should be no trailing zero at all.
+                    // The first block is of 9 digits.
+                    // 1441151882 = ceil(2^57 / 1'0000'0000) + 1
+                    auto prod = first_block * std::uint64_t(1441151882);
+                    prod >>= 25;
+                    std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                    // The second block is of 8 digits.
+                    // 281474978 = ceil(2^48 / 100'0000) + 1
+                    prod = second_block * std::uint64_t(281474978);
+                    prod >>= 16;
+                    prod += 1;
+                    std::memcpy(buffer + 10, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 12, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 14, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                    prod = std::uint32_t(prod) * std::uint64_t(100);
+                    std::memcpy(buffer + 16, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                    exponent += 8;
+                    buffer += 18;
+                }
+                else {
+                    if (first_block >= 100'0000) {
+                        // 7 or 8 digits.
+                        // 281474978 = ceil(2^48 / 100'0000) + 1
+                        auto prod = first_block * std::uint64_t(281474978);
+                        prod >>= 16;
+                        auto two_digits = std::uint32_t(prod >> 32);
+
+                        std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                        buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                        exponent += (6 + unsigned(two_digits >= 10));
+                        buffer += unsigned(two_digits >= 10);
+
+                        // Print remaining 6 digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                        buffer += 8;
+                    }
+                    else if (first_block >= 1'0000) {
+                        // 5 or 6 digits.
+                        // 429497 = ceil(2^32 / 1'0000)
+                        auto prod = first_block * std::uint64_t(429497);
+                        auto two_digits = std::uint32_t(prod >> 32);
+
+                        std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                        buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                        exponent += (4 + unsigned(two_digits >= 10));
+                        buffer += unsigned(two_digits >= 10);
+
+                        // Print remaining 4 digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                        buffer += 6;
+                    }
+                    else if (first_block >= 100) {
+                        // 3 or 4 digits.
+                        // 42949673 = ceil(2^32 / 100)
+                        auto prod = first_block * std::uint64_t(42949673);
+                        auto two_digits = std::uint32_t(prod >> 32);
+
+                        std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
+                        buffer[2] = radix_100_table[two_digits * 2 + 1];
+
+                        exponent += (2 + unsigned(two_digits >= 10));
+                        buffer += unsigned(two_digits >= 10);
+
+                        // Print remaining 2 digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
+
+                        buffer += 4;
+                    }
+                    else {
+                        // 1 or 2 digits.
+                        std::memcpy(buffer, radix_100_head_table + first_block * 2, 2);
+                        buffer[2] = radix_100_table[first_block * 2 + 1];
+
+                        exponent += unsigned(first_block >= 10);
+                        buffer += (2 + unsigned(first_block >= 10));
+                    }
+
+                    // Next, print the second block.
+                    // The second block is of 8 digits, but we may have trailing zeros.
+                    // 281474978 = ceil(2^48 / 100'0000) + 1
+                    auto prod = second_block * std::uint64_t(281474978);
+                    prod >>= 16;
+                    prod += 1;
+                    auto two_digits = std::uint32_t(prod >> 32);
+                    std::memcpy(buffer, radix_100_table + two_digits * 2, 2);
+
+                    // Remaining 6 digits are all zero?
+                    if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) {
+                        buffer += (1 + unsigned(buffer[1] > '0'));
+                    }
+                    else {
+                        // Obtain the next two digits.
+                        prod = std::uint32_t(prod) * std::uint64_t(100);
+                        two_digits = std::uint32_t(prod >> 32);
+                        std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
+
+                        // Remaining 4 digits are all zero?
+                        if (std::uint32_t(prod) <=
+                            std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
+                            buffer += (3 + unsigned(buffer[3] > '0'));
+                        }
+                        else {
+                            // Obtain the next two digits.
+                            prod = std::uint32_t(prod) * std::uint64_t(100);
+                            two_digits = std::uint32_t(prod >> 32);
+                            std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
+
+                            // Remaining 2 digits are all zero?
+                            if (std::uint32_t(prod) <=
+                                std::uint32_t((std::uint64_t(1) << 32) / 100)) {
+                                buffer += (5 + unsigned(buffer[5] > '0'));
+                            }
+                            else {
+                                // Obtain the last two digits.
+                                prod = std::uint32_t(prod) * std::uint64_t(100);
+                                two_digits = std::uint32_t(prod >> 32);
+                                std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2);
+                                buffer += (7 + unsigned(buffer[7] > '0'));
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Print exponent and return
+            if (exponent < 0) {
+                std::memcpy(buffer, "E-", 2);
+                buffer += 2;
+                exponent = -exponent;
+            }
+            else if (exponent > 0) {
+                buffer[0] = 'E';
+                buffer += 1;
+            }
+            else {
+                return buffer;
+            }
+
+            if (exponent >= 100) {
+                // d1 = exponent / 10; d2 = exponent % 10;
+                // 6554 = ceil(2^16 / 10)
+                auto prod = std::uint32_t(exponent) * std::uint32_t(6554);
+                auto d1 = prod >> 16;
+                prod = std::uint16_t(prod) * std::uint32_t(5); // * 10
+                auto d2 = prod >> 15;                          // >> 16
+                std::memcpy(buffer, &radix_100_table[d1 * 2], 2);
+                buffer[2] = char('0' + d2);
+                buffer += 3;
+            }
+            else if (exponent >= 10) {
+                std::memcpy(buffer, &radix_100_table[exponent * 2], 2);
+                buffer += 2;
+            }
+            else {
+                buffer[0] = char('0' + exponent);
+                buffer += 1;
+            }
+
+            return buffer;
+        }
+    }
+}
+
diff --git a/server/gc.h b/server/gc.h
new file mode 100644
index 0000000..7bc8d8d
--- /dev/null
+++ b/server/gc.h
@@ -0,0 +1,63 @@
+#ifndef __AQ_USE_THREADEDGC__
+#include <atomic>
+class GC { // deferred-release collector: reg() queues allocations and a daemon thread frees them
+private:;
+	// interval, forced_clean and forceclean_timer are microsecond counts (GC::daemon sleeps microseconds(interval))
+	size_t max_slots, 
+		   interval, forced_clean, 
+		   forceclean_timer = 0;
+	uint64_t max_size; // size level the daemon compares current_size against
+	bool running, alive; // alive keeps the daemon loop going; running is set by reg() and cleared by gc()
+//  ptr, dealloc, ref, sz
+	uint32_t threshould; // reg() releases blocks whose sz is below this immediately
+	void *q, *q_back; // the two entry arrays that gc() swaps
+	void* handle; // the daemon's std::thread, stored untyped
+	std::atomic<uint32_t> slot_pos; // slot counter advanced by reg(), reset by gc()
+	std::atomic<uint32_t> alive_cnt; // number of reg() calls currently storing an entry
+	std::atomic<uint64_t> current_size; // sum of sz registered since the last gc()
+	volatile bool lock; // raised by gc() while it swaps the queues; reg() spins on it
+	// maybe use volatile std::thread::id instead
+protected:
+	void acquire_lock();
+	void release_lock();
+	void gc(); // swaps the queues and runs the deallocators of the drained one
+	void daemon(); // loop executed by the background thread
+	void start_deamon();
+	void terminate_daemon();
+
+public:
+	void reg(void* v, uint32_t sz = 1, // hands v to the collector; f is the function that will release it
+			void(*f)(void*) = free
+		);
+
+	GC(
+		uint64_t max_size = 0xfffffff, uint32_t max_slots = 4096, // 0xfffffff is roughly 256 M
+		uint32_t interval = 10000, uint32_t forced_clean = 1000000, // microseconds: 10 ms and one second
+		uint32_t threshould = 64 // smallest sz that is queued rather than freed at once
+	) : max_size(max_size), max_slots(max_slots), 
+		interval(interval), forced_clean(forced_clean), 
+		threshould(threshould) {
+		// starts the daemon, then publishes this instance through gc_handle
+		start_deamon();
+		GC::gc_handle = this;
+	}
+
+	~GC(){
+		terminate_daemon();
+	}
+	static GC* gc_handle; // assigned in the constructor
+    constexpr static void(*_free) (void*) = free;
+};
+
+#else
+class GC { // stand-in without a queue: reg() releases memory immediately
+public:
+	GC(uint32_t = 0) {} // argument accepted and ignored; "= default" is only valid on special member functions
+	void reg(
+		void* v, uint32_t = 0, 
+		void(*f)(void*) = free
+	) const { f(v); } // calls the deallocator right away; the size argument is unused
+	static GC* gc;
+    constexpr static void(*_free) (void*) = free;
+}; // the terminating semicolon was missing
+#endif
diff --git a/server/gc.hpp b/server/gc.hpp
deleted file mode 100644
index 4c66060..0000000
--- a/server/gc.hpp
+++ /dev/null
@@ -1,53 +0,0 @@
-#pragma once
-#include <vector_type>
-#include <utility>
-#include <thread>
-#include <chrono>
-class GC {
-	template<class T>
-	using vector = vector_type<T>;
-	template<class ...T>
-	using tuple = std::tuple<T...>;
-	size_t current_size, max_size, interval, forced_clean;
-	bool running, alive;
-//  ptr, dealloc, ref, sz
-	vector<tuple<void*, void (*)(void*)>> q;
-	std::thread handle;
-	void gc()
-	{
-		
-	}
-	void reg(void* v, uint32_t ref, uint32_t sz, 
-		void(*f)(void*) = [](void* v) {free (v); }) {
-		current_size += sz;
-		if (current_size > max_size)
-			gc();
-		q.push_back({ v, f });
-	}
-	void daemon() {
-		using namespace std::chrono;
-		while (alive) {
-			if (running) {
-				gc();
-				std::this_thread::sleep_for(microseconds(interval));
-			}
-			else {
-				std::this_thread::sleep_for(10ms);
-			}
-		}
-	}
-	void start_deamon() {
-		handle = std::thread(&daemon);
-		alive = true;
-	}
-	void terminate_daemon() {
-		running = false;
-		alive = false;
-		using namespace std::chrono;
-
-		if (handle.joinable()) {
-			std::this_thread::sleep_for(microseconds(1000 + std::max(static_cast<size_t>(10000), interval)));
-			handle.join();
-		}
-	}
-};
\ No newline at end of file
diff --git a/server/jeaiii_to_text.h b/server/jeaiii_to_text.h
new file mode 100644
index 0000000..63fd9ee
--- /dev/null
+++ b/server/jeaiii_to_text.h
@@ -0,0 +1,121 @@
+#pragma once 
+// Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
+using u32 = decltype(0xffffffff);
+using u64 = decltype(0xffffffffffffffff);
+
+static_assert(u32(-1) > 0, "u32 must be unsigned");
+static_assert(u32(0xffffffff) + u32(1) == u32(0), "u32 must be 32 bits");
+static_assert(u64(-1) > 0, "u64 must be unsigned");
+static_assert(u64(0xffffffffffffffff) + u32(1) == u32(0), "u64 must be 64 bits");
+
+constexpr auto digits_00_99 =
+    "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839"	"40414243444546474849"
+    "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889"	"90919293949596979899";
+
+struct pair { char t, o; };
+
+#define JEAIII_W(I, U) *(pair*)&b[I] = *(pair*)&digits_00_99[(U) * 2]
+#define JEAIII_A(I, N) t = (u64(1) << (32 + N / 5 * N * 53 / 16)) / u32(1e##N) + 1 + N / 6 - N / 8, t *= u, t >>= N / 5 * N * 53 / 16, t += N / 6 * 4, JEAIII_W(I, t >> 32)
+#define JEAIII_S(I) b[I] = char(u64(10) * u32(t) >> 32) + '0'
+#define JEAIII_D(I) t = u64(100) * u32(t), JEAIII_W(I, t >> 32)
+
+#define JEAIII_C0(I) b[I] = char(u) + '0'
+#define JEAIII_C1(I) JEAIII_W(I, u)
+#define JEAIII_C2(I) JEAIII_A(I, 1), JEAIII_S(I + 2)
+#define JEAIII_C3(I) JEAIII_A(I, 2), JEAIII_D(I + 2)
+#define JEAIII_C4(I) JEAIII_A(I, 3), JEAIII_D(I + 2), JEAIII_S(I + 4)
+#define JEAIII_C5(I) JEAIII_A(I, 4), JEAIII_D(I + 2), JEAIII_D(I + 4)
+#define JEAIII_C6(I) JEAIII_A(I, 5), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_S(I + 6)
+#define JEAIII_C7(I) JEAIII_A(I, 6), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6)
+#define JEAIII_C8(I) JEAIII_A(I, 7), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_S(I + 8)
+#define JEAIII_C9(I) JEAIII_A(I, 8), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_D(I + 8)
+
+#define JEAIII_L(N, A, B) u < u32(1e##N) ? A : B
+#define JEAIII_L09(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(6, JEAIII_L(4, JEAIII_L(3, F(2), F(3)), JEAIII_L(5, F(4), F(5))), JEAIII_L(8, JEAIII_L(7, F(6), F(7)), JEAIII_L(9, F(8), F(9)))))
+#define JEAIII_L03(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(3, F(2), F(3)))
+
+#define JEAIII_K(N) (JEAIII_C##N(0), b + N + 1)
+#define JEAIII_KX(N) (JEAIII_C##N(0), u = x, JEAIII_C7(N + 1), b + N + 9)
+#define JEAIII_KYX(N) (JEAIII_C##N(0), u = y, JEAIII_C7(N + 1), u = x, JEAIII_C7(N + 9), b + N + 17)
+
+template<bool B, class T, class F> struct _cond { using type = F; };
+template<class T, class F> struct _cond<true, T, F> { using type = T; };
+template<bool B, class T, class F> using cond = typename _cond<B, T, F>::type;
+
+template<class T> inline char* to_text_from_integer(char* b, T i) // writes i in decimal at b (no terminator); returns the position after the last character
+{
+    u64 t = u64(i);
+
+    if (i < T(0))
+        t = u64(0) - t, b[0] = '-', ++b;
+
+    u32 u = cond<T(1) != T(2), cond<sizeof(T) != 1, cond<sizeof(T) != sizeof(short), u32, unsigned short>, unsigned char>, bool>(t);
+
+    // if our input type fits in 32bits, or its value does, treat as 32bit (the line above ensures the compiler can still know the range limits of the input type)
+    // and optimize out cases for small integer types (if only c++ had a builtin way to get the unsigned type from a signed type)
+    if (sizeof(i) <= sizeof(u) || u == t)
+        return JEAIII_L09(JEAIII_K);
+
+    u32 x = t % 100000000u;
+    u = u32(t /= 100000000u);
+
+    // t / 10^8 (fits in 32 bit), t % 10^8 -> ~17.5 digits
+    if (u == t)
+        return JEAIII_L09(JEAIII_KX);
+
+    // t / 10^16 (1-4 digits), t / 10^8 % 10^8, t % 10^8
+    u32 y = t % 100000000u;
+    u = u32(t / 100000000u);
+    return JEAIII_L03(JEAIII_KYX);
+}
+
+inline char* to_text(char text[], signed char i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], unsigned char i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], short i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], unsigned short i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], int i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], unsigned int i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], long i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], unsigned long i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], long long i) { return to_text_from_integer(text, i); }
+inline char* to_text(char text[], unsigned long long i) { return to_text_from_integer(text, i); }
+
+// Copyright (c) 2022 Bill Sun
+
+//#if defined(SIZEOF___INT128) || (defined(SIZEOF___INT128_T) && defined(SIZEOF___UINT128_T))
+constexpr static __uint128_t _10_19 = 10000000000000000000ull, 
+    _10_37 = _10_19*_10_19 / 10;
+
+template<class T>
+char* jeaiii_i128(char* buf, T v){
+    // Writes the decimal text of the 128-bit integer `v` at `buf` (no terminator) and
+    // returns the position after the last character; at most 40 characters are written.
+    __uint128_t mag = __uint128_t(v);
+    // T(-1) < T(0) detects a signed T without <type_traits>, which may not classify
+    // __int128 as an integer type when GNU extensions are disabled.
+    if constexpr (T(-1) < T(0)) {
+        if (v < 0){
+            *(buf++) = '-';
+            mag = __uint128_t(0) - mag; // unsigned negation is defined even for the minimum value
+        }
+    }
+    // Base-10^19 limbs: mag == top * 10^38 + mid * 10^19 + low, where top is at most 3.
+    const unsigned long long low = (unsigned long long)(mag % _10_19);
+    mag /= _10_19;
+    const unsigned long long mid = (unsigned long long)(mag % _10_19);
+    const unsigned top = unsigned(mag / _10_19);
+    // A limb that follows a higher one must keep its leading zeros: exactly 19 digits.
+    const auto put19 = [](char* p, unsigned long long limb) {
+        for (int k = 18; k >= 0; --k, limb /= 10)
+            p[k] = char('0' + limb % 10);
+        return p + 19;
+    };
+    if (top != 0) {
+        *(buf++) = char('0' + top);
+        return put19(put19(buf, mid), low);
+    }
+    if (mid != 0)
+        return put19(to_text(buf, mid), low);
+    return to_text(buf, low);
+}
+// #endif
diff --git a/server/io.cpp b/server/libaquery.cpp
similarity index 59%
rename from server/io.cpp
rename to server/libaquery.cpp
index 878c0b6..93a03da 100644
--- a/server/io.cpp
+++ b/server/libaquery.cpp
@@ -1,20 +1,20 @@
 #include "pch_msc.hpp"
 
 #include "io.h"
-#include "table.h"
 #include <limits>
 
 #include <chrono>
 #include <ctime>
 
 #include "utils.h"
+#include "libaquery.h"
 #include <random>
 
 char* gbuf = nullptr;
 
 void setgbuf(char* buf) {
-	static char* b = 0;
-	if (buf == 0)
+	static char* b = nullptr;
+	if (buf == nullptr)
 		gbuf = b;
 	else {
 		gbuf = buf;
@@ -63,6 +63,7 @@ T getInt(const char*& buf){
 	}
 	return ret;
 }
+
 template<class T> 
 char* intToString(T val, char* buf){
 
@@ -275,6 +276,44 @@ inline const char* str(const bool& v) {
 	return v ? "true" : "false";
 }
 
+
+Context::Context() { // allocates the pointer-to-deallocator map, then resets the session state
+    current.memory_map = new std::unordered_map<void*, deallocator_t>;
+    init_session();
+}
+
+Context::~Context() { // deletes the map object itself; no stored deallocator is invoked here
+    auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
+    delete memmap;
+}
+
+void Context::init_session(){ // zeroes the statistics when logging at LOG_INFO and empties the memory map
+    auto* const tracked = static_cast<std::unordered_map<void*, deallocator_t>*>(this->current.memory_map);
+    const bool reset_stats = (log_level == LOG_INFO);
+    if (reset_stats)
+        memset(&(this->current.stats), 0, sizeof(Session::Statistic));
+
+    tracked->clear();
+}
+
+void Context::end_session(){ // runs every registered deallocator on its pointer, then empties the map
+    auto& tracked = *static_cast<std::unordered_map<void*, deallocator_t>*>(this->current.memory_map);
+    for (auto it = tracked.begin(); it != tracked.end(); ++it) {
+        it->second(it->first);
+    }
+    tracked.clear();
+}
+
+void* Context::get_module_function(const char* fname){
+    // Looks `fname` up in the module function map; yields nullptr when it is absent.
+    using fn_map_t = std::unordered_map<std::string, void*>;
+    const auto registry = static_cast<fn_map_t*>(this->module_function_maps);
+    const auto hit = registry->find(fname);
+    if (hit == registry->end()) {
+        return nullptr;
+    }
+    return hit->second;
+}
+
 // template<typename _Ty>
 // inline void vector_type<_Ty>::out(uint32_t n, const char* sep) const
 // {
@@ -288,3 +327,195 @@ inline const char* str(const bool& v) {
 // 	}
 // 	std::cout << ')';
 // }
+
+#include "gc.h"
+#include <utility>
+#include <thread>
+#ifndef __AQ_USE_THREADEDGC__
+
+struct gcmemory_t{ // one queue entry: a pointer and the function that releases it
+	void* memory;
+	void (*deallocator)(void*);
+};
+
+using memoryqueue_t = gcmemory_t*; // a queue is addressed as a plain array of entries
+void GC::acquire_lock() { // currently does nothing: the thread-id based lock below is commented out
+	// auto this_tid = std::this_thread::get_id();
+	// while(lock != this_tid)
+	// {
+	// 	while(lock != this_tid && lock != std::thread::id()) {
+	// 		std::this_thread::sleep_for(std::chrono::milliseconds(0));
+	// 	}
+	// 	lock = this_tid;
+	// }
+}
+
+void GC::release_lock(){ // currently does nothing: the matching release is commented out
+	// lock = std::thread::id();
+}
+
+void GC::gc() // swaps the two queues, then runs the deallocators of the entries in the previously active one
+{
+	auto _q = static_cast<memoryqueue_t>(q);
+	auto _q_back = static_cast<memoryqueue_t>(q_back);
+	if (slot_pos == 0) // nothing registered since the last pass
+		return;
+	auto t = _q;
+	lock = true; // makes new reg() calls spin
+	while(alive_cnt != 0); // busy-wait for reg() calls that are already storing an entry
+	q = _q_back;
+	uint32_t _slot = slot_pos;
+	slot_pos = 0;
+	current_size = 0;
+	lock = false;
+	q_back = t;
+	// drain indices [0, _slot) of the queue that was active before the swap
+	for(uint32_t i = 0; i < _slot; ++i){
+		if (_q[i].memory != nullptr && _q[i].deallocator != nullptr)
+			_q[i].deallocator(_q[i].memory);
+	}
+	memset(_q, 0, sizeof(gcmemory_t) * _slot);
+	running = false;
+}
+
+void GC::daemon() { // collector thread body: polls until `alive` is cleared
+	using namespace std::chrono;
+	// forceclean_timer counts the microseconds slept since the last collection
+	while (alive) {
+		if (running) {
+			if (current_size > max_size || // was an unsigned subtraction tested "> 0", true whenever the two differed
+				forceclean_timer > forced_clean) 
+			{
+				gc();
+				forceclean_timer = 0;
+			}
+			std::this_thread::sleep_for(microseconds(interval));
+			forceclean_timer += interval;
+		}
+		else {
+			std::this_thread::sleep_for(10ms);
+			forceclean_timer += 10000; // 10 ms expressed in microseconds
+		}
+	}
+}
+
+void GC::start_deamon() { // allocates both queues, resets the shared state and launches the daemon thread
+	q = new gcmemory_t[max_slots << 1](); // value-initialized: unused slots read as null
+	q_back = new gcmemory_t[max_slots << 1](); // was an array of pointers, i.e. the wrong element type and size
+	lock = false;
+	slot_pos = 0;
+	current_size = 0;
+	alive_cnt = 0;
+	running = false; alive = true; // the daemon reads both flags, so they are set before it starts
+	handle = new std::thread(&GC::daemon, this);
+}
+
+void GC::terminate_daemon() { // stops the daemon thread, then releases the queues and the thread object
+	running = false;
+	alive = false;
+	decltype(auto) _handle = static_cast<std::thread*>(handle);
+	using namespace std::chrono;
+	std::this_thread::sleep_for(microseconds(1000 + std::max(static_cast<size_t>(10000), interval)));
+	if (_handle->joinable()) {
+		_handle->join();
+	}
+	// The queues are freed only after the join: until then the daemon may still be inside gc() using them.
+	delete[] static_cast<memoryqueue_t>(q); // NOTE(review): entries still queued are not passed to their deallocators
+	delete[] static_cast<memoryqueue_t>(q_back);
+	delete _handle;
+}
+
+void GC::reg(void* v, uint32_t sz, void(*f)(void*)) { // queues v for a later f(v); ~ 40ns expected v. free ~ 75ns
+	if (v == nullptr || f == nullptr)
+		return;
+	if (sz < threshould){ // blocks below the threshold are released right away
+		f(v);
+		return;
+	}
+	while(lock); // gc() is swapping the queues
+	++alive_cnt;
+	auto _q = static_cast<memoryqueue_t>(q); // read after the wait so a queue swapped in the meantime is not used
+	const uint32_t _slot = slot_pos++; // post-increment: entries start at index 0, the range gc() drains
+	if (_slot < (max_slots << 1)) { current_size += sz; _q[_slot] = {v, f}; }
+	else { --slot_pos; f(v); } // queue full: release now instead of writing past the array
+	--alive_cnt;
+	running = true;
+}
+
+#endif
+
+GC* GC::gc_handle = nullptr;
+
+#include "dragonbox/dragonbox_to_chars.hpp" 
+
+
+template<>
+char*
+aq_to_chars<float>(void* value, char* buffer) { // value points at a float; formatting is delegated to dragonbox to_chars_n
+    return jkj::dragonbox::to_chars_n(*static_cast<float*>(value), buffer);
+}
+template<>
+char*
+aq_to_chars<double>(void* value, char* buffer) { // value points at a double; formatting is delegated to dragonbox to_chars_n
+    return jkj::dragonbox::to_chars_n(*static_cast<double*>(value), buffer);
+}
+
+template<>
+char* // not `inline`: an unused inline definition is never emitted, so no other translation unit could reach it
+aq_to_chars<bool>(void* value, char* buffer) { // writes "true" or "false" (no terminator); returns the end position
+	if (*static_cast<bool*>(value)){
+		memcpy(buffer, "true", 4);
+		return buffer + 4;
+	}
+	else{ // NOTE(review): libaquery.h should also declare this specialization next to the float/double ones
+		memcpy(buffer, "false", 5);
+		return buffer + 5;
+	}
+}
+
+template<>
+char*
+aq_to_chars<char*>(void* value, char* buffer) { // copies the C string *value without its terminator; returns the end position
+	const auto src = *static_cast<char**>(value);
+	if (src == nullptr) return buffer; // a null string pointer has no text, and strlen(nullptr) is undefined
+	const auto len = strlen(src);
+	return static_cast<char*>(memcpy(buffer, src, len)) + len;
+}
+
+template<>
+char*
+aq_to_chars<types::date_t>(void* value, char* buffer) { // writes year-month-day; the numbers are not zero-padded here
+	const auto& src = *static_cast<types::date_t*>(value);
+	buffer = to_text(buffer, src.year);
+	*buffer++ = '-';
+	buffer = to_text(buffer, src.month);
+	*buffer++ = '-';
+	buffer = to_text(buffer, src.day);
+	return buffer;
+}
+
+template<>
+char*
+aq_to_chars<types::time_t>(void* value, char* buffer) { // writes hours:minutes:seconds:ms, all four fields joined by ':'
+	const auto& src = *static_cast<types::time_t*>(value);
+	buffer = to_text(buffer, src.hours);
+	*buffer++ = ':';
+	buffer = to_text(buffer, src.minutes);
+	*buffer++ = ':';
+	buffer = to_text(buffer, src.seconds);
+	*buffer++ = ':';
+	buffer = to_text(buffer, src.ms);
+	return buffer;
+}
+
+template<>
+char*
+aq_to_chars<types::timestamp_t>(void* value, char* buffer) { // writes the date text, one space, then the time text
+	auto& src = *static_cast<types::timestamp_t*>(value);
+	buffer = aq_to_chars<types::date_t>(static_cast<void*>(&src.date), buffer);
+	*buffer++ = ' ';
+	buffer = aq_to_chars<types::time_t>(static_cast<void*>(&src.time), buffer);
+	return buffer;
+}
+
+
diff --git a/server/libaquery.h b/server/libaquery.h
index 551d205..cc0b5a9 100644
--- a/server/libaquery.h
+++ b/server/libaquery.h
@@ -1,8 +1,37 @@
 #ifndef _AQUERY_H
 #define _AQUERY_H
 
-#include "table.h"
+#ifdef __INTELLISENSE__
+	#define __AQUERY_ITC_USE_SEMPH__
+	#define THREADING
+	#define __AQ_THREADED_GC__
+#endif
+
 #include <unordered_map>
+#include <chrono>
+class aq_timer { // stopwatch over std::chrono::high_resolution_clock
+private:
+	using clock_type = std::chrono::high_resolution_clock;
+	clock_type::time_point now; // start of the current measurement
+public:
+	aq_timer() : now(clock_type::now()) {}
+	// Restarts the measurement from the current instant.
+	void reset(){ now = clock_type::now(); }
+	// Ticks since the last (re)start; also restarts the measurement.
+	long long elapsed(){
+		const long long ticks = lap();
+		reset();
+		return ticks;
+	}
+	// Ticks since the last (re)start, leaving the start point untouched.
+	// A tick is one unit of the clock's native duration type.
+	long long lap() const{
+		return (clock_type::now() - now).count();
+	}
+};
+
+#include "table.h"
+
 
 enum Log_level {
 	LOG_INFO,
@@ -15,9 +44,16 @@ enum Backend_Type {
 	BACKEND_MonetDB,
 	BACKEND_MariaDB
 };
+
+struct QueryStats{ // timing counters embedded in Config as `stats`
+	long long monet_time; // NOTE(review): presumably time spent inside MonetDB; unit not visible here
+	long long postproc_time; // NOTE(review): presumably post-processing time; unit not visible here
+};
 struct Config{
-    int running, new_query, server_mode,
-	 	backend_type, has_dll, exec_time, n_buffers;
+    int running, new_query, server_mode, 
+	 	backend_type, has_dll, 
+		n_buffers;
+	QueryStats stats;
     int buffer_sizes[];
 };
 
@@ -47,7 +83,10 @@ struct Context{
 #ifdef THREADING
 	void* thread_pool;
 #endif	
-	printf_type print = printf;
+#ifdef __AQ_THREADED_GC__
+	void* gc;
+#endif
+	printf_type print = &printf;
 	Context();
 	virtual ~Context();
 	template <class ...Types>
@@ -67,6 +106,8 @@ struct Context{
     std::unordered_map<const char*, uColRef *> cols;
 };
 
+
+
 #ifdef _WIN32
 #define __DLLEXPORT__  __declspec(dllexport) __stdcall 
 #else 
@@ -76,4 +117,40 @@ struct Context{
 #define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__ 
 typedef void (*deallocator_t) (void*);
 
+
+#include <type_traits>
+#include "jeaiii_to_text.h"
+
+template<class T>
+inline std::enable_if_t<std::is_integral_v<T>, char *> 
+aq_to_chars(void* value, char* buffer) { // integral T: value points at a T, written in decimal through to_text
+	return to_text(buffer, *static_cast<T*>(value));
+}
+
+template<class T>
+inline std::enable_if_t<!std::is_integral_v<T>, char *> 
+aq_to_chars(void* value, char* buffer) { // non-integral T without a specialization: writes nothing
+	return buffer;
+}
+
+#ifdef __SIZEOF_INT128__
+template<>
+inline char*
+aq_to_chars<__int128_t>(void* value, char* buffer) { // value points at a signed 128-bit integer, formatted by jeaiii_i128
+    return jeaiii_i128<__int128_t>(buffer, *static_cast<__int128_t*>(value));
+}
+
+template<>
+inline char*
+aq_to_chars<__uint128_t>(void* value, char* buffer) { // value points at an unsigned 128-bit integer, formatted by jeaiii_i128
+    return jeaiii_i128<__uint128_t>(buffer, *static_cast<__uint128_t*>(value));
+}
+#endif
+
+template<> char* aq_to_chars<float>(void* , char*);
+template<> char* aq_to_chars<double>(void* , char*);
+template<> char* aq_to_chars<char*>(void* , char*);
+template<> char* aq_to_chars<types::date_t>(void* , char*);
+template<> char* aq_to_chars<types::time_t>(void* , char*);
+template<> char* aq_to_chars<types::timestamp_t>(void* , char*);
 #endif
diff --git a/server/monetdb_conn.cpp b/server/monetdb_conn.cpp
index b29f1a8..b3fbd1e 100644
--- a/server/monetdb_conn.cpp
+++ b/server/monetdb_conn.cpp
@@ -2,12 +2,14 @@
 
 #include "libaquery.h"
 #include <cstdio>
+#include <string>
 #include "monetdb_conn.h"
 #include "monetdbe.h"
 #include "table.h"
+
 #undef static_assert
 
-const char* monetdbe_type_str[] = {
+constexpr const char* monetdbe_type_str[] = {
 	"monetdbe_bool", "monetdbe_int8_t", "monetdbe_int16_t", "monetdbe_int32_t", "monetdbe_int64_t",
 #ifdef HAVE_HGE
 	"monetdbe_int128_t",
@@ -20,7 +22,7 @@ const char* monetdbe_type_str[] = {
 	"monetdbe_type_unknown"
 } ;
 
-const unsigned char monetdbe_type_szs[] = {
+inline constexpr static unsigned char monetdbe_type_szs[] = {
     sizeof(monetdbe_column_bool::null_value), sizeof(monetdbe_column_int8_t::null_value), 
     sizeof(monetdbe_column_int16_t::null_value), sizeof(monetdbe_column_int32_t::null_value), 
     sizeof(monetdbe_column_int64_t::null_value),
@@ -36,7 +38,19 @@ const unsigned char monetdbe_type_szs[] = {
     1
 };
 
+namespace types{
+    constexpr const Type_t monetdbe_type_aqtypes[] = { // AQuery type tag looked up by a monetdbe column's `type` value
+        ABOOL, AINT8, AINT16, AINT32, AINT64, 
+#ifdef HAVE_HGE
+        AINT128,
+#endif
+        AUINT64, AFLOAT, ADOUBLE, ASTR, 
+        // blob?
+        AINT64,
+        ADATE, ATIME, ATIMESTAMP, ERROR
+ 
+    };
+}
 
 Server::Server(Context* cxt){
     if (cxt){
@@ -80,7 +94,7 @@ void Server::connect(Context *cxt){
     else{
         if(server)
             free(server);
-        this->server = 0;
+        this->server = nullptr;
         status = false;
         puts(ret == -1 ? "Allocation Error." : "Internal Database Error.");
     }
@@ -103,7 +117,7 @@ void Server::exec(const char* q){
 
 bool Server::haserror(){
     if (last_error){
-        last_error = 0;
+        last_error = nullptr;
         return true;
     }
     else{
@@ -111,12 +125,53 @@ bool Server::haserror(){
     }
 }
 
+
+void Server::print_results(const char* sep, const char* end){
+    // Prints the current result: column names, a '=' rule, then `cnt` rows; `sep` joins fields, `end` closes lines.
+    if (!haserror() && res != nullptr){
+        auto _res = static_cast<monetdbe_result*> (res);
+        const auto& ncols = _res->ncols;
+        monetdbe_column** cols = static_cast<monetdbe_column**>(malloc(sizeof(monetdbe_column*) * ncols));
+        std::string* printf_string = new std::string[ncols];
+        const char** col_data = static_cast<const char**> (malloc(sizeof(char*) * ncols));
+        uint8_t* szs = static_cast<uint8_t*>(alloca(ncols));
+        std::string header_string = "";
+        const char* err_msg = nullptr;
+        for(uint32_t i = 0; i < ncols; ++i){
+            if ((err_msg = monetdbe_result_fetch(_res, &cols[i], i)) != nullptr) break; // cols[i] is unusable after a failed fetch
+            printf_string[i] = std::string(types::printf_str[types::monetdbe_type_aqtypes[cols[i]->type]]) 
+                + (i < ncols - 1 ? sep : "");
+            puts(printf_string[i].c_str());
+            puts(monetdbe_type_str[cols[i]->type]);
+            col_data[i] = static_cast<char *>(cols[i]->data);
+            szs [i] = monetdbe_type_szs[cols[i]->type];
+            header_string = header_string + cols[i]->name + sep + '|' + sep;
+        }
+        if (err_msg == nullptr) {
+            const size_t l_sep = strlen(sep) + 1;
+            if (header_string.size() >= l_sep) // size() is unsigned, so "size() - l_sep >= 0" could never be false
+                header_string.resize(header_string.size() - l_sep);
+            header_string += end + std::string(header_string.size(), '=') + end;
+            fputs(header_string.c_str(), stdout);
+            for(uint64_t i = 0; i < cnt; ++i){
+                for(uint32_t j = 0; j < ncols; ++j){
+                    printf(printf_string[j].c_str(), *((void**)col_data[j]));
+                    col_data[j] += szs[j];
+                }
+                fputs(end, stdout);
+            }
+        } else { printf("Error: %s\n", err_msg); } // report the fetch failure instead of reading invalid columns
+        free(cols); free(col_data);
+        delete[] printf_string;
+    }
+}
+
 void Server::close(){
     if(this->server){
         auto server = static_cast<monetdbe_database*>(this->server);
         monetdbe_close(*(server));
         free(server);
-        this->server = 0;
+        this->server = nullptr;
     }
 }
 
@@ -130,7 +185,7 @@ void* Server::getCol(int col_idx){
             auto _ret_col = static_cast<monetdbe_column*>(this->ret_col);
             cnt = _ret_col->count;
              printf("Dbg: Getting col %s, type: %s\n", 
-                 _ret_col->name, monetdbe_type_str[_ret_col->type]);
+                _ret_col->name, monetdbe_type_str[_ret_col->type]);
             return _ret_col->data;
         }
         else{
@@ -140,7 +195,7 @@ void* Server::getCol(int col_idx){
     else{
         puts("Error: No result.");
     }
-    return 0;
+    return nullptr;
 }
 
 Server::~Server(){
@@ -149,10 +204,10 @@ Server::~Server(){
 
 bool Server::havehge() {
 #if defined(_MONETDBE_LIB_) and defined(HAVE_HGE)
-    puts("true");
+    // puts("true");
     return HAVE_HGE;
 #else
-    puts("false");
+    // puts("false");
     return false;
 #endif
 }
diff --git a/server/monetdb_conn.h b/server/monetdb_conn.h
index 467cb2c..9894218 100644
--- a/server/monetdb_conn.h
+++ b/server/monetdb_conn.h
@@ -22,6 +22,9 @@ struct Server{
     void close();
     bool haserror();
     static bool havehge();
+    void test(const char*);
+    void print_results(const char* sep = " ", const char* end = "\n");
+    friend void print_monetdb_results(Server* srv, const char* sep, const char* end, int limit);
     ~Server();
 };
 
diff --git a/server/server.cpp b/server/server.cpp
index 2105545..6514093 100644
--- a/server/server.cpp
+++ b/server/server.cpp
@@ -1,47 +1,137 @@
 #include "pch_msc.hpp"
 
-#include "../csv.h"
 #include <iostream>
 #include <string>
 #include <chrono>
+#include <thread>
 
 #include "libaquery.h"
 #include "monetdb_conn.h"
 #ifdef THREADING
 #include "threading.h"
 #endif
+
 #ifdef _WIN32
 #include "winhelper.h"
 #else 
 #include <dlfcn.h>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <atomic>
+
+// fast numeric to string conversion
+#include "jeaiii_to_text.h"
+#include "dragonbox/dragonbox_to_chars.h"
+
 struct SharedMemory
 {
+    std::atomic<bool> a;
     int hFileMap;
     void* pData;
-    SharedMemory(const char* fname) {
+    explicit SharedMemory(const char* fname) {
         hFileMap = open(fname, O_RDWR, 0);
         if (hFileMap != -1)
-            pData = mmap(NULL, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0);
+            pData = mmap(nullptr, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0);
         else 
-            pData = 0;
+            pData = nullptr;
     }
-    void FreeMemoryMap() {
+    void FreeMemoryMap() const {
+        // automatically unmapped in posix
+    }
+};
 
+#ifndef __USE_STD_SEMAPHORE__
+#ifdef __APPLE__
+#include <dispatch/dispatch.h>
+class A_Semaphore { // semaphore backed by a libdispatch dispatch_semaphore_t
+private:
+	dispatch_semaphore_t native_handle;
+public:
+	A_Semaphore(bool v = false) {
+		native_handle = dispatch_semaphore_create(v); // v is passed as the initial value (0 or 1)
+	}
+	void acquire() {
+        // puts("acquire");
+		dispatch_semaphore_wait(native_handle, DISPATCH_TIME_FOREVER); // waits with no timeout
+	}
+	void release() {
+        // puts("release");
+		dispatch_semaphore_signal(native_handle);
+	}
+	~A_Semaphore() { // the handle is not released here
+	}
+};
+#else
+#include <semaphore.h>
+class A_Semaphore { // semaphore over an unnamed POSIX sem_t
+private:
+	sem_t native_handle;
+public:
+	A_Semaphore(bool v = false) {
+		sem_init(&native_handle, 0, v); // (sem, pshared, value): shared between threads only, initial count v
+	}
+	void acquire() { // blocks until the count is positive, then decrements it
+		sem_wait(&native_handle);
+	}
+	void release() { // increments the count, waking a waiter if there is one
+		sem_post(&native_handle);
+	}
+	~A_Semaphore() {
+		sem_destroy(&native_handle);
+	}
+};
+#endif
+#endif
+#endif
+
+#ifdef __USE_STD_SEMAPHORE__
+#define __AQUERY_ITC_USE_SEMPH__
+#include <semaphore>
+class A_Semaphore { // thin wrapper over std::binary_semaphore
+private:
+    std::binary_semaphore native_handle;
+public:
+    A_Semaphore(bool v = false) // v is the initial count (0 or 1)
+        : native_handle(v) { // built in place: the type has no default constructor and cannot be assigned
+    }
+    void acquire() {
+        native_handle.acquire();
     }
+    void release() {
+        native_handle.release();
+    }
+    ~A_Semaphore() { }
 };
 #endif
 
-#include "aggregations.h"
+#ifdef __AQUERY_ITC_USE_SEMPH__
+A_Semaphore prompt{ true }, engine{ false };
+#define PROMPT_ACQUIRE() prompt.acquire()
+#define PROMPT_RELEASE() prompt.release()
+#define ENGINE_ACQUIRE() engine.acquire()
+#define ENGINE_RELEASE() engine.release()
+#else
+#define PROMPT_ACQUIRE() 
+#define PROMPT_RELEASE() std::this_thread::sleep_for(std::chrono::nanoseconds(0))
+#define ENGINE_ACQUIRE() 
+#define ENGINE_RELEASE() 
+#endif
+
 typedef int (*code_snippet)(void*);
 typedef void (*module_init_fn)(Context*);
 
-int test_main();
 
 int n_recv = 0;
 char** n_recvd = nullptr;
 
+__AQEXPORT__(void) wait_engine(){ // PROMPT_ACQUIRE is prompt.acquire() when __AQUERY_ITC_USE_SEMPH__ is defined, empty otherwise
+    PROMPT_ACQUIRE();
+}
+
+__AQEXPORT__(void) wake_engine(){ // ENGINE_RELEASE is engine.release() when __AQUERY_ITC_USE_SEMPH__ is defined, empty otherwise
+    ENGINE_RELEASE();
+}
+
 extern "C" void __DLLEXPORT__ receive_args(int argc, char**argv){
     n_recv = argc;
     n_recvd = argv;
@@ -71,42 +161,99 @@ __AQEXPORT__(bool) have_hge(){
 #endif
 }
 
-Context::Context() {
-    current.memory_map = new std::unordered_map<void*, deallocator_t>;
-    init_session();
-}
+using prt_fn_t = char* (*)(void*, char*);
 
-Context::~Context() {
-    auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
-    delete memmap;
-}
 
-void Context::init_session(){
-    if (log_level == LOG_INFO){
-        memset(&(this->current.stats), 0, sizeof(Session::Statistic));
-    }
-    auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
-    memmap->clear();
-}
+constexpr prt_fn_t monetdbe_prtfns[] = { // per-type formatter table; print_monetdb_results() indexes it with a column's `type` field
+	aq_to_chars<bool>, aq_to_chars<int8_t>, aq_to_chars<int16_t>, aq_to_chars<int32_t>, 
+	aq_to_chars<int64_t>,
+#if __SIZEOF_INT128__
+	aq_to_chars<__int128_t>, 
+#endif
+	aq_to_chars<size_t>, aq_to_chars<float>, aq_to_chars<double>,
+	aq_to_chars<char*>, aq_to_chars<std::nullptr_t>,
+	aq_to_chars<types::date_t>, aq_to_chars<types::time_t>, aq_to_chars<types::timestamp_t>,
+	// NOTE(review): entry order presumably mirrors the monetdbe type enum and must stay parallel to monetdbe_type_szs -- confirm when adding types
+	// should be last:
+	aq_to_chars<std::nullptr_t>
+};
+
+#include "monetdbe.h"
+inline constexpr static unsigned char monetdbe_type_szs[] = { // per-type element size in bytes; print_monetdb_results() uses it as the stride when advancing a column's data pointer
+    sizeof(monetdbe_column_bool::null_value), sizeof(monetdbe_column_int8_t::null_value), 
+    sizeof(monetdbe_column_int16_t::null_value), sizeof(monetdbe_column_int32_t::null_value), 
+    sizeof(monetdbe_column_int64_t::null_value),
+#ifdef __SIZEOF_INT128__
+    sizeof(monetdbe_column_int128_t::null_value),
+#endif
+    sizeof(monetdbe_column_size_t::null_value), sizeof(monetdbe_column_float::null_value),
+    sizeof(monetdbe_column_double::null_value),
+    sizeof(monetdbe_column_str::null_value), sizeof(monetdbe_column_blob::null_value),
+    sizeof(monetdbe_data_date), sizeof(monetdbe_data_time), sizeof(monetdbe_data_timestamp),
+    // NOTE(review): indexed by the same `type` value as monetdbe_prtfns, so both tables must keep identical entry order
+    // should be last:
+    1
+};
+constexpr uint32_t output_buffer_size = 65536;
+void print_monetdb_results(Server* srv, const char* sep = " ", const char* end = "\n", // Prints srv's current result set to stdout: header row, '=' rule, then the data rows.
+    uint32_t limit = std::numeric_limits<uint32_t>::max()) { // sep joins fields, end terminates rows, limit caps the number of rows printed.
+    if (!srv->haserror() && srv->cnt && limit){ // nothing is printed on error, for an empty result, or when limit == 0
+        char buffer[output_buffer_size]; // staging buffer, written to stdout in chunks
+        auto _res = static_cast<monetdbe_result*> (srv->res);
+        const auto& ncols = _res->ncols;
+        monetdbe_column** cols = static_cast<monetdbe_column**>(malloc(sizeof(monetdbe_column*) * ncols));
+        prt_fn_t *prtfns = (prt_fn_t*) alloca(sizeof(prt_fn_t) * ncols); // formatter per column
+        char** col_data = static_cast<char**> (alloca(sizeof(char*) * ncols)); // read cursor per column
+        uint8_t* szs = static_cast<uint8_t*>(alloca(ncols)); // element stride (bytes) per column
+        std::string header_string = "";
+        const char* err_msg = nullptr;
+        const size_t l_sep = strlen(sep);
+        const size_t l_end = strlen(end);
+        char* _buffer = buffer; // write cursor into buffer
+
+        for(uint32_t i = 0; i < ncols; ++i){
+            err_msg = monetdbe_result_fetch(_res, &cols[i], i);
+            if(err_msg) { puts(err_msg); goto cleanup; } // report the fetch failure instead of silently printing nothing
+            col_data[i] = static_cast<char *>(cols[i]->data);
+            prtfns[i] = monetdbe_prtfns[cols[i]->type];
+            szs [i] = monetdbe_type_szs[cols[i]->type];
+            header_string = header_string + cols[i]->name + sep + '|' + sep;
+        }
 
-void Context::end_session(){
-    auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
-    for (auto& mem : *memmap) {
-        mem.second(mem.first);
+        if(l_sep > 512 || l_end > 512) { // keeps separators well below the 1024-byte flush margin used further down
+            puts("Error: separator or end string too long");
+            goto cleanup;
+        }
+        if (header_string.size() >= l_sep + 1) // size_t arithmetic: compare before subtracting so an empty header cannot underflow
+            header_string.resize(header_string.size() - l_sep - 1); // strip the '|' + sep appended after the last column name
+        header_string += end + std::string(header_string.size(), '=') + end;
+        fputs(header_string.c_str(), stdout);
+        for(uint64_t i = 0; i < srv->cnt && i < limit; ++i){ // stop after `limit` rows
+            for(uint32_t j = 0; j < ncols; ++j){
+                //copy the field to buf
+                _buffer = prtfns[j](col_data[j], _buffer);
+                if (j != ncols - 1){
+                    memcpy(_buffer, sep, l_sep);
+                    _buffer += l_sep;
+                }
+                col_data[j] += szs[j]; // advance this column's cursor to the next row
+            }
+            memcpy(_buffer, end, l_end);
+            _buffer += l_end;
+            if(output_buffer_size - (_buffer - buffer) <= 1024){ // flush when <= 1024 bytes remain; NOTE(review): one row longer than that can still overrun buffer
+                fwrite(buffer, 1, _buffer - buffer, stdout);
+                _buffer = buffer;
+            }
+        }
+        memcpy(_buffer, end, l_end); // one extra terminator after the last row
+        _buffer += l_end;
+        if (_buffer != buffer)
+            fwrite(buffer, 1, _buffer - buffer, stdout);
+cleanup:
+        free(cols);
     }
-    memmap->clear();
 }
 
-void* Context::get_module_function(const char* fname){
-    auto fmap = static_cast<std::unordered_map<std::string, void*>*>
-        (this->module_function_maps);
-    // printf("%p\n", fmap->find("mydiv")->second);
-    //  for (const auto& [key, value] : *fmap){
-    //      printf("%s %p\n", key.c_str(), value);
-    //  }
-    auto ret = fmap->find(fname);
-    return ret == fmap->end() ? nullptr : ret->second;
-}
 
 void initialize_module(const char* module_name, void* module_handle, Context* cxt){
     auto _init_module = reinterpret_cast<module_init_fn>(dlsym(module_handle, "init_session"));
@@ -119,15 +266,16 @@ void initialize_module(const char* module_name, void* module_handle, Context* cx
 }
 
 int dll_main(int argc, char** argv, Context* cxt){
+    aq_timer timer;
     Config *cfg = reinterpret_cast<Config *>(argv[0]);
     std::unordered_map<std::string, void*> user_module_map;
-    if (cxt->module_function_maps == 0)
+    if (cxt->module_function_maps == nullptr)
         cxt->module_function_maps = new std::unordered_map<std::string, void*>();
     auto module_fn_map = 
         static_cast<std::unordered_map<std::string, void*>*>(cxt->module_function_maps);
     
     auto buf_szs = cfg->buffer_sizes;
-    void** buffers = (void**)malloc(sizeof(void*) * cfg->n_buffers);
+    void** buffers = (void**) malloc (sizeof(void*) * cfg->n_buffers);
     for (int i = 0; i < cfg->n_buffers; i++) 
         buffers[i] = static_cast<void *>(argv[i + 1]);
 
@@ -135,19 +283,28 @@ int dll_main(int argc, char** argv, Context* cxt){
     cxt->cfg = cfg;
     cxt->n_buffers = cfg->n_buffers;
     cxt->sz_bufs = buf_szs;
-    cxt->alt_server = NULL;
-
+    if (cfg->backend_type == BACKEND_MonetDB && cxt->alt_server == nullptr)
+    {
+        auto alt_server = new Server(cxt);
+        alt_server->exec("SELECT '**** WELCOME TO AQUERY++! ****';");
+        puts(*(const char**)(alt_server->getCol(0)));
+        cxt->alt_server = alt_server;
+    }
     while(cfg->running){
+        ENGINE_ACQUIRE();
         if (cfg->new_query) {
-            void *handle = 0;
-            void *user_module_handle = 0;
+            cfg->stats.postproc_time = 0;
+            cfg->stats.monet_time = 0;
+
+            void *handle = nullptr;
+            void *user_module_handle = nullptr;
             if (cfg->backend_type == BACKEND_MonetDB){
-                if (cxt->alt_server == 0)
+                if (cxt->alt_server == nullptr)
                     cxt->alt_server = new Server(cxt);
                 Server* server = reinterpret_cast<Server*>(cxt->alt_server);
                 if(n_recv > 0){
                     if (cfg->backend_type == BACKEND_AQuery || cfg->has_dll) {
-                        handle = dlopen("./dll.so", RTLD_LAZY);
+                        handle = dlopen("./dll.so", RTLD_NOW);
                     }
                     for (const auto& module : user_module_map){
                         initialize_module(module.first.c_str(), module.second, cxt);
@@ -159,14 +316,18 @@ int dll_main(int argc, char** argv, Context* cxt){
                         switch(n_recvd[i][0]){
                         case 'Q': // SQL query for monetdbe
                             {
+                                timer.reset();
                                 server->exec(n_recvd[i] + 1);
-                                printf("Exec Q%d: %s", i, n_recvd[i]);
+                                cfg->stats.monet_time += timer.elapsed();
+                                // printf("Exec Q%d: %s", i, n_recvd[i]);
                             }
                             break;
                         case 'P': // Postprocessing procedure 
                             if(handle && !server->haserror()) {
                                 code_snippet c = reinterpret_cast<code_snippet>(dlsym(handle, n_recvd[i]+1));
+                                timer.reset();
                                 c(cxt);
+                                cfg->stats.postproc_time += timer.elapsed();
                             }
                             break;
                         case 'M': // Load Module
@@ -193,12 +354,21 @@ int dll_main(int argc, char** argv, Context* cxt){
                                 //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
                             }
                             break;
+                        case 'O':
+                            {
+                                if(!server->haserror()){
+                                    timer.reset();
+                                    print_monetdb_results(server);        
+                                    cfg->stats.postproc_time += timer.elapsed();
+                                }
+                            }
+                            break;
                         case 'U': // Unload Module
                             {
                                 auto mname = n_recvd[i] + 1;
                                 auto it = user_module_map.find(mname);
                                 if (user_module_handle == it->second)
-                                    user_module_handle = 0;
+                                    user_module_handle = nullptr;
                                 dlclose(it->second);
                                 user_module_map.erase(it);
                             }
@@ -207,8 +377,9 @@ int dll_main(int argc, char** argv, Context* cxt){
                     }
                     if(handle) {
                         dlclose(handle);
-                        handle = 0;
+                        handle = nullptr;
                     }
+                    printf("%lld, %lld", cfg->stats.monet_time, cfg->stats.postproc_time);
                     cxt->end_session();
                     n_recv = 0;
                 }
@@ -217,7 +388,7 @@ int dll_main(int argc, char** argv, Context* cxt){
                 }   
                 else{
                     server->last_error = nullptr;
-                    continue;
+                    //goto finalize;
                 } 
             }
             
@@ -230,9 +401,11 @@ int dll_main(int argc, char** argv, Context* cxt){
             if (handle) dlclose(handle);
             cfg->new_query = 0;
         }
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        //puts(cfg->running? "true": "false");
+//finalize:
+        PROMPT_RELEASE();
     }
-
+    
     return 0;
 }
 
@@ -263,20 +436,21 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
 #ifdef __AQ_BUILD_LAUNCHER__
    return launcher(argc, argv);
 #endif
-   puts("running");
+   // puts("running");
    Context* cxt = new Context();
-   cxt->log("%d %s\n", argc, argv[1]);
+   // cxt->log("%d %s\n", argc, argv[1]);
 
 #ifdef THREADING
     auto tp = new ThreadPool();
     cxt->thread_pool = tp;
 #endif
     
+#ifdef __AQ_THREADED_GC__
+    cxt->gc_thread = new std::thread(gc_thread, cxt);
+#endif    
    const char* shmname;
    if (argc < 0)
         return dll_main(argc, argv, cxt);
-   else if (argc <= 1)
-        return test_main();
    else
        shmname = argv[1];
    SharedMemory shm = SharedMemory(shmname);
@@ -310,56 +484,3 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
    return 0;
 }
 
-#include "utils.h"
-#include "table_ext_monetdb.hpp"
-int test_main()
-{
-    Context* cxt = new Context();
-    if (cxt->alt_server == 0)
-        cxt->alt_server = new Server(cxt);
-    Server* server = reinterpret_cast<Server*>(cxt->alt_server);
-
-    const char* qs[]= {
-        "QCREATE TABLE trade(stocksymbol INT, time INT, quantity INT, price INT);",
-        "QCOPY OFFSET 2 INTO trade FROM  'w:/gg/AQuery++/data/trade_numerical.csv'  ON SERVER    USING DELIMITERS  ',';",
-        "QSELECT stocksymbol, (SUM((quantity * price)) / SUM(quantity)) AS weighted_average  FROM trade GROUP BY stocksymbol  ;",
-        "Pdll_5lYrMY",
-        "QSELECT stocksymbol, price  FROM trade ORDER BY time  ;",
-        "Pdll_4Sg6Ri",
-        "QSELECT stocksymbol, quantity, price  FROM trade ORDER BY time  ;",
-        "Pdll_5h4kL2",
-        "QSELECT stocksymbol, price  FROM trade ORDER BY time  ;",
-        "Pdll_7tEWCO",
-        "QSELECT query_c.weighted_moving_averages, query_c.stocksymbol  FROM query_c;",
-        "Pdll_7FCPnF"
-    };
-    n_recv = sizeof(qs)/(sizeof (char*));
-	n_recvd = const_cast<char**>(qs);
-            void* handle = 0;
-                    handle = dlopen("./dll.so", RTLD_LAZY);
-                    cxt->init_session();
-                    for (int i = 0; i < n_recv; ++i)
-                    {
-                        //printf("%s, %d\n", n_recvd[i], n_recvd[i][0] == 'Q');
-                        switch (n_recvd[i][0]) {
-                        case 'Q': // SQL query for monetdbe
-                        {
-                            server->exec(n_recvd[i] + 1);
-                            printf("Exec Q%d: %s\n", i, n_recvd[i]);
-                        }
-                        break;
-                        case 'P': // Postprocessing procedure 
-                            if (handle && !server->haserror()) {
-                                code_snippet c = reinterpret_cast<code_snippet>(dlsym(handle, n_recvd[i] + 1));
-                                c(cxt);
-                            }
-                            break;
-                        }
-                    }
-                    n_recv = 0;
-
-    //static_assert(std::is_same_v<decltype(fill_integer_array<5, 1>()), std::integer_sequence<bool, 1,1,1,1,1>>, "");
-    
-    return 0;
-}
-
diff --git a/server/table.h b/server/table.h
index 56c7a4b..af26ae7 100644
--- a/server/table.h
+++ b/server/table.h
@@ -9,6 +9,7 @@
 #include <string>
 #include <algorithm>
 #include <cstdarg>
+#include <vector>
 #include "io.h"
 #include "hasher.h"
 
@@ -74,7 +75,16 @@ public:
 		this->container = (_Ty*)container;
 		this->name = name;
 	}
-	template<template <typename ...> class VT, typename T>
+	template<template <typename> class VT, typename T>
+	void initfrom(VT<T>&& v, const char* name = "") { // Adopts an rvalue vector-like v: takes its size, capacity and container pointer (no element copy) and sets the column name.
+		ty = types::Types<_Ty>::getType();
+		this->size = v.size;
+		this->capacity = v.capacity;
+		this->container = (_Ty*)(v.container);
+		this->name = name;
+		v.capacity = 0; // v keeps its pointer but its capacity is zeroed -- presumably so v no longer counts as the buffer's owner; confirm against vector_type
+	}
+	template<template <typename> class VT, typename T>
 	void initfrom(const VT<T>& v, const char* name = "") {
 		ty = types::Types<_Ty>::getType();
 		this->size = v.size;
@@ -82,6 +92,21 @@ public:
 		this->container = (_Ty*)(v.container);
 		this->name = name;
 	}
+	void initfrom(vectortype_cstorage v, const char* name = "") { // Wraps raw C storage: takes v's size, capacity and container pointer without copying elements, and sets the column name.
+		ty = types::Types<_Ty>::getType();
+		this->size = v.size;
+		this->capacity = v.capacity;
+		this->container = (_Ty*)v.container;
+		this->name = name;
+	}
+	template<typename T>
+	void initfrom(const T& v, const char* name = "") { // Initializes the column from a single value: resets size/capacity to 0, appends v, then sets the column name.
+		ty = types::Types<_Ty>::getType();
+		this->size = 0;
+		this->capacity = 0;
+		this->emplace_back(v); // NOTE(review): container is not reset here; relies on emplace_back/grow coping with capacity == 0 -- confirm
+		this->name = name;
+	}
 	template <class T>
 	ColRef<_Ty>& operator =(ColRef<T>&& vt) {
 		this->container = (_Ty*)vt.container;
@@ -115,8 +140,16 @@ public:
 	ColView<_Ty> operator [](const vector_type<uint32_t>& idxs) const {
 		return ColView<_Ty>(*this, idxs);
 	}
-
-	void out(uint32_t n = 4, const char* sep = " ") const {
+	vector_type<_Ty> operator [](const std::vector<bool>& idxs) const { // Boolean-mask selection: collects the elements whose flag in idxs is true.
+		vector_type<_Ty> ret (this->size); // NOTE(review): if this constructor also sets ret.size, emplace_back appends after this->size existing slots -- confirm
+		uint32_t i = 0;
+		for(const auto& f : idxs){ // NOTE(review): idxs.size() is not checked against this->size
+			if(f) ret.emplace_back(this->operator[](i));
+			++i;
+		}
+		return ret;
+	}
+	void out(uint32_t n = 1000, const char* sep = " ") const {
 		const char* more = "";
 		if (n < this->size)
 			more = " ... ";
@@ -180,7 +213,7 @@ template<>
 class ColRef<void> : public ColRef<int> {};
 
 template<typename _Ty>
-class ColView {
+class ColView : public vector_base<_Ty> {
 public:
 	typedef ColRef<_Ty> Decayed_t;
 	const uint32_t size;
@@ -219,7 +252,7 @@ public:
 	Iterator_t end() const {
 		return Iterator_t(idxs.end(), orig);
 	}
-	void out(uint32_t n = 4, const char* sep = " ") const {
+	void out(uint32_t n = 1000, const char* sep = " ") const {
 		n = n > size ? size : n;
 		std::cout << '(';
 		for (uint32_t i = 0; i < n; ++i)
@@ -414,19 +447,27 @@ struct TableInfo {
 	}
 	template <int ...cols>
 	void print2(const char* __restrict sep = ",", const char* __restrict end = "\n",
-		const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr) const {
+		const vector_type<uint32_t>* __restrict view = nullptr, 
+		FILE* __restrict fp = nullptr, uint32_t limit = std::numeric_limits<uint32_t>::max()
+		) const {
 
 		std::string printf_string =
 			generate_printf_string<typename std::tuple_element<cols, tuple_type>::type ...>(sep, end);
+		// puts(printf_string.c_str());
 		std::string header_string = std::string();
 		constexpr static int a_cols[] = { cols... };
-		for (int i = 0; i < sizeof...(cols); ++i)
-			header_string += std::string(this->colrefs[a_cols[i]].name) + sep;
-		const size_t l_sep = strlen(sep);
-		if (header_string.size() - l_sep >= 0)
-			header_string.resize(header_string.size() - l_sep);
-
-		const auto& prt_loop = [&fp, &view, &printf_string, *this](const auto& f) {
+		if (fp == nullptr){
+			header_string = get_header_string(sep, end);
+			header_string.resize(header_string.size() - strlen(end));
+		}
+		else {
+			for (int i = 0; i < sizeof...(cols); ++i)
+				header_string += std::string(this->colrefs[a_cols[i]].name) + sep;
+			const size_t l_sep = strlen(sep);
+			if (header_string.size() - l_sep >= 0)
+				header_string.resize(header_string.size() - l_sep);
+		}
+		const auto& prt_loop = [&fp, &view, &printf_string, *this, &limit](const auto& f) {
 #ifdef __AQ__HAS__INT128__			
 			constexpr auto num_hge = count_type<__int128_t, __uint128_t>((tuple_type*)(0));
 #else
@@ -442,16 +483,21 @@ struct TableInfo {
 				+ 1 // padding for msvc not allowing empty arrays
 			];
 			setgbuf(cbuf);
-			if (view)
-				for (uint32_t i = 0; i < view->size; ++i) {
+			
+			if (view){
+				uint32_t outsz = limit > view->size ? view->size : limit;
+				for (uint32_t i = 0; i < outsz; ++i) {
 					print2_impl<cols...>(f, (*view)[i], printf_string.c_str());
 					setgbuf();
 				}
-			else
-				for (uint32_t i = 0; i < colrefs[0].size; ++i) {
+			}
+			else{
+				uint32_t outsz = limit > colrefs[0].size ? colrefs[0].size : limit;
+				for (uint32_t i = 0; i < outsz; ++i) {
 					print2_impl<cols...>(f, i, printf_string.c_str());
 					setgbuf();
 				}
+			}
 		};
 
 		if (fp)
@@ -466,15 +512,17 @@ struct TableInfo {
 	}
 	template <int ...vals> struct applier {
 		inline constexpr static void apply(const TableInfo<Types...>& t, const char* __restrict sep = ",", const char* __restrict end = "\n",
-			const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr)
+			const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr, uint32_t limit = std::numeric_limits<uint32_t>::max()
+			) 
 		{
-			t.template print2<vals ...>(sep, end, view, fp);
+			t.template print2<vals ...>(sep, end, view, fp, limit);
 		}
 	};
 
 	inline void printall(const char* __restrict sep = ",", const char* __restrict end = "\n",
-		const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr) {
-		applyIntegerSequence<sizeof...(Types), applier>::apply(*this, sep, end, view, fp);
+		const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr, 
+		uint32_t limit = std::numeric_limits<uint32_t>::max() ) const {
+		applyIntegerSequence<sizeof...(Types), applier>::apply(*this, sep, end, view, fp, limit);
 	}
 
 	TableInfo<Types...>* rename(const char* name) {
@@ -643,7 +691,9 @@ template <class ...Types>
 template <size_t j>
 inline typename std::enable_if<j == sizeof...(Types) - 1, void>::type
 TableInfo<Types ...>::print_impl(const uint32_t& i, const char* __restrict sep) const {
-	std::cout << (get<j>(*this))[i];
+	decltype(auto) t = (get<j>(*this))[i];
+//	print(t);
+	std::cout << t;
 }
 
 template<class ...Types>
@@ -658,6 +708,7 @@ inline typename std::enable_if < j < sizeof...(Types) - 1, void>::type
 template<class ...Types>
 inline void TableInfo<Types...>::print(const char* __restrict sep, const char* __restrict end) const {
 
+	//printall(sep, end);
 	std::string header_string = get_header_string(sep, end);
 	std::cout << header_string.c_str();
 
@@ -669,51 +720,56 @@ inline void TableInfo<Types...>::print(const char* __restrict sep, const char* _
 		std::cout << end;
 	}
 }
+
+// Vector-like class templates are detected with std::is_base_of, so each of them must derive from vector_base.
 template <class T1,
 			template<typename> class VT,
 			class TRet>
-using test_vt_support = typename std::enable_if_t<std::is_same_v<VT<T1>, ColRef<T1>> || 
-				std::is_same_v<VT<T1>, ColView<T1>> || 
-				std::is_same_v<VT<T1>, vector_type<T1>>, TRet>;
+using test_vt_support = typename std::enable_if_t<
+					std::is_base_of_v<vector_base<T1>, VT<T1>>, 
+					TRet>;
 
-template <class T1, class T2,
-			template<typename> class VT>
-using get_autoext_type = test_vt_support<T1, VT, 
-		decayed_t<VT, typename types::Coercion<T1, T2>::type>>;
 
-template <class T1, class T2,
-			template<typename> class VT>
-using get_long_type = test_vt_support<T1, VT, 
-		decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>>;
+template <class T1, class T2, template<typename> class VT, 
+			test_vt_support<T1, VT, void>* = nullptr>
+using get_autoext_type = 
+		decayed_t<VT, typename types::Coercion<T1, T2>::type>;
 
-template <class T1, class T2,
-			template<typename> class VT>
-using get_fp_type = test_vt_support<T1, VT, 
-		decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>>;
+template <class T1, class T2, template<typename> class VT, 
+		test_vt_support<T1, VT, void>* = nullptr>
+using get_long_type = 
+		decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>;
+
+template <class T1, class T2, template<typename> class VT,
+		test_vt_support<T1, VT, void>* = nullptr>
+using get_fp_type = 
+		decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>;
 
 template <class T1, 
 			template<typename> class VT, template<typename> class VT2,
 			class TRet>
-using test_vt_support2 = typename std::enable_if_t<(std::is_same_v<VT<T1>, ColRef<T1>> || 
-				std::is_same_v<VT<T1>, ColView<T1>> || 
-				std::is_same_v<VT<T1>, vector_type<T1>>) &&
-				(std::is_same_v<VT2<T1>, ColRef<T1>> || 
-				std::is_same_v<VT2<T1>, ColView<T1>> || 
-				std::is_same_v<VT2<T1>, vector_type<T1>>), TRet >;
+using test_vt_support2 = typename std::enable_if_t<
+				std::is_base_of_v<vector_base<T1>, VT<T1>> &&
+				std::is_base_of_v<vector_base<T1>, VT2<T1>>, 
+				TRet >;
+
 template <class T1, class T2,
-			template<typename> class VT, template<typename> class VT2>
-using get_autoext_type2 = test_vt_support2<T1, VT, VT2,
-		decayed_t<VT, typename types::Coercion<T1, T2>::type>>;
+			template<typename> class VT, template<typename> class VT2, 
+			test_vt_support2<T1, VT, VT2, void>* = nullptr >
+using get_autoext_type2 = 
+		decayed_t<VT, typename types::Coercion<T1, T2>::type>;
 
 template <class T1, class T2,
-			template<typename> class VT, template<typename> class VT2>
-using get_long_type2 = test_vt_support2<T1, VT, VT2,
-		decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>>;
+			template<typename> class VT, template<typename> class VT2, 
+			test_vt_support2<T1, VT, VT2, void>* = nullptr >
+using get_long_type2 = 
+		decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>;
 
 template <class T1, class T2,
-			template<typename> class VT, template<typename> class VT2>
-using get_fp_type2 = test_vt_support2<T1, VT, VT2,
-		decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>>;
+			template<typename> class VT, template<typename> class VT2, 
+			test_vt_support2<T1, VT, VT2, void>* = nullptr >
+using get_fp_type2 = 
+		decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>;
 
 template <class T1, class T2, template<typename> class VT, template<typename> class VT2>
 get_autoext_type2<T1, T2, VT, VT2>
@@ -835,7 +891,6 @@ VT<bool> operator >(const T2& lhs, const VT<T1>& rhs) {
 }
 
 
-
 template <class ...Types>
 void print(const TableInfo<Types...>& v, const char* delimiter = " ", const char* endline = "\n") {
 	v.print(delimiter, endline);
diff --git a/server/table_ext_monetdb.hpp b/server/table_ext_monetdb.hpp
index c128559..3c93c3f 100644
--- a/server/table_ext_monetdb.hpp
+++ b/server/table_ext_monetdb.hpp
@@ -45,16 +45,16 @@ void TableInfo<Ts ...>::monetdb_append_table(void* srv, const char* alt_name) {
 	puts("getcols...");
 	uint32_t cnt = 0;
 	const auto get_col = [&monetdbe_cols, &i, *this, &gc_vecs, &cnt](auto v) {
-		printf("%d %d\n", i, (ColRef<void>*)v - colrefs);
+		// printf("%d %d\n", i, (ColRef<void>*)v - colrefs);
 		monetdbe_cols[i++] = (monetdbe_column*)v->monetdb_get_col(gc_vecs, cnt);
 	};
 	(get_col((ColRef<Ts>*)(colrefs + i)), ...);
 	puts("getcols done");
-	for(int i = 0; i < sizeof...(Ts); ++i)
-	{
-		printf("no:%d name: %s count:%d data: %p type:%d \n", 
-		i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type);
-	}
+	// for(int i = 0; i < sizeof...(Ts); ++i)
+	// {
+	// 	printf("no:%d name: %s count:%d data: %p type:%d \n", 
+	// 	i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type);
+	// }
 	std::string create_table_str = "CREATE TABLE IF NOT EXISTS ";
 	create_table_str += alt_name;
 	create_table_str += " (";
diff --git a/server/types.h b/server/types.h
index 3ae14b6..20cb0fc 100644
--- a/server/types.h
+++ b/server/types.h
@@ -29,27 +29,37 @@ inline constexpr size_t aq_szof<void> = 0;
 template <class T1, class T2>
 struct aqis_same_impl {
 	constexpr static bool value = 
+		
 		std::conditional_t<
-			std::is_signed_v<T1> == std::is_signed_v<T2>,
+			std::is_same_v<T1, bool> || std::is_same_v<T2, bool>, 
+			Cond(
+				(std::is_same_v<T1, bool> && std::is_same_v<T2, bool>), 
+				std::true_type, 
+				std::false_type
+			),
 			Cond(
-				std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
+				std::is_signed_v<T1> == std::is_signed_v<T2>,
 				Cond(
-					aq_szof<T1> == aq_szof<T2>, // deal with sizeof(void)
-					std::true_type,
+					std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
+					Cond(
+						aq_szof<T1> == aq_szof<T2>, // deal with sizeof(void)
+						std::true_type,
+						std::false_type
+					),
 					std::false_type
 				),
 				std::false_type
-			),
-			std::false_type
+			)
 		>::value;
 };
-
+// make sure size_t/ptr_t and the corresponding integer types are the same
 template <class T1, class T2, class ...Ts>
 constexpr bool aqis_same = aqis_same_impl<T1, T2>::value &&
 aqis_same<T2, Ts...>;
 
 template <class T1, class T2>
 constexpr bool aqis_same<T1, T2> = aqis_same_impl<T1, T2>::value;
+
 namespace types {
 	enum Type_t {
 		AINT32, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, AINT64, AINT128, AINT16, ADATE, ATIME, AINT8,
diff --git a/server/utils.h b/server/utils.h
index 3cd7b2a..6a7eb07 100644
--- a/server/utils.h
+++ b/server/utils.h
@@ -1,14 +1,18 @@
 #pragma once
+
 #include <ctime>
+#include <type_traits>
+#include <string>
+
 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
 constexpr static bool cpp_17 = true;
 #else
 constexpr static bool cpp_17 = false;
 #endif
+
 template <class T>
 inline const char* str(const T& v) {
 	return "";
 }
 
-#include<string>
 extern std::string base62uuid(int l = 6);
diff --git a/server/vector_type.hpp b/server/vector_type.hpp
index 9b03e89..f817051 100644
--- a/server/vector_type.hpp
+++ b/server/vector_type.hpp
@@ -17,13 +17,16 @@
 #include "types.h"
 
 #pragma pack(push, 1)
+template<class T>
+struct vector_base {};
+
 struct vectortype_cstorage{
 	void* container;
 	unsigned int size, capacity;
 };
 
 template <typename _Ty>
-class vector_type {
+class vector_type : public vector_base<_Ty>{
 public:
 	typedef vector_type<_Ty> Decayed_t;
 	void inline _copy(const vector_type<_Ty>& vt) {
@@ -71,9 +74,15 @@ public:
 	constexpr explicit vector_type(const vector_type<_Ty>& vt) noexcept : capacity(0) {
 		_copy(vt);
 	}
+	constexpr vector_type(vector_type<_Ty>& vt) noexcept : capacity(0) { // NOTE: a non-const lvalue source is handed to _move exactly like an rvalue, i.e. it is not copied
+		_move(std::move(vt));
+	}
 	constexpr vector_type(vector_type<_Ty>&& vt) noexcept : capacity(0) {
 		_move(std::move(vt));
 	}
+	vector_type(vectortype_cstorage vt) noexcept : capacity(vt.capacity), size(vt.size), container((_Ty*)vt.container) {
+		// Takes vt's capacity, size and container pointer as-is: no element copy, and no output from this noexcept constructor.
+	}
 	// size >= capacity ==> readonly vector
 	constexpr vector_type(const uint32_t size, void* data) : 
 		size(size), capacity(0), container(static_cast<_Ty*>(data)) {}
@@ -159,6 +168,10 @@ public:
 		grow();
 		container[size++] = _val;
 	}
+	void emplace_back(_Ty& _val) { // non-const lvalue overload: the new element is move-assigned from _val, so _val may be left in a moved-from state
+		grow();
+		container[size++] = std::move(_val);
+	}
 	void emplace_back(_Ty&& _val) {
 		grow();
 		container[size++] = std::move(_val);
@@ -255,7 +268,7 @@ public:
 		}
 		size = this->size + dist;
 	}
-	inline void out(uint32_t n = 4, const char* sep = " ") const
+	inline void out(uint32_t n = 4000, const char* sep = " ") const
 	{
 		const char* more = "";
 		if (n < this->size)
diff --git a/server/winhelper.cpp b/server/winhelper.cpp
index 48bfa86..d59f58b 100644
--- a/server/winhelper.cpp
+++ b/server/winhelper.cpp
@@ -41,4 +41,20 @@ void SharedMemory::FreeMemoryMap()
         if (this->hFileMap)
             CloseHandle(this->hFileMap);
 }
+
+#ifndef __USE_STD_SEMAPHORE__
+A_Semaphore::A_Semaphore(bool v) { // the default argument (false) is given once, on the declaration in winhelper.h
+    native_handle = CreateSemaphore(NULL, v, 1, NULL); // initial count = v, maximum count = 1, unnamed
+}
+void A_Semaphore::acquire() { // waits without timeout until the semaphore is signaled
+    WaitForSingleObject(native_handle, INFINITE);
+}
+void A_Semaphore::release() { // raises the semaphore count by one
+    ReleaseSemaphore(native_handle, 1, NULL);
+}
+A_Semaphore::~A_Semaphore() {
+    CloseHandle(native_handle);
+}
+#endif
+
 #endif
diff --git a/server/winhelper.h b/server/winhelper.h
index df9231e..f39c0b9 100644
--- a/server/winhelper.h
+++ b/server/winhelper.h
@@ -14,5 +14,17 @@ struct SharedMemory
     SharedMemory(const char*);
     void FreeMemoryMap();
 };
+
+#ifndef __USE_STD_SEMAPHORE__
+class A_Semaphore { // Win32 semaphore wrapper; member functions are defined in winhelper.cpp
+private:
+	void* native_handle; // handle returned by CreateSemaphore
+public:
+	A_Semaphore(bool v = false); // v: initial count (true -> 1, false -> 0); must match the bool constructor defined in winhelper.cpp
+	void acquire();
+	void release();
+	~A_Semaphore();
+};
 #endif
+
 #endif
diff --git a/tests/complex_data.a b/tests/complex_data.a
new file mode 100644
index 0000000..e08da4b
--- /dev/null
+++ b/tests/complex_data.a
@@ -0,0 +1,3 @@
+create table f (a float, b vecfloat, c int)
+load complex data infile 'data/test_complex.csv' into table f fields terminated by ',' element terminated by ';'
+select * from f
\ No newline at end of file
diff --git a/tests/datagen_jose/Time.cpp b/tests/datagen_jose/Time.cpp
index 5f852cb..d5130d5 100644
--- a/tests/datagen_jose/Time.cpp
+++ b/tests/datagen_jose/Time.cpp
@@ -18,7 +18,7 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 #include <stdio.h>
-#include "Time.H"
+#include "Time.hpp"
 
 Time::Time(char *startTime_)
 {
diff --git a/tests/dt.a b/tests/dt.a
index 5a52ac1..69a2bca 100644
--- a/tests/dt.a
+++ b/tests/dt.a
@@ -1,21 +1,21 @@
 LOAD MODULE FROM "./libirf.so"
-FUNCTIONS (
-    newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
-    additem(X:vecdouble, y:int64, size:int64) -> bool,
-    fit() -> bool,
-    predict() -> vecint
-);
-create table tb(x int);
-create table tb2(x double, y double, z double);
-insert into tb values (0);
-insert into tb values (0);
-insert into tb values (0);
-select newtree(5, 3, tb.x, 0, 3, 2, 0, 100, 1) from tb;
-insert into tb2 values (1, 0, 1);
-insert into tb2 values (0, 1, 1);
-insert into tb2 values (1, 1, 1);
-select additem(tb2.x, 1, 3) from tb2;
-select additem(tb2.y, 0, -1) from tb2;
-select additem(tb2.z, 1, -1) from tb2;
-select fit();
-select predict();
+ FUNCTIONS (
+     newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
+     additem(X:vecdouble, y:int64, size:int64) -> bool,
+     fit() -> bool,
+     predict() -> vecint
+ );
+ create table tb(x int);
+ create table tb2(x double, y double, z double);
+ insert into tb values (0);
+ insert into tb values (0);
+ insert into tb values (0);
+ select newtree(5, 3, tb.x, 0, 3, 2, 0, 100, 1) from tb;
+ insert into tb2 values (1, 0, 1);
+ insert into tb2 values (0, 1, 1);
+ insert into tb2 values (1, 1, 1);
+ select additem(tb2.x, 1, 3) from tb2;
+ select additem(tb2.y, 0, -1) from tb2;
+ select additem(tb2.z, 1, -1) from tb2;
+ select fit();
+ select predict();
diff --git a/tests/dt2.a b/tests/dt2.a
new file mode 100644
index 0000000..bd2bde7
--- /dev/null
+++ b/tests/dt2.a
@@ -0,0 +1,22 @@
+LOAD MODULE FROM "./libirf.so"
+ FUNCTIONS (
+     newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
+     fit(X:vecvecdouble, y:vecint64) -> bool,
+     predict(X:vecvecdouble) -> vecint
+ );
+
+ create table source(x1 double, x2 double, x3 double, x4 double, x5 int64);
+ load data infile "data/benchmark" into table source fields terminated by ",";
+
+ create table sparse(x int);
+ insert into sparse values (1);
+ insert into sparse values (1);
+ insert into sparse values (1);
+ insert into sparse values (1);
+
+ select newtree(6, 4, sparse.x, 0, 4, 2, 0, 400, 2147483647) from sparse
+
+ select fit(pack(x1, x2, x3, x4), x5) from source
+
+-- select pack(x1, x2, x3, x4) from source
+  select predict(pack(x1, x2, x3, x4)) from source
\ No newline at end of file
diff --git a/tests/funcs.a b/tests/funcs.a
index 65316ce..7f17f0c 100644
--- a/tests/funcs.a
+++ b/tests/funcs.a
@@ -19,7 +19,7 @@ LOAD DATA INFILE "data/test.csv"
 INTO TABLE test1
 FIELDS TERMINATED BY ","
 
-SELECT pairCorr(c, b) * d, sum(a), b
+SELECT pairCorr(c, b) * d, a, sum(b)
 FROM test1
-group by c,b,d
+group by a
 order by b ASC
diff --git a/tests/jose_gh.a b/tests/jose_gh.a
new file mode 100644
index 0000000..4589202
--- /dev/null
+++ b/tests/jose_gh.a
@@ -0,0 +1,31 @@
+CREATE TABLE t(indiv INT, grp STRING, val INT)
+INSERT INTO t VALUES(1, 'A', 1)
+INSERT INTO t VALUES(1, 'A', 2)
+INSERT INTO t VALUES(1, 'A', 3)
+INSERT INTO t VALUES(1, 'A', 4)
+INSERT INTO t VALUES(2, 'A', 2)
+INSERT INTO t VALUES(2, 'A', 2)
+INSERT INTO t VALUES(2, 'A', 4)
+INSERT INTO t VALUES(2, 'A', 8)
+INSERT INTO t VALUES(3, 'B', 10)
+INSERT INTO t VALUES(3, 'B', 20)
+INSERT INTO t VALUES(3, 'B', 30)
+INSERT INTO t VALUES(3, 'B', 40)
+INSERT INTO t VALUES(4, 'B', 20)
+INSERT INTO t VALUES(4, 'B', 20)
+INSERT INTO t VALUES(4, 'B', 40)
+INSERT INTO t VALUES(4, 'B', 80)
+
+
+SELECT * FROM t 
+
+FUNCTION myCov(x, y) {
+  center_x := x - avg(x);
+  center_y := y - avg(y);
+  num := sum(center_x * center_y);
+  denom := sqrt(sum(center_x * center_x)) * sqrt(sum(center_y * center_y));
+  num / denom
+  }
+
+
+select myCov(1,2);
\ No newline at end of file
diff --git a/tests/q1.sql b/tests/q1.sql
index 747b83b..b57c2d1 100644
--- a/tests/q1.sql
+++ b/tests/q1.sql
@@ -7,4 +7,4 @@ FIELDS TERMINATED BY ","
 SELECT sum(c), b, d
 FROM testq1
 group by a,b,d
-order by d DESC, b ASC
+order by d DESC, b ASC;
diff --git a/tests/q4.a b/tests/q4.a
index 4237b16..4a4016b 100644
--- a/tests/q4.a
+++ b/tests/q4.a
@@ -17,4 +17,23 @@ LOAD DATA INFILE "data/ticks.csv" INTO TABLE TICKS FIELDS TERMINATED BY ","
 SELECT max(endofdayprice/prev(endofdayprice)) as Max_Ratio
 FROM ticks
 ASSUMING ASC date
-WHERE ID = "3001"
\ No newline at end of file
+WHERE ID = "3001"
+
+CREATE TABLE ticks2(ID VARCHAR(20), max REAL, min REAL)
+INSERT INTO ticks2 SELECT ID AS ID, max(ratios(endofdayprice)) AS max, min(ratios(endofdayprice)) AS min from ticks  group by ID;
+
+SELECT ID, max, min
+FROM ticks2;
+
+CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
+INSERT INTO my_table VALUES(10, 20, "example")
+select * from my_table;
+INSERT INTO my_table SELECT * FROM my_table
+select * from my_table;
+SELECT c1, c2  as twice_c2 FROM my_table;
+
+CREATE TABLE my_table_derived
+AS
+  SELECT c1, c2  as twice_c2 FROM my_table;
+SELECT * FROM my_table_derived;
+
diff --git a/tests/sqlblock.a b/tests/sqlblock.a
new file mode 100644
index 0000000..861c0eb
--- /dev/null
+++ b/tests/sqlblock.a
@@ -0,0 +1,9 @@
+CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
+INSERT INTO my_table VALUES(10, 20, "example"), (20, 30, "example2")
+<sql>
+INSERT INTO my_table VALUES(14, 24, 'example3');
+CREATE INDEX idx1 ON my_table(c1);
+SELECT * FROM my_table WHERE c1 < 15;
+
+</sql>
+SELECT * FROM my_table WHERE c1 > 15
\ No newline at end of file