diff --git a/.gitignore b/.gitignore index 4807b2c..508685f 100644 --- a/.gitignore +++ b/.gitignore @@ -51,12 +51,14 @@ k **/Debug **/Release test*.c* +data/benchmark *.csv !test.csv !test2.csv !moving_avg.csv !nyctx100.csv !network.csv +!test_complex.csv *.out *.asm !mmw.so diff --git a/Makefile b/Makefile index dd7747e..21b55bd 100644 --- a/Makefile +++ b/Makefile @@ -1,25 +1,34 @@ OS_SUPPORT = MonetDB_LIB = MonetDB_INC = -Threading = +Defines = CXXFLAGS = --std=c++1z ifeq ($(AQ_DEBUG), 1) - OPTFLAGS = -g3 + OPTFLAGS = -g3 -fsanitize=address -fsanitize=leak + LINKFLAGS = else OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector + LINKFLAGS = -flto -s endif -LINKFLAGS = -flto # + $(AQ_LINK_FLAG) SHAREDFLAGS = -shared FPIC = -fPIC -COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc) +_COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc) +COMPILER = $(strip $(_COMPILER)) LIBTOOL = ar rcs USELIB_FLAG = -Wl,--whole-archive,libaquery.a -Wl,-no-whole-archive -LIBAQ_SRC = server/server.cpp server/monetdb_conn.cpp server/io.cpp -LIBAQ_OBJ = server.o monetdb_conn.o io.o +LIBAQ_SRC = server/monetdb_conn.cpp server/libaquery.cpp +LIBAQ_OBJ = monetdb_conn.o libaquery.o SEMANTIC_INTERPOSITION = -fno-semantic-interposition RANLIB = ranlib +_LINKER_BINARY = $(shell `$(CXX) -print-prog-name=ld` -v 2>&1 | grep -q LLVM && echo lld || echo ld) +LINKER_BINARY = $(strip $(_LINKER_BINARY)) +ifeq ($(LINKER_BINARY), ld) + LINKER_FLAGS = -Wl,--allow-multiple-definition +else + LINKER_FLAGS = +endif -ifeq ($(COMPILER), clang ) +ifeq ($(COMPILER), clang) CLANG_GE_10 = $(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \>= 10) ifneq ($(CLANG_GE_10), 1) SEMANTIC_INTERPOSITION = @@ -49,7 +58,7 @@ ifeq ($(OS),Windows_NT) MonetDB_LIB += msc-plugin/monetdbe.dll MonetDB_INC += -Imonetdb/msvc LIBTOOL = gcc-ar rcs - ifeq ($(COMPILER), clang ) + ifeq ($(COMPILER), clang) FPIC = endif else @@ -61,7 +70,7 @@ else USELIB_FLAG = -Wl,-force_load MonetDB_LIB += 
-L$(shell brew --prefix monetdb)/lib MonetDB_INC += -I$(shell brew --prefix monetdb)/include/monetdb - ifeq ($(COMPILER), clang ) + ifeq ($(COMPILER), clang) LIBTOOL = libtool -static -o endif ifneq ($(UNAME_M),arm64) @@ -79,43 +88,65 @@ endif ifeq ($(THREADING),1) LIBAQ_SRC += server/threading.cpp LIBAQ_OBJ += threading.o - Threading += -DTHREADING + Defines += -DTHREADING +endif + +ifeq ($(AQUERY_ITC_USE_SEMPH), 1) + Defines += -D__AQUERY_ITC_USE_SEMPH__ endif -SHAREDFLAGS += $(FPIC) +CXXFLAGS += $(OPTFLAGS) $(Defines) $(MonetDB_INC) +BINARYFLAGS = $(CXXFLAGS) $(LINKFLAGS) $(MonetDB_LIB) +SHAREDFLAGS += $(FPIC) $(BINARYFLAGS) info: - $(info $(OPTFLAGS)) - $(info $(OS_SUPPORT)) - $(info $(OS)) - $(info $(Threading)) - $(info "test") - $(info $(LIBTOOL)) - $(info $(MonetDB_INC)) - $(info $(COMPILER)) - $(info $(CXX)) - $(info $(FPIC)) + $(info This makefile script is used in AQuery to automatically build required libraries and executables.) + $(info Run it manually only for debugging purposes.) 
+ $(info Targets (built by `make `):) + $(info $" pch: generate precompiled header) + $(info $" libaquery.a: build static library) + $(info $" server.so: build execution engine) + $(info $" snippet: build generated query snippet) + $(info $" server_uselib: build execution engine using shared library and pch) + $(info $" snippet_uselib: build generated query snippet using shared library and pch) + $(info $" docker: build docker image with name aquery) + $(info $" launcher: build launcher for aquery ./aq) + $(info $" clean: remove all generated binaraies and caches) + $(info ) + $(info Variables:) + $(info $" OPTFLAGS: $(OPTFLAGS)) + $(info $" OS_SUPPORT: $(OS_SUPPORT)) + $(info $" OS: $(OS)) + $(info $" Defines: $(Defines)) + $(info $" LIBTOOL: $(LIBTOOL)) + $(info $" MonetDB_INC: $(MonetDB_INC)) + $(info $" COMPILER: $(COMPILER)) + $(info $" CXX: $(CXX)) + $(info $" LINKER_BINARY: $(LINKER_BINARY)) + $(info $" LINKER_FLAGS: $(LINKER_FLAGS)) pch: - $(CXX) -x c++-header server/pch.hpp $(FPIC) $(MonetDB_INC) $(OPTFLAGS) $(CXXFLAGS) $(Threading) -libaquery.a: - $(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(OS_SUPPORT) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) &&\ + $(CXX) -x c++-header server/pch.hpp $(FPIC) $(CXXFLAGS) +libaquery: + $(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(CXXFLAGS) &&\ $(LIBTOOL) libaquery.a $(LIBAQ_OBJ) &&\ $(RANLIB) libaquery.a +warmup: + $(CXX) msc-plugin/dummy.cpp libaquery.a $(SHAREDFLAGS) -o dll.so server.bin: - $(CXX) $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o server.bin + $(CXX) $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o server.bin launcher: - $(CXX) -D__AQ_BUILD_LAUNCHER__ $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o aq + $(CXX) -D__AQ_BUILD_LAUNCHER__ server/server.cpp $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o aq server.so: # $(CXX) -z muldefs 
server/server.cpp server/monetdb_conn.cpp -fPIC -shared $(OS_SUPPORT) monetdb/msvc/monetdbe.dll --std=c++1z -O3 -march=native -o server.so -I./monetdb/msvc - $(CXX) $(SHAREDFLAGS) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so + $(CXX) $(PCHFLAGS) $(LIBAQ_SRC) server/server.cpp $(OS_SUPPORT) $(SHAREDFLAGS) -o server.so server_uselib: - $(CXX) $(SHAREDFLAGS) $(USELIB_FLAG),libaquery.a $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so + $(CXX) $(LINKER_FLAGS) server/server.cpp libaquery.a $(SHAREDFLAGS) -o server.so snippet: - $(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so + $(CXX) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(SHAREDFLAGS) -o dll.so snippet_uselib: - $(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp libaquery.a $(MonetDB_INC) $(Threading) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so + $(CXX) $(PCHFLAGS) out.cpp libaquery.a $(SHAREDFLAGS) -o dll.so docker: docker build -t aquery . diff --git a/README.md b/README.md index 8abab8f..d272137 100644 --- a/README.md +++ b/README.md @@ -226,9 +226,38 @@ DROP TABLE my_table IF EXISTS - File name can also be absolute path. - See `data/q1.sql` for more information +## Combine Queries +- `UNION ALL` is a bag union of two query results with same schema. e.g. +``` +SELECT * FROM table 1 UNION ALL SELECT * FROM table 2 +``` +- `EXCEPT` clause will return the difference of two query results. e.g. + ## Delete Data: - Use a query like `DELETE FROM [WHERE ]` to delete rows from a table that matches the conditions. +## Performance Measurement +- Execution time can be recorded using the `stats` command described above. + - `stats` command without any argument will show the execution time of all queries executed so far. + - `stats reset` will reset the timer for total execution time printed by `stats` command above. 
+ - `stats on` will show execution time for every following query until a `stats off` command is received. + +## MonetDB Passthrough for Hybrid Engine +AQuery++ supports MonetDB passthrough for hybrid engine. Simply put standard SQL queries inside a \<sql> \</sql> block.
+ +Each query inside an sql block must be separated by a semicolon. And they will be sent to MonetDB directly which means they should be written in MonetDB dialect instead of AQuery dialect. Please refer to the [MonetDB documentation](https://www.monetdb.org/documentation-Sep2022/user-guide/sql-summary/) for more information. + +For example: +``` +CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING) +INSERT INTO my_table VALUES(10, 20, "example"), (20, 30, "example2") +<sql> +INSERT INTO my_table VALUES(10, 20, "example3"); +CREATE INDEX idx1 ON my_table(c1); +</sql> +SELECT * FROM my_table WHERE c1 > 10 +``` + ## Built-in functions: - `avg[s]`: average of a column. `avgs(col), avgs(w, col)` is rolling and moving average with window `w` of the column `col`. - `var[s]`, `stddev[s]`: [moving/rolling] **population** variance, standard deviation. @@ -250,7 +279,7 @@ DROP TABLE my_table IF EXISTS - AQuery++ supports different execution engines thanks to the decoupled compiler structure. - Hybrid Execution Engine: decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - AQuery Library: A set of header based libraries that provide column arithmetic and operations inspired by array programming languages like kdb. This library is used by C++ post-processor code which can significantly reduce the complexity of generated code, reducing compile time while maintaining the best performance. The set of libraries can also be used by UDFs as well as User modules which makes it easier for users to write simple but powerful extensions.
- + # Roadmap - [x] SQL Parser -> AQuery Parser (Front End) - [x] AQuery-C++ Compiler (Back End) diff --git a/aquery_config.py b/aquery_config.py index cdff3b7..caa4faa 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -2,7 +2,7 @@ ## GLOBAL CONFIGURATION FLAGS -version_string = '0.4.9a' +version_string = '0.5.3a' add_path_to_ldpath = True rebuild_backend = False run_backend = True @@ -11,6 +11,9 @@ cygroot = 'c:/msys64/usr/bin' msbuildroot = '' os_platform = 'unknown' build_driver = 'Auto' +compilation_output = True + +## END GLOBAL CONFIGURATION FLAGS def init_config(): global __config_initialized__, os_platform, msbuildroot, build_driver @@ -21,7 +24,8 @@ def init_config(): import os from engine.utils import add_dll_dir # os.environ['CXX'] = 'C:/Program Files/LLVM/bin/clang.exe' - # os.environ['THREADING'] = '1' + os.environ['THREADING'] = '1' + os.environ['AQUERY_ITC_USE_SEMPH'] = '1' if ('__config_initialized__' not in globals() or not __config_initialized__): diff --git a/aquery_parser/keywords.py b/aquery_parser/keywords.py index 5ae05bf..b9da28a 100644 --- a/aquery_parser/keywords.py +++ b/aquery_parser/keywords.py @@ -243,8 +243,8 @@ RESERVED = MatchFirst([ WITHIN, INTO, ]) -L_INLINE = Literal("").suppress() -R_INLINE = Literal("").suppress() +L_INLINE = Literal("").suppress() +R_INLINE = Literal("").suppress() LBRACE = Literal("{").suppress() RBRACE = Literal("}").suppress() LSB = Literal("[").suppress() diff --git a/aquery_parser/sql_parser.py b/aquery_parser/sql_parser.py index 9c08db6..9237470 100644 --- a/aquery_parser/sql_parser.py +++ b/aquery_parser/sql_parser.py @@ -8,6 +8,7 @@ # from sre_parse import WHITESPACE + from mo_parsing.helpers import restOfLine from mo_parsing.infix import delimited_list from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace @@ -65,7 +66,7 @@ def parser(literal_string, ident, sqlserver=False): var_name = ~RESERVED + ident - inline_kblock = (L_INLINE + SkipTo(R_INLINE, include=True))("c") + inline_sqlblock = 
(L_INLINE + SkipTo(R_INLINE, include=True))("sql") # EXPRESSIONS expr = Forward() column_type, column_definition, column_def_references = get_column_type( @@ -568,8 +569,9 @@ def parser(literal_string, ident, sqlserver=False): | assign("comment", EQ + literal_string) | assign("default character set", EQ + var_name) | assign("default charset", EQ + var_name) - ) - + Optional(AS.suppress() + infix_notation(query, [])("query")) + ) + + Optional(AS.suppress() + query("query")) + # investigate why infix_notation(query, []) eats up the rest of queries )("create_table") create_view = ( @@ -655,7 +657,8 @@ def parser(literal_string, ident, sqlserver=False): ) / to_json_call load_data = ( - keyword("data") ("file_type") + Optional(keyword("complex")("complex")) + + keyword("data") ("file_type") + keyword("infile")("loc") + literal_string ("file") + INTO @@ -667,6 +670,12 @@ def parser(literal_string, ident, sqlserver=False): + keyword("by").suppress() + literal_string ("term") ) + + Optional( + keyword("element").suppress() + + keyword("terminated").suppress() + + keyword("by").suppress() + + literal_string ("ele") + ) ) module_func_def = ( @@ -716,7 +725,7 @@ def parser(literal_string, ident, sqlserver=False): )("stmts"), ";") other_stmt = ( - inline_kblock + inline_sqlblock | udf ) ("stmts") diff --git a/build.py b/build.py index 8cd4b91..5ce74e8 100644 --- a/build.py +++ b/build.py @@ -16,6 +16,7 @@ class checksums: server : Optional[Union[bytes, bool]] = None sources : Optional[Union[Dict[str, bytes], bool]] = None env : str = '' + def calc(self, compiler_name, libaquery_a = 'libaquery.a' , pch_hpp_gch = 'server/pch.hpp.gch', server = 'server.so' @@ -24,7 +25,8 @@ class checksums: self.env = (aquery_config.os_platform + machine() + aquery_config.build_driver + - compiler_name + compiler_name + + aquery_config.version_string ) for key in self.__dict__.keys(): try: @@ -71,14 +73,14 @@ class checksums: class build_manager: sourcefiles = [ 'build.py', 'Makefile', - 
'server/server.cpp', 'server/io.cpp', + 'server/server.cpp', 'server/libaquery.cpp', 'server/monetdb_conn.cpp', 'server/threading.cpp', 'server/winhelper.cpp' ] headerfiles = ['server/aggregations.h', 'server/hasher.h', 'server/io.h', 'server/libaquery.h', 'server/monetdb_conn.h', 'server/pch.hpp', 'server/table.h', 'server/threading.h', 'server/types.h', 'server/utils.h', - 'server/winhelper.h', 'server/gc.hpp', 'server/vector_type.hpp', + 'server/winhelper.h', 'server/gc.h', 'server/vector_type.hpp', 'server/table_ext_monetdb.hpp' ] @@ -92,6 +94,9 @@ class build_manager: return False def build(self, stdout = sys.stdout, stderr = sys.stderr): ret = True + if not aquery_config.compilation_output: + stdout = nullstream + stderr = nullstream for c in self.build_cmd: if c: try: # only last success matters @@ -100,6 +105,8 @@ class build_manager: ret = False pass return ret + def warmup(self): + return True class MakefileDriver(DriverBase): def __init__(self, mgr : 'build_manager') -> None: @@ -111,9 +118,9 @@ class build_manager: mgr.cxx = os.environ['CXX'] if 'AQ_DEBUG' not in os.environ: os.environ['AQ_DEBUG'] = '0' if mgr.OptimizationLv else '1' - + def libaquery_a(self): - self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery.a']] + self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery']] return self.build() def pch(self): self.build_cmd = [['rm', 'server/pch.hpp.gch'], ['make', 'pch']] @@ -166,6 +173,10 @@ class build_manager: self.build_cmd = [[aquery_config.msbuildroot, loc, self.opt, self.platform]] return self.build() + def warmup(self): + self.build_cmd = [['make', 'warmup']] + return self.build() + #class PythonDriver(DriverBase): # def __init__(self, mgr : 'build_manager') -> None: # super().__init__(mgr) @@ -221,6 +232,9 @@ class build_manager: current.calc(self.cxx, libaquery_a) with open('.cached', 'wb') as cache_sig: cache_sig.write(pickle.dumps(current)) + self.driver.warmup() + + else: if aquery_config.os_platform == 'mac': 
os.system('./arch-check.sh') diff --git a/csv.h b/csv.h index c5cb5bc..6b10915 100644 --- a/csv.h +++ b/csv.h @@ -1,4 +1,4 @@ -// Copyright: (2012-2015) Ben Strasser +// Copyright: (2012-2015) Ben Strasser , 2022 Bill Sun // License: BSD-3 // // All rights reserved. @@ -49,6 +49,7 @@ #include #include #include +#include "server/vector_type.hpp" namespace io{ //////////////////////////////////////////////////////////////////////////// @@ -974,8 +975,7 @@ namespace io{ return; } x = 10*x+y; - }else - throw error::no_digit(); + } ++col; } } @@ -1005,8 +1005,7 @@ namespace io{ return; } x = 10*x-y; - }else - throw error::no_digit(); + } ++col; } return; @@ -1080,19 +1079,37 @@ namespace io{ } x *= base; } - }else{ - if(*col != '\0') - throw error::no_digit(); } if(is_neg) x = -x; } + template void parse(char*col, float&x) { parse_float(col, x); } template void parse(char*col, double&x) { parse_float(col, x); } template void parse(char*col, long double&x) { parse_float(col, x); } - + + + template + void parse_vector(char* col, vector_type& x) { + while (*col != '\0') { + char* next_col = col; + while (*next_col != sep2 && *next_col != '\0') + ++next_col; + while (*next_col == ' ' || *next_col == '\t' || + *next_col == sep2 || *next_col == '\r' || + *next_col == '\n') + ++next_col; + char _next_end = *next_col; + *next_col = '\0'; + T y; + ::io::detail::parse(col, y); + x.emplace_back(y); + col = next_col; + *next_col = _next_end; + } + } template void parse(char*col, T&x){ // Mute unused variable compiler warning @@ -1108,6 +1125,7 @@ namespace io{ } template, class quote_policy = no_quote_escape<','>, class overflow_policy = throw_on_overflow, @@ -1234,7 +1252,23 @@ namespace io{ parse_helper(r+1, cols...); } - + template + void parse_helper(std::size_t r, vector_type&t, ColType&...cols){ + if(row[r]){ + try{ + try{ + ::io::detail::parse_vector(row[r], t); + }catch(error::with_column_content&err){ + err.set_column_content(row[r]); + throw; + } + 
}catch(error::with_column_name&err){ + err.set_column_name(column_names[r].c_str()); + throw; + } + } + parse_helper(r+1, cols...); + } public: template bool read_row(ColType& ...cols){ @@ -1269,5 +1303,12 @@ namespace io{ } }; } + +template +using AQCSVReader = io::CSVReader, io::no_quote_escape, + io::ignore_overflow, io::empty_line_comment + >; + #endif diff --git a/data/test.csv b/data/test.csv index 5eb9e8f..b4fe244 100644 --- a/data/test.csv +++ b/data/test.csv @@ -1,11 +1,21 @@ a, b, c, d 1,1,2,2 +2,1,2,2 +2,4,3,4 1,2,2,2 1,2,3,4 4,2,1,4 -2,1,3,4 +2,1,3,3 +2,1,1,2 1,2,3,4 +3,2,4,2 1,2,3,3 3,2,1,2 -2,1,2,2 +2,1,4,2 +3,3,4,4 +2,2,3,1 +2,3,4,4 +2,4,1,2 +3,4,1,2 +2,3,2,2 1,2,3,1 diff --git a/data/test_complex.csv b/data/test_complex.csv new file mode 100644 index 0000000..efd7b3e --- /dev/null +++ b/data/test_complex.csv @@ -0,0 +1,6 @@ +a,b,c +5e-3, 3;4 ;5e-3;6.32,7 +1,2,3 +4,5;6;7;8;9, 0 + 3 ,2 ; 4; 5.7; -.3; 5., 6 +-3.12312,-4E+7;67456746744567;75,4 diff --git a/datagen.cpp b/datagen.cpp index 88f5a48..c96b480 100644 --- a/datagen.cpp +++ b/datagen.cpp @@ -151,5 +151,5 @@ int gen_stock_data(int argc, char* argv[]){ } int main(int argc, char* argv[]){ - gen_stock_data(argc, argv); + return gen_stock_data(argc, argv); } diff --git a/engine/types.py b/engine/types.py index 8eac736..5baf47f 100644 --- a/engine/types.py +++ b/engine/types.py @@ -1,8 +1,9 @@ from copy import deepcopy -from engine.utils import base62uuid, defval -from aquery_config import have_hge from typing import Dict, List +from aquery_config import have_hge +from engine.utils import base62uuid, defval + type_table: Dict[str, "Types"] = {} class Types: @@ -65,10 +66,10 @@ class Types: return self.sqlname @staticmethod - def decode(aquery_type : str, vector_type:str = 'ColRef') -> "Types": - if (aquery_type.startswith('vec')): + def decode(aquery_type : str, vector_type:str = 'vector_type') -> "Types": + if (aquery_type.lower().startswith('vec')): return VectorT(Types.decode(aquery_type[3:]), 
vector_type) - return type_table[aquery_type] + return type_table[aquery_type.lower()] class TypeCollection: def __init__(self, sz, deftype, fptype = None, utype = None, *, collection = None) -> None: @@ -121,7 +122,7 @@ class VectorT(Types): return f'{self.vector_type}<{self.inner_type.name}>' @property def sqlname(self) -> str: - return 'BIGINT' + return 'HUGEINT' # Store vector_type into 16 bit integers @property def cname(self) -> str: return f'{self.vector_type}<{self.inner_type.cname}>' @@ -142,7 +143,7 @@ fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT) builtin_types : Dict[str, Types] = { 'string' : StrT, - **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT), + **_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT, HgeT), **int_types, **fp_types, **temporal_types} def get_int128_support(): @@ -294,7 +295,7 @@ opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call # monetdb wont extend int division to fp type # opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior) opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior) -opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior) +opmul = OperatorBase('mul', 2, auto_extension, cname = '*', sqlname = '*', call = binary_op_behavior) opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior) opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior) opneg = OperatorBase('neg', 1, as_is, cname = '-', sqlname = '-', call = unary_op_behavior) @@ -323,10 +324,14 @@ fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', ca #fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname 
= 'AVG', call = fn_behavior) fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior) fnavg = OperatorBase('avg', 1, lfp_return, cname = 'avg', sqlname = 'AVG', call = fn_behavior) +fnvar = OperatorBase('var', 1, lfp_return, cname = 'var', sqlname = 'VAR_POP', call = fn_behavior) +fnstd = OperatorBase('stddev', 1, lfp_return, cname = 'stddev', sqlname = 'STDDEV_POP', call = fn_behavior) fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlname = 'MAXS', call = windowed_fn_behavor) fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor) fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor) fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'avgs', sqlname = 'AVGS', call = windowed_fn_behavor) +fnvars = OperatorBase('vars', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'vars', sqlname = 'VARS', call = windowed_fn_behavor) +fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor) fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior) fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior) # special @@ -360,8 +365,14 @@ builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow) builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, fnmins, fndeltas, fnratios, fnlast, fnfirst, fnsums, fnavgs, fncnt, - fnpack, fntrunc, fnprev, fnnext) + fnpack, fntrunc, fnprev, fnnext, + fnvar, fnvars, fnstd, fnstds) user_module_func = {} builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, 
**builtin_cstdlib, **user_module_func} + +type_table = {**builtin_types, **type_table} + +# Additional Aliases for type names +type_table['boolean'] = BoolT diff --git a/engine/utils.py b/engine/utils.py index 065f8c8..8e65fcd 100644 --- a/engine/utils.py +++ b/engine/utils.py @@ -1,6 +1,6 @@ -from collections import OrderedDict -from collections.abc import MutableMapping, Mapping import uuid +from collections import OrderedDict +from collections.abc import Mapping, MutableMapping lower_alp = 'abcdefghijklmnopqrstuvwxyz' upper_alp = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -107,6 +107,8 @@ def defval(val, default): # escape must be readonly from typing import Mapping, Set + + def remove_last(pattern : str, string : str, escape : Set[str] = set()) -> str: idx = string.rfind(pattern) if idx == -1: @@ -126,9 +128,11 @@ class _Counter: return cnt import re + ws = re.compile(r'\s+') import os + def add_dll_dir(dll: str): import sys if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt': @@ -144,3 +148,13 @@ def clamp(val, minval, maxval): def escape_qoutes(string : str): return re.sub(r'^\'', r'\'',re.sub(r'([^\\])\'', r'\1\'', string)) + +def get_innermost(sl): + if sl and type(sl) is dict: + if 'literal' in sl and type(sl['literal']) is str: + return f"'{get_innermost(sl['literal'])}'" + return get_innermost(next(iter(sl.values()), None)) + elif sl and type(sl) is list: + return get_innermost(sl[0]) + else: + return sl \ No newline at end of file diff --git a/msc-plugin/libaquery.vcxproj b/msc-plugin/libaquery.vcxproj index cb493e4..f0d3dd4 100644 --- a/msc-plugin/libaquery.vcxproj +++ b/msc-plugin/libaquery.vcxproj @@ -221,7 +221,7 @@ - + @@ -238,7 +238,7 @@ - + diff --git a/prompt.py b/prompt.py index cd17360..b8ec8d1 100644 --- a/prompt.py +++ b/prompt.py @@ -1,4 +1,5 @@ import aquery_config + help_message = '''\ ====================================================== AQUERY COMMANDLINE HELP @@ -82,31 +83,31 @@ if __name__ == '__main__': -import os 
-from dataclasses import dataclass +import atexit +import ctypes import enum -import time +import mmap +import os # import dbconn import re +import subprocess +import sys +import threading +import time +from dataclasses import dataclass from typing import Callable, List, Optional + +import numpy as np from mo_parsing import ParseException + import aquery_parser as parser import engine -import engine.projection import engine.ddl +import engine.projection import reconstruct as xengine -import subprocess -import mmap -import sys -from engine.utils import base62uuid -import atexit -import threading -import ctypes -import numpy as np -from engine.utils import ws -from engine.utils import add_dll_dir -from engine.utils import nullstream from build import build_manager +from engine.utils import add_dll_dir, base62uuid, nullstream, ws + ## CLASSES BEGIN class RunType(enum.Enum): @@ -159,9 +160,11 @@ class QueryStats: class Config: __all_attrs__ = ['running', 'new_query', 'server_mode', 'backend_type', 'has_dll', - 'postproc_time', 'sql_time', - 'n_buffers' + 'n_buffers', ] + __i64_attrs__ = [ + 'monetdb_time', 'postproc_time' + ] __init_attributes__ = False @staticmethod @@ -170,26 +173,42 @@ class Config: from functools import partial for _i, attr in enumerate(Config.__all_attrs__): if not hasattr(Config, attr): - setattr(Config, attr, property(partial(Config.getter, i = _i), partial(Config.setter, i = _i))) + setattr(Config, attr, property( + partial(Config.getter, i = _i), partial(Config.setter, i = _i) + )) + for _i, attr in enumerate(Config.__i64_attrs__): + if not hasattr(Config, attr): + setattr(Config, attr, property( + partial(Config.i64_getter, i = _i), partial(Config.i64_setter, i = _i) + )) Config.__init_attributes__ = True def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None: Config.__init_self__() - self.int_size = 4 self.n_attrib = len(Config.__all_attrs__) - self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size) - self.np_buf = 
np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32) + self.buf = bytearray((self.n_attrib + n_bufs) * 4 + + len(self.__i64_attrs__) * 8 + ) + self.np_buf = np.ndarray(shape = (self.n_attrib), buffer = self.buf, dtype = np.int32) + self.np_i64buf = np.ndarray(shape = len(self.__i64_attrs__), buffer = self.buf, + dtype = np.int64, offset = 4 * len(self.__all_attrs__)) self.new_query = nq self.server_mode = mode.value self.running = 1 - self.backend_type = Backend_Type.BACKEND_AQuery.value + self.backend_type = Backend_Type.BACKEND_MonetDB.value self.has_dll = 0 self.n_buffers = n_bufs + self.monetdb_time = 0 + self.postproc_time = 0 def getter (self, *, i): return self.np_buf[i] def setter(self, v, *, i): self.np_buf[i] = v + def i64_getter (self, *, i): + return self.np_i64buf[i] + def i64_setter(self, v, *, i): + self.np_i64buf[i] = v def set_bufszs(self, buf_szs): for i in range(min(len(buf_szs), self.n_buffers)): @@ -208,6 +227,8 @@ class PromptState(): test_parser = True server_mode: RunType = RunType.Threaded server_bin = 'server.bin' if server_mode == RunType.IPC else 'server.so' + wait_engine = lambda: None + wake_engine = lambda: None set_ready = lambda: None get_ready = lambda: None server_status = lambda: False @@ -298,12 +319,14 @@ def init_threaded(state : PromptState): if aquery_config.run_backend: server_so = ctypes.CDLL('./'+state.server_bin) state.send = server_so['receive_args'] + state.wait_engine = server_so['wait_engine'] + state.wake_engine = server_so['wake_engine'] aquery_config.have_hge = server_so['have_hge']() if aquery_config.have_hge != 0: from engine.types import get_int128_support get_int128_support() state.th = threading.Thread(target=server_so['main'], args=(-1, ctypes.POINTER(ctypes.c_char_p)(state.cfg.c)), daemon=True) - state.th.start() + state.th.start() def init_prompt() -> PromptState: aquery_config.init_config() @@ -336,6 +359,8 @@ def init_prompt() -> PromptState: rm = lambda: None def __set_ready(): 
state.cfg.new_query = 1 + state.wake_engine() + state.set_ready = __set_ready state.get_ready = lambda: aquery_config.run_backend and state.cfg.new_query if aquery_config.run_backend: @@ -374,14 +399,23 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): payload = None keep = True cxt = engine.initialize() + parser.parse('SELECT "**** WELCOME TO AQUERY++! ****";') + # state.currstats = QueryStats() # state.need_print = False while running(): try: if state.server_status(): - state.init() + state.init(state) + # *** busy waiting *** + # while state.get_ready(): + # time.sleep(.00001) while state.get_ready(): - time.sleep(.00001) + state.wait_engine() + if state.need_print: + print(f'MonetDB Time: {state.cfg.monetdb_time/10**9}, ' + f'PostProc Time: {state.cfg.postproc_time/10**9}') + state.cfg.monetdb_time = state.cfg.postproc_time = 0 state.currstats.print(state.stats, need_print=state.need_print) try: og_q : str = next() @@ -407,7 +441,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): for t in cxt.tables: lst_cols = [] for c in t.columns: - lst_cols.append(f'{c.name} : {c.type}') + lst_cols.append(f'{c.name} : {c.type.name}') print(f'{t.table_name} ({", ".join(lst_cols)})') continue elif q.startswith('help'): @@ -498,17 +532,17 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): rm(state) exit() elif q.startswith('sh'): - from distutils.spawn import find_executable + from shutil import which qs = re.split(r'[ \t]', q) shells = ('zsh', 'bash', 'sh', 'fish', 'cmd', 'pwsh', 'powershell', 'csh', 'tcsh', 'ksh') shell_path = '' if len(qs) > 1 and qs[1] in shells: - shell_path = find_executable(qs[1]) + shell_path = which(qs[1]) if shell_path: os.system(shell_path) else: for sh in shells: - shell_path = find_executable(sh) + shell_path = which(sh) if shell_path: os.system(shell_path) break @@ -575,7 +609,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): 
state.stats.print(clear = False) continue trimed = ws.sub(' ', og_q).split(' ') - if trimed[0].lower().startswith('f'): + if len(trimed) > 1 and trimed[0].lower().startswith('fi') or trimed[0].lower() == 'f': fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \ else trimed[1] try: @@ -605,7 +639,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None): print("\nBye.") raise except ValueError as e: - import code, traceback + import code + import traceback __stdin = os.dup(0) raise_exception = True sh = code.InteractiveConsole({**globals(), **locals()}) diff --git a/reconstruct/__init__.py b/reconstruct/__init__.py index fd02f61..97afaba 100644 --- a/reconstruct/__init__.py +++ b/reconstruct/__init__.py @@ -1,4 +1,5 @@ from reconstruct.ast import Context, ast_node + saved_cxt = None def initialize(cxt = None, keep = False): diff --git a/reconstruct/ast.py b/reconstruct/ast.py index 270e671..04e5abc 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -1,12 +1,14 @@ +from binascii import Error from copy import deepcopy from dataclasses import dataclass from enum import Enum, auto -from typing import Set, Tuple, Dict, Union, List, Optional +from typing import Dict, List, Optional, Set, Tuple, Union from engine.types import * -from engine.utils import enlist, base62uuid, base62alp, get_legal_name -from reconstruct.storage import Context, TableInfo, ColRef - +from engine.utils import (base62alp, base62uuid, enlist, get_innermost, + get_legal_name) +from reconstruct.storage import ColRef, Context, TableInfo + class ast_node: header = [] types = dict() @@ -28,8 +30,8 @@ class ast_node: def emit(self, code): self.context.emit(code) - def add(self, code): - self.sql += code + ' ' + def add(self, code, sp = ' '): + self.sql += code + sp def addc(self, code): self.ccode += code + '\n' @@ -51,26 +53,60 @@ class ast_node: self.context.sql_end() from reconstruct.expr import expr, fastscan - - +class SubqType(Enum): + WITH = auto() + FROM = 
auto() + PROJECTION = auto() + FILTER = auto() + GROUPBY = auto() + ORDERBY = auto() + NONE = auto() class projection(ast_node): name = 'projection' first_order = 'select' - + + + def __init__(self, + parent : Optional["ast_node"], + node, + context : Optional[Context] = None, + force_use_spgb : bool = False, + subq_type: SubqType = SubqType.NONE + ): + self.force_use_spgb = force_use_spgb + self.subq_type = subq_type + super().__init__(parent, node, context) + def init(self, _): # skip default init pass def produce(self, node): self.add('SELECT') - self.has_postproc = False + self.has_postproc = 'into' in node if 'select' in node: p = node['select'] self.distinct = False elif 'select_distinct' in node: p = node['select_distinct'] self.distinct = True - + if 'with' in node: + with_table = node['with']['name'] + with_table_name = tuple(with_table.keys())[0] + with_table_cols = tuple(with_table.values())[0] + self.with_clause = projection(self, node['with']['value'], subq_type=SubqType.WITH) + self.with_clause.out_table.add_alias(with_table_name) + for new_name, col in zip(with_table_cols, self.with_clause.out_table.columns): + col.rename(new_name) + self.with_clause.out_table.contextname_cpp + # in monetdb, in cxt + else: + self.with_clause = None + + self.limit = None + if 'limit' in node: + self.limit = node['limit'] + self.projections = p if type(p) is list else [p] if self.parent is None: self.context.sql_begin() @@ -99,8 +135,9 @@ class projection(ast_node): if type(self.datasource) is join: self.datasource.process_join_conditions() - if 'groupby' in node: - self.context.special_gb = groupby.check_special(self, node['groupby']) + self.context.special_gb = self.force_use_spgb + if 'groupby' in node: # if groupby clause contains special stuff + self.context.special_gb |= groupby.check_special(self, node['groupby']) def consume(self, node): # deal with projections @@ -158,6 +195,11 @@ class projection(ast_node): this_type = [c.type for c in _datasource] compound = 
[c.compound for c in _datasource] proj_expr = [expr(self, c.name) for c in _datasource] + for pe in proj_expr: + if pe.is_ColExpr: + pe.cols_mentioned = {pe.raw_col} + else: + pe.cols_mentioned = set() else: y = lambda x:x count = lambda : 'count(*)' @@ -203,8 +245,14 @@ class projection(ast_node): self.out_table.add_cols(cols, new = False) + self.proj_map = proj_map + if 'groupby' in node: self.group_node = groupby(self, node['groupby']) + if self.group_node.terminate: + self.context.abandon_query() + projection(self.parent, node, self.context, True, subq_type=self.subq_type) + return if self.group_node.use_sp_gb: self.has_postproc = True else: @@ -223,7 +271,11 @@ class projection(ast_node): self.var_table[col.name] = offset for n in (col.table.alias): self.var_table[f'{n}.'+col.name] = offset - + # monetdb doesn't support select into table + # if 'into' in node: + # self.into_stub = f'{{INTOSTUB{base62uuid(20)}}}' + # self.add(self.into_stub, '') + def finialize(astnode:ast_node): if(astnode is not None): self.add(astnode.sql) @@ -235,6 +287,9 @@ class projection(ast_node): if self.col_ext or self.group_node and self.group_node.use_sp_gb: self.has_postproc = True + if self.group_node and self.group_node.use_sp_gb : + self.group_node.dedicated_glist + ... o = self.assumptions if 'orderby' in node: o.extend(enlist(node['orderby'])) @@ -258,7 +313,6 @@ class projection(ast_node): # cpp module codegen - self.context.has_dll = True # extract typed-columns from result-set vid2cname = [0]*len(self.var_table) self.pyname2cname = dict() @@ -338,28 +392,36 @@ class projection(ast_node): ) else: # for funcs evaluate f_i(x, ...) 
- self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>() = {val[1]};') + self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>().initfrom({val[1]}, "{cols[i].name}");') # print out col_is - if 'into' not in node: - self.context.emitc(f'print(*{self.out_table.contextname_cpp});') + + if 'into' not in node and self.subq_type == SubqType.NONE: + if self.limit is None: + self.context.emitc(f'print(*{self.out_table.contextname_cpp});') + else: + self.context.emitc(f'{self.out_table.contextname_cpp}->printall(" ","\\n", nullptr, nullptr, {self.limit});') if self.outfile and self.has_postproc: self.outfile.finalize() if 'into' in node: self.context.emitc(select_into(self, node['into']).ccode) + self.has_postproc = True if not self.distinct: self.finalize() - + def finalize(self): self.context.emitc(f'puts("done.");') if self.parent is None: self.context.sql_end() - if self.outfile and not self.has_postproc: - self.context.abandon_postproc() - else: + if self.has_postproc: + self.context.has_dll = True self.context.postproc_end(self.postproc_fname) + else: + self.context.ccode = '' + if self.limit != 0 and not self.outfile: + self.context.direct_output() class select_distinct(projection): first_order = 'select_distinct' @@ -367,18 +429,18 @@ class select_distinct(projection): super().consume(node) if self.has_postproc: self.context.emitc( - f'{self.out_table.table_name}->distinct();' + f'{self.out_table.contextname_cpp}->distinct();' ) self.finalize() class select_into(ast_node): - def init(self, node): + def init(self, _): if isinstance(self.parent, projection): - if self.context.has_dll: - # has postproc put back to monetdb - self.produce = self.produce_cpp - else: - self.produce = self.produce_sql + # if self.parent.has_postproc: + # # has postproc put back to monetdb + self.produce = self.produce_cpp + # else: + # self.produce = self.produce_sql else: raise ValueError('parent must be projection') @@ -390,7 +452,8 @@ class 
select_into(ast_node): self.ccode = f'{self.parent.out_table.contextname_cpp}->monetdb_append_table(cxt->alt_server, \"{node.lower()}\");' def produce_sql(self, node): - self.sql = f' INTO {node}' + self.context.sql = self.context.sql.replace( + self.parent.into_stub, f'INTO {node}', 1) class orderby(ast_node): @@ -409,7 +472,7 @@ class orderby(ast_node): o_str += ' ' + 'DESC' o_list.append(o_str) self.add(', '.join(o_list)) - + class scan(ast_node): class Position(Enum): @@ -586,6 +649,10 @@ class groupby(ast_node): return True return False + def init(self, _): + self.terminate = False + super().init(_) + def produce(self, node): if not isinstance(self.parent, projection): raise ValueError('groupby can only be used in projection') @@ -593,8 +660,10 @@ class groupby(ast_node): node = enlist(node) o_list = [] self.refs = set() + self.gb_cols = set() + # dedicated_glist -> cols populated for special group by self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = [] - self.use_sp_gb = False + self.use_sp_gb = self.parent.force_use_spgb for g in node: self.datasource.rec = set() g_expr = expr(self, g['value']) @@ -610,7 +679,24 @@ class groupby(ast_node): if 'sort' in g and f'{g["sort"]}'.lower() == 'desc': g_str = g_str + ' ' + 'DESC' o_list.append(g_str) - + if g_expr.is_ColExpr: + self.gb_cols.add(g_expr.raw_col) + else: + self.gb_cols.add(g_expr.sql) + + for projs in self.parent.proj_map.values(): + if self.use_sp_gb: + break + if (projs[2].is_compound and + not ((projs[2].is_ColExpr and projs[2].raw_col in self.gb_cols) or + projs[2].sql in self.gb_cols) + ) and (not self.parent.force_use_spgb): + self.use_sp_gb = True + break + + if self.use_sp_gb and not self.parent.force_use_spgb: + self.terminate = True + return if not self.use_sp_gb: self.dedicated_gb = None self.add(', '.join(o_list)) @@ -916,38 +1002,64 @@ class insert(ast_node): name = 'insert' first_order = name def init(self, node): - values = node['query'] - complex_query_kw = ['from', 'where', 
'groupby', 'having', 'orderby', 'limit'] - if any([kw in values for kw in complex_query_kw]): - values['into'] = node['insert'] - proj_cls = (select_distinct - if 'select_distinct' in values - else projection) - proj_cls(None, values, self.context) - self.produce = lambda*_:None - self.spawn = lambda*_:None - self.consume = lambda*_:None + if 'query' in node: + values = node['query'] + complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit'] + if any([kw in values for kw in complex_query_kw]): + values['into'] = node['insert'] + proj_cls = (select_distinct + if 'select_distinct' in values + else projection) + proj_cls(None, values, self.context) + self.produce = lambda*_:None + self.spawn = lambda*_:None + self.consume = lambda*_:None else: super().init(node) def produce(self, node): - values = node['query']['select'] + keys = [] + if 'query' in node: + if 'select' in node['query']: + values = enlist(node['query']['select']) + if 'columns' in node: + keys = node['columns'] + values = [v['value'] for v in values] + + elif 'union_all' in node['query']: + values = [[v['select']['value']] for v in node['query']['union_all']] + if 'columns' in node: + keys = node['columns'] + else: + values = enlist(node['values']) + _vals = [] + for v in values: + if isinstance(v, dict): + keys = v.keys() + v = list(v.values()) + v = [f"'{vv}'" if type(vv) is str else vv for vv in v] + _vals.append(v) + values = _vals + + keys = f'({", ".join(keys)})' if keys else '' tbl = node['insert'] - self.sql = f'INSERT INTO {tbl} VALUES(' + self.sql = f'INSERT INTO {tbl}{keys} VALUES' # if len(values) != table.n_cols: # raise ValueError("Column Mismatch") - + values = [values] if isinstance(values, list) and not isinstance(values[0], list) else values list_values = [] - for i, s in enumerate(enlist(values)): - if 'value' in s: - list_values.append(f"{s['value']}") - else: - # subquery, dispatch to select astnode - pass - self.sql += ', '.join(list_values) + ')' + for l in 
values: + inner_list_values = [] + for s in enlist(l): + if type(s) is dict and 'value' in s: + s = s['value'] + inner_list_values.append(f"{get_innermost(s)}") + list_values.append(f"({', '.join(inner_list_values)})") + + self.sql += ', '.join(list_values) -class delete_table(ast_node): +class delete_from(ast_node): name = 'delete' first_order = name def init(self, node): @@ -959,6 +1071,31 @@ class delete_table(ast_node): if 'where' in node: self.sql += filter(self, node['where']).sql +class union_all(ast_node): + name = 'union_all' + first_order = name + sql_name = 'UNION ALL' + def produce(self, node): + queries = node[self.name] + generated_queries : List[Optional[projection]] = [None] * len(queries) + is_standard = True + for i, q in enumerate(queries): + if 'select' in q: + generated_queries[i] = projection(self, q) + is_standard &= not generated_queries[i].has_postproc + if is_standard: + self.sql = f' {self.sql_name} '.join([q.sql for q in generated_queries]) + else: + raise NotImplementedError(f"{self.sql_name} only support standard sql for now") + def consume(self, node): + super().consume(node) + self.context.direct_output() + +class except_clause(union_all): + name = 'except' + first_order = name + sql_name = 'EXCEPT' + class load(ast_node): name="load" first_order = name @@ -967,6 +1104,9 @@ class load(ast_node): if node['load']['file_type'] == 'module': self.produce = self.produce_module self.module = True + elif 'complex' in node['load']: + self.produce = self.produce_cpp + self.consume = lambda *_: None elif self.context.dialect == 'MonetDB': self.produce = self.produce_monetdb else: @@ -998,7 +1138,7 @@ class load(ast_node): self.context.queries.append(f'F{fname}') ret_type = VoidT if 'ret_type' in f: - ret_type = Types.decode(f['ret_type']) + ret_type = Types.decode(f['ret_type'], vector_type='vector_type') nargs = 0 arglist = '' if 'vars' in f: @@ -1008,7 +1148,7 @@ class load(ast_node): nargs = len(arglist) arglist = ', '.join(arglist) # create 
c++ stub - cpp_stub = f'{ret_type.cname} (*{fname})({arglist}) = nullptr;' + cpp_stub = f'{"vectortype_cstorage" if isinstance(ret_type, VectorT) else ret_type.cname} (*{fname})({arglist}) = nullptr;' self.context.module_stubs += cpp_stub + '\n' self.context.module_map[fname] = cpp_stub #registration for parser @@ -1035,7 +1175,56 @@ class load(ast_node): self.sql = f'{s1} \'{p}\' {s2} ' if 'term' in node: self.sql += f' {s3} \'{node["term"]["literal"]}\'' - + + def produce_cpp(self, node): + self.context.has_dll = True + self.context.headers.add('"csv.h"') + node = node['load'] + self.postproc_fname = 'ld_' + base62uuid(5) + self.context.postproc_begin(self.postproc_fname) + + table:TableInfo = self.context.tables_byname[node['table']] + self.sql = F"SELECT {', '.join([c.name for c in table.columns])} FROM {table.table_name};" + self.emit(self.sql+';\n') + self.context.sql_end() + length_name = 'len_' + base62uuid(6) + self.context.emitc(f'auto {length_name} = server->cnt;') + + out_typenames = [t.type.cname for t in table.columns] + outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in table.columns]) + + self.outtable_col_names = 'names_' + base62uuid(4) + self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};') + + self.out_table = 'tbl_' + base62uuid(4) + self.context.emitc(f'auto {self.out_table} = new TableInfo<{",".join(out_typenames)}>("{table.table_name}", {self.outtable_col_names});') + for i, c in enumerate(table.columns): + c.cxt_name = 'c_' + base62uuid(6) + self.context.emitc(f'decltype(auto) {c.cxt_name} = {self.out_table}->get_col<{i}>();') + self.context.emitc(f'{c.cxt_name}.initfrom({length_name}, server->getCol({i}), "{table.columns[i].name}");') + csv_reader_name = 'csv_reader_' + base62uuid(6) + col_types = [c.type.cname for c in table.columns] + col_tmp_names = ['tmp_'+base62uuid(8) for _ in range(len(table.columns))] + #col_names = ','.join([f'"{c.name}"' for c in table.columns]) + term_field = ',' 
if 'term' not in node else node['term']['literal'] + term_ele = ';' if 'ele' not in node else node['ele']['literal'] + self.context.emitc(f'AQCSVReader<{len(col_types)}, \'{term_field.strip()[0]}\', \'{term_ele.strip()[0]}\'> {csv_reader_name}("{node["file"]["literal"]}");') + # self.context.emitc(f'{csv_reader_name}.read_header(io::ignore_extra_column, {col_names});') + self.context.emitc(f'{csv_reader_name}.next_line();') + + for t, n in zip(col_types, col_tmp_names): + self.context.emitc(f'{t} {n};') + self.context.emitc(f'while({csv_reader_name}.read_row({",".join(col_tmp_names)})) {{ \n') + for i, c in enumerate(table.columns): + # self.context.emitc(f'print({col_tmp_names[i]});') + self.context.emitc(f'{c.cxt_name}.emplace_back({col_tmp_names[i]});') + + self.context.emitc('}') + # self.context.emitc(f'print(*{self.out_table});') + self.context.emitc(f'{self.out_table}->monetdb_append_table(cxt->alt_server, "{table.table_name}");') + + self.context.postproc_end(self.postproc_fname) + class outfile(ast_node): name="_outfile" def __init__(self, parent, node, context = None, *, sql = None): @@ -1062,6 +1251,13 @@ class outfile(ast_node): filename = node['loc']['literal'] if 'loc' in node else node['literal'] import os p = os.path.abspath('.').replace('\\', '/') + '/' + filename + print('Warning: file {p} exists and will be overwritten') + if os.path.exists(p): + try: + os.remove(p) + except OSError: + print(f'Error: file {p} exists and cannot be removed') + self.sql = f'COPY {self.parent.sql} INTO \'{p}\'' d = ',' e = '\\n' @@ -1137,7 +1333,7 @@ class udf(ast_node): def produce(self, node): - from engine.utils import get_legal_name, check_legal_name + from engine.utils import check_legal_name, get_legal_name node = node[self.name] # register udf self.agg = 'Agg' in node @@ -1232,7 +1428,7 @@ class udf(ast_node): def consume(self, node): - from engine.utils import get_legal_name, check_legal_name + from engine.utils import check_legal_name, get_legal_name node = 
node[self.name] if 'params' in node: @@ -1339,7 +1535,25 @@ class udf(ast_node): return udf.ReturnPattern.elemental_return else: return udf.ReturnPattern.bulk_return - + +class passthru_sql(ast_node): + name = 'sql' + first_order = name + import re + # escapestr = r'''(?:((?:[^;"']|"[^"]*"|'[^']*')+)|(?:--[^\r\n]*[\r|\n])+)''' + # escape_comment = fr'''(?:{escapestr}|{escapestr}*-{escapestr}*)''' + seprator = re.compile(r'''((?:[^;"']|"[^"]*"|'[^']*')+)''') + def __init__(self, _, node, context:Context): + sqls = passthru_sql.seprator.split(node['sql']) + for sql in sqls: + sq = sql.strip(' \t\n\r;') + if sq: + context.queries.append('Q' + sql.strip('\r\n\t ;') + ';') + lq = sq.lower() + if lq.startswith('select'): + context.queries.append('O') + + class user_module_function(OperatorBase): def __init__(self, name, nargs, ret_type, context : Context): super().__init__(name, nargs, lambda *_: ret_type, call=fn_behavior) @@ -1355,4 +1569,5 @@ def include(objs): import sys + include(sys.modules[__name__]) diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 4fd483b..af1f0cb 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -1,7 +1,8 @@ from typing import Optional, Set + +from engine.types import * from reconstruct.ast import ast_node from reconstruct.storage import ColRef, Context -from engine.types import * # TODO: Decouple expr and upgrade architecture # C_CODE : get ccode/sql code? 
@@ -31,6 +32,7 @@ class expr(ast_node): def __init__(self, parent, node, *, c_code = None, supress_undefined = False): from reconstruct.ast import projection, udf + # gen2 expr have multi-passes # first pass parse json into expr tree # generate target code in later passes upon need @@ -78,7 +80,7 @@ class expr(ast_node): ast_node.__init__(self, parent, node, None) def init(self, _): - from reconstruct.ast import projection, _tmp_join_union + from reconstruct.ast import _tmp_join_union, projection parent = self.parent self.is_compound = parent.is_compound if type(parent) is expr else False if type(parent) in [projection, expr, _tmp_join_union]: @@ -88,11 +90,13 @@ class expr(ast_node): self.udf_map = parent.context.udf_map self.func_maps = {**builtin_func, **self.udf_map, **user_module_func} self.operators = {**builtin_operators, **self.udf_map, **user_module_func} - self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first', 'prev', 'next'] + self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', + 'last', 'first', 'prev', 'next', 'var', + 'stddev'] def produce(self, node): from engine.utils import enlist - from reconstruct.ast import udf + from reconstruct.ast import udf, projection if type(node) is dict: if 'literal' in node: @@ -166,8 +170,17 @@ class expr(ast_node): special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), "maxs", "mins", "avgs", "sums", "deltas", "last", "first", - "ratios", "pack", "truncate"] - if self.context.special_gb: + "stddevs", "vars", "ratios", "pack", "truncate"] + + if ( + self.context.special_gb + or + ( + type(self.root.parent) is projection + and + self.root.parent.force_use_spgb + ) + ): special_func = [*special_func, *self.ext_aggfuncs] if key in special_func and not self.is_special: @@ -333,7 +346,8 @@ class expr(ast_node): self.type = ByteT elif type(node) is float: self.type = DoubleT - + self.sql = f'{{"CAST({node} AS DOUBLE)" if not c_code else "{node}f"}}' + def finalize(self, 
override = False): from reconstruct.ast import udf if self.codebuf is None or override: diff --git a/reconstruct/storage.py b/reconstruct/storage.py index 0ba567a..c8f5e69 100644 --- a/reconstruct/storage.py +++ b/reconstruct/storage.py @@ -1,12 +1,14 @@ +from typing import Dict, List, Set + from engine.types import * from engine.utils import CaseInsensitiveDict, base62uuid, enlist -from typing import List, Dict, Set + class ColRef: def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None): self.type : Types = AnyT if type(_ty) is str: - self.type = builtin_types[_ty.lower()] + self.type = Types.decode(_ty) if _ty_args: self.type = self.type(enlist(_ty_args)) elif type(_ty) is Types: @@ -17,6 +19,7 @@ class ColRef: self.alias = set() self.id = id # position in table self.compound = compound # compound field (list as a field) + self.cxt_name = '' # e.g. order by, group by, filter by expressions self.__arr__ = (_ty, cobj, table, name, id) @@ -42,6 +45,14 @@ class ColRef: alias = table_name return f'{alias}.{self.get_name()}' + def rename(self, name): + self.alias.discard(self.name) + self.table.columns_byname.pop(self.name, None) + self.name = name + self.table.columns_byname[name] = self + + return self + def __getitem__(self, key): if type(key) is str: return getattr(self, key) @@ -94,6 +105,17 @@ class TableInfo: return self.cxt.tables_byname[alias] = self self.alias.add(alias) + + def rename(self, name): + if name in self.cxt.tables_byname.keys(): + print(f"Error: table name {name} already exists") + return + + self.cxt.tables_byname.pop(self.table_name, None) + self.alias.discard(self.table_name) + self.table_name = name + self.cxt.tables_byname[name] = self + self.alias.add(name) def parse_col_names(self, colExpr) -> ColRef: parsedColExpr = colExpr.split('.') @@ -134,6 +156,7 @@ class Context: self.queries = [] self.module_init_loc = 0 self.special_gb = False + self.has_dll = False def __init__(self): self.tables_byname = 
dict() @@ -147,7 +170,6 @@ class Context: self.udf_agg_map = dict() self.use_columnstore = False self.print = print - self.has_dll = False self.dialect = 'MonetDB' self.is_msvc = False self.have_hge = False @@ -223,6 +245,14 @@ class Context: self.queries.append('P' + proc_name) self.finalize_query() + def abandon_query(self): + self.sql = '' + self.ccode = '' + self.finalize_query() + + def direct_output(self): + self.queries.append('O') + def abandon_postproc(self): self.ccode = '' self.finalize_query() diff --git a/sdk/Evaluation.cpp b/sdk/Evaluation.cpp index 3683597..8e347a7 100644 --- a/sdk/Evaluation.cpp +++ b/sdk/Evaluation.cpp @@ -5,14 +5,13 @@ struct minEval{ double value; - double values; + int* values; double eval; long left; // how many on its left double* record; long max; long** count; - long* sorted; // sorted d }; minEval giniSparse(double** data, long* result, long* d, long size, long col, long classes, long* totalT){ diff --git a/sdk/Makefile b/sdk/Makefile index 7bd5c8c..b146a81 100644 --- a/sdk/Makefile +++ b/sdk/Makefile @@ -1,5 +1,11 @@ +OPT_FLASG = +ifneq ($(DEBUG), 1) + OPT_FLAGS = -Ofast -march=native -flto -DNDEBUG +else + OPT_FLAGS = -g3 -D_DEBUG -fsanitize=leak -fsanitize=address +endif example: $(CXX) -shared -fPIC example.cpp aquery_mem.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../test.so irf: - $(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../libirf.so + $(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition $(OPT_FLAGS) --std=c++1z -o ../libirf.so all: example diff --git a/sdk/irf.cpp b/sdk/irf.cpp index 8433c95..0f9aac1 100644 --- a/sdk/irf.cpp +++ b/sdk/irf.cpp @@ -4,9 +4,6 @@ #include "../server/table.h" DecisionTree* dt = nullptr; -long pt = 0; -double** data = nullptr; -long* result = nullptr; __AQEXPORT__(bool) 
newtree(int height, long f, ColRef sparse, double forget, long maxf, long noclasses, Evaluation e, long r, long rb){ if(sparse.size!=f)return 0; @@ -19,14 +16,13 @@ __AQEXPORT__(bool) newtree(int height, long f, ColRef sparse, double forget return 1; } -__AQEXPORT__(bool) additem(ColRefX, long y, long size){ - long j = 0; - if(size>0){ - free(data); - free(result); - pt = 0; - data=(double**)malloc(size*sizeof(double*)); - result=(long*)malloc(size*sizeof(long)); +__AQEXPORT__(bool) fit(ColRef> X, ColRef y){ + if(X.size != y.size)return 0; + double** data = (double**)malloc(X.size*sizeof(double*)); + long* result = (long*)malloc(y.size*sizeof(long)); + for(long i=0; iX, long y, long size){ pt ++; return 1; } -__AQEXPORT__(bool) fit(){ - if(pt<=0)return 0; - dt->fit(data, result, pt); - return 1; +__AQEXPORT__(bool) fit(vector_type> v, vector_type res){ + double** data = (double**)malloc(v.size*sizeof(double*)); + for(int i = 0; i < v.size; ++i) + data[i] = v.container[i].container; + dt->fit(data, res.container, v.size); + return true; } -__AQEXPORT__(ColRef_storage) predict(){ - int* result = (int*)malloc(pt*sizeof(int)); - for(long i=0; iTest(data[i], dt->DTree); - } +__AQEXPORT__(vectortype_cstorage) predict(vector_type> v){ + int* result = (int*)malloc(v.size*sizeof(int)); - return ColRef_storage(new ColRef_storage(result, pt, 0, "prediction", 0), 1, 0, "prediction", 0); + for(long i=0; iTest(v.container[i].container, dt->DTree); + //printf("%d ", result[i]); + } + auto container = (vector_type*)malloc(sizeof(vector_type)); + container->size = v.size; + container->capacity = 0; + container->container = result; + // container->out(10); + // ColRef>* col = (ColRef>*)malloc(sizeof(ColRef>)); + auto ret = vectortype_cstorage{.container = container, .size = 1, .capacity = 0}; + // col->initfrom(ret, "sibal"); + // print(*col); + return ret; + //return true; } diff --git a/server/Makefile b/server/Makefile index cb082c8..a2d4e44 100644 --- a/server/Makefile +++ 
b/server/Makefile @@ -1,6 +1,6 @@ debug: - g++ -g3 -O0 server/server.cpp server/io.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread + g++ -g3 -O0 server/server.cpp server/libaquery.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread test: - g++ --std=c++1z -g3 -O0 server.cpp io.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread + g++ --std=c++1z -g3 -O0 server.cpp libaquery.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread diff --git a/server/aggregations.h b/server/aggregations.h index 5338e23..cb4bcbe 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -202,6 +202,102 @@ decayed_t>> avgw(uint32_t w, const VT return ret; } +template class VT, bool sd = false> +decayed_t>> varw(uint32_t w, const VT& arr) { + using FPType = types::GetFPType>; + const uint32_t& len = arr.size; + decayed_t ret(len); + uint32_t i = 0; + types::GetLongType s{}; + w = w > len ? len : w; + FPType EnX {}, MnX{}; + if (len) { + s = arr[0]; + MnX = 0; + EnX = arr[0]; + ret[i++] = 0; + } + for (; i < len; ++i){ + s += arr[i]; + FPType _EnX = s / (FPType)(i + 1); + MnX += (arr[i] - EnX) * (arr[i] - _EnX); + EnX = _EnX; + ret[i] = MnX / (FPType)(i + 1); + if constexpr(sd) ret[i-1] = sqrt(ret[i-1]); + } + const float rw = 1.f / (float)w; + s *= rw; + for (; i < len; ++i){ + const auto dw = arr[i] - arr[i - w - 1]; + const auto sw = arr[i] + arr[i - w - 1]; + const auto dex = dw * rw; + ret[i] = ret[i-1] - dex*(s + s + dex - sw); + if constexpr(sd) ret[i-1] = sqrt(ret[i-1]); + s += dex; + } + if constexpr(sd) + if(i) + ret[i-1] = sqrt(ret[i-1]); + + return ret; +} + +template class VT> +types::GetFPType>> var(const VT& arr) { + typedef types::GetFPType>> FPType; + const uint32_t& len = arr.size; + uint32_t i = 0; + types::GetLongType s{0}; + types::GetLongType ssq{0}; + if (len) { + s = arr[0]; + ssq = arr[0] * arr[0]; + } + for (; i < len; ++i){ + s += arr[i]; + ssq += arr[i] * arr[i]; + } + return (ssq - s * s / (FPType)(len + 1)) / (FPType)(len + 1); +} + +template class VT, bool sd = 
false> +decayed_t>> vars(const VT& arr) { + typedef types::GetFPType> FPType; + const uint32_t& len = arr.size; + decayed_t ret(len); + uint32_t i = 0; + types::GetLongType s{}; + FPType MnX{}; + FPType EnX {}; + if (len) { + s = arr[0]; + MnX = 0; + EnX = arr[0]; + ret[i++] = 0; + } + for (; i < len; ++i){ + s += arr[i]; + FPType _EnX = s / (FPType)(i + 1); + MnX += (arr[i] - EnX) * (arr[i] - _EnX); + printf("%d %ld ", arr[i], MnX); + EnX = _EnX; + ret[i] = MnX / (FPType)(i + 1); + if constexpr(sd) ret[i] = sqrt(ret[i]); + } + return ret; +} +template class VT> +types::GetFPType>> stddev(const VT& arr) { + return sqrt(var(arr)); +} +template class VT> +decayed_t>> stddevs(const VT& arr) { + return vars(arr); +} +template class VT> +decayed_t>> stddevw(uint32_t w, const VT& arr) { + return varw(w, arr); +} // use getSignedType template class VT> decayed_t deltas(const VT& arr) { @@ -251,26 +347,33 @@ T first(const VT& arr) { } + #define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \ -template constexpr inline T NAME(const T& v) { return RET; } +template constexpr T NAME(const T& v) { return RET; } // non-aggreation count. E.g. 
SELECT COUNT(col) from table; -template constexpr inline T count(const T& v) { return 1; } -template constexpr inline T max(const T& v) { return v; } -template constexpr inline T min(const T& v) { return v; } -template constexpr inline T avg(const T& v) { return v; } -template constexpr inline T sum(const T& v) { return v; } -template constexpr inline T maxw(uint32_t, const T& v) { return v; } -template constexpr inline T minw(uint32_t, const T& v) { return v; } -template constexpr inline T avgw(uint32_t, const T& v) { return v; } -template constexpr inline T sumw(uint32_t, const T& v) { return v; } -template constexpr inline T ratiow(uint32_t, const T& v) { return 1; } -template constexpr inline T maxs(const T& v) { return v; } -template constexpr inline T mins(const T& v) { return v; } -template constexpr inline T avgs(const T& v) { return v; } -template constexpr inline T sums(const T& v) { return v; } -template constexpr inline T last(const T& v) { return v; } -template constexpr inline T prev(const T& v) { return v; } -template constexpr inline T aggnext(const T& v) { return v; } -template constexpr inline T daltas(const T& v) { return 0; } -template constexpr inline T ratios(const T& v) { return 1; } +template constexpr T count(const T&) { return 1; } +template constexpr T var(const T&) { return 0; } +template constexpr T vars(const T&) { return 0; } +template constexpr T varw(uint32_t, const T&) { return 0; } +template constexpr T stddev(const T&) { return 0; } +template constexpr T stddevs(const T&) { return 0; } +template constexpr T stddevw(uint32_t, const T&) { return 0; } +template constexpr T max(const T& v) { return v; } +template constexpr T min(const T& v) { return v; } +template constexpr T avg(const T& v) { return v; } +template constexpr T sum(const T& v) { return v; } +template constexpr T maxw(uint32_t, const T& v) { return v; } +template constexpr T minw(uint32_t, const T& v) { return v; } +template constexpr T avgw(uint32_t, const T& v) { 
return v; } +template constexpr T sumw(uint32_t, const T& v) { return v; } +template constexpr T ratiow(uint32_t, const T&) { return 1; } +template constexpr T maxs(const T& v) { return v; } +template constexpr T mins(const T& v) { return v; } +template constexpr T avgs(const T& v) { return v; } +template constexpr T sums(const T& v) { return v; } +template constexpr T last(const T& v) { return v; } +template constexpr T prev(const T& v) { return v; } +template constexpr T aggnext(const T& v) { return v; } +template constexpr T daltas(const T&) { return 0; } +template constexpr T ratios(const T&) { return 1; } diff --git a/server/dragonbox/dragonbox.h b/server/dragonbox/dragonbox.h new file mode 100644 index 0000000..e4b954d --- /dev/null +++ b/server/dragonbox/dragonbox.h @@ -0,0 +1,2658 @@ +// Copyright 2020-2022 Junekey Jeon +// +// The contents of this file may be used under the terms of +// the Apache License v2.0 with LLVM Exceptions. +// +// (See accompanying file LICENSE-Apache or copy at +// https://llvm.org/foundation/relicensing/LICENSE.txt) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + + +#ifndef JKJ_HEADER_DRAGONBOX +#define JKJ_HEADER_DRAGONBOX + +#include +#include +#include +#include +#include + +// Suppress additional buffer overrun check. +// I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun +// attacks. No, they aren't. 
+#if defined(__GNUC__) || defined(__clang__) + #define JKJ_SAFEBUFFERS + #define JKJ_FORCEINLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) + #define JKJ_SAFEBUFFERS __declspec(safebuffers) + #define JKJ_FORCEINLINE __forceinline +#else + #define JKJ_SAFEBUFFERS + #define JKJ_FORCEINLINE inline +#endif + +#if defined(__has_builtin) + #define JKJ_DRAGONBOX_HAS_BUILTIN(x) __has_builtin(x) +#else + #define JKJ_DRAGONBOX_HAS_BUILTIN(x) false +#endif + +#if defined(_MSC_VER) + #include +#endif + +namespace jkj::dragonbox { + namespace detail { + template + constexpr std::size_t + physical_bits = sizeof(T) * std::numeric_limits::digits; + + template + constexpr std::size_t value_bits = + std::numeric_limits, T>>::digits; + } + + // These classes expose encoding specs of IEEE-754-like floating-point formats. + // Currently available formats are IEEE754-binary32 & IEEE754-binary64. + + struct ieee754_binary32 { + static constexpr int significand_bits = 23; + static constexpr int exponent_bits = 8; + static constexpr int min_exponent = -126; + static constexpr int max_exponent = 127; + static constexpr int exponent_bias = -127; + static constexpr int decimal_digits = 9; + }; + struct ieee754_binary64 { + static constexpr int significand_bits = 52; + static constexpr int exponent_bits = 11; + static constexpr int min_exponent = -1022; + static constexpr int max_exponent = 1023; + static constexpr int exponent_bias = -1023; + static constexpr int decimal_digits = 17; + }; + + // A floating-point traits class defines ways to interpret a bit pattern of given size as an + // encoding of floating-point number. This is a default implementation of such a traits class, + // supporting ways to interpret 32-bits into a binary32-encoded floating-point number and to + // interpret 64-bits into a binary64-encoded floating-point number. Users might specialize this + // class to change the default behavior for certain types. 
+ template + struct default_float_traits { + // I don't know if there is a truly reliable way of detecting + // IEEE-754 binary32/binary64 formats; I just did my best here. + static_assert(std::numeric_limits::is_iec559 && std::numeric_limits::radix == 2 && + (detail::physical_bits == 32 || detail::physical_bits == 64), + "default_ieee754_traits only works for 32-bits or 64-bits types " + "supporting binary32 or binary64 formats!"); + + // The type that is being viewed. + using type = T; + + // Refers to the format specification class. + using format = + std::conditional_t == 32, ieee754_binary32, ieee754_binary64>; + + // Defines an unsigned integer type that is large enough to carry a variable of type T. + // Most of the operations will be done on this integer type. + using carrier_uint = + std::conditional_t == 32, std::uint32_t, std::uint64_t>; + static_assert(sizeof(carrier_uint) == sizeof(T)); + + // Number of bits in the above unsigned integer type. + static constexpr int carrier_bits = int(detail::physical_bits); + + // Convert from carrier_uint into the original type. + // Depending on the floating-point encoding format, this operation might not be possible for + // some specific bit patterns. However, the contract is that u always denotes a + // valid bit pattern, so this function must be assumed to be noexcept. + static T carrier_to_float(carrier_uint u) noexcept { + T x; + std::memcpy(&x, &u, sizeof(carrier_uint)); + return x; + } + + // Same as above. + static carrier_uint float_to_carrier(T x) noexcept { + carrier_uint u; + std::memcpy(&u, &x, sizeof(carrier_uint)); + return u; + } + + // Extract exponent bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. This function does not do bias adjustment. 
+ static constexpr unsigned int extract_exponent_bits(carrier_uint u) noexcept { + constexpr int significand_bits = format::significand_bits; + constexpr int exponent_bits = format::exponent_bits; + static_assert(detail::value_bits > exponent_bits); + constexpr auto exponent_bits_mask = + (unsigned int)(((unsigned int)(1) << exponent_bits) - 1); + return (unsigned int)(u >> significand_bits) & exponent_bits_mask; + } + + // Extract significand bits from a bit pattern. + // The result must be aligned to the LSB so that there is no additional zero paddings + // on the right. The result does not contain the implicit bit. + static constexpr carrier_uint extract_significand_bits(carrier_uint u) noexcept { + constexpr auto mask = carrier_uint((carrier_uint(1) << format::significand_bits) - 1); + return carrier_uint(u & mask); + } + + // Remove the exponent bits and extract significand bits together with the sign bit. + static constexpr carrier_uint remove_exponent_bits(carrier_uint u, + unsigned int exponent_bits) noexcept { + return u ^ (carrier_uint(exponent_bits) << format::significand_bits); + } + + // Shift the obtained signed significand bits to the left by 1 to remove the sign bit. + static constexpr carrier_uint remove_sign_bit_and_shift(carrier_uint u) noexcept { + return carrier_uint(carrier_uint(u) << 1); + } + + // The actual value of exponent is obtained by adding this value to the extracted exponent + // bits. + static constexpr int exponent_bias = + 1 - (1 << (carrier_bits - format::significand_bits - 2)); + + // Obtain the actual value of the binary exponent from the extracted exponent bits. + static constexpr int binary_exponent(unsigned int exponent_bits) noexcept { + if (exponent_bits == 0) { + return format::min_exponent; + } + else { + return int(exponent_bits) + format::exponent_bias; + } + } + + // Obtain the actual value of the binary exponent from the extracted significand bits and + // exponent bits. 
+ static constexpr carrier_uint binary_significand(carrier_uint significand_bits, + unsigned int exponent_bits) noexcept { + if (exponent_bits == 0) { + return significand_bits; + } + else { + return significand_bits | (carrier_uint(1) << format::significand_bits); + } + } + + + /* Various boolean observer functions */ + + static constexpr bool is_nonzero(carrier_uint u) noexcept { return (u << 1) != 0; } + static constexpr bool is_positive(carrier_uint u) noexcept { + constexpr auto sign_bit = carrier_uint(1) + << (format::significand_bits + format::exponent_bits); + return u < sign_bit; + } + static constexpr bool is_negative(carrier_uint u) noexcept { return !is_positive(u); } + static constexpr bool is_finite(unsigned int exponent_bits) noexcept { + constexpr unsigned int exponent_bits_all_set = (1u << format::exponent_bits) - 1; + return exponent_bits != exponent_bits_all_set; + } + static constexpr bool has_all_zero_significand_bits(carrier_uint u) noexcept { + return (u << 1) == 0; + } + static constexpr bool has_even_significand_bits(carrier_uint u) noexcept { + return u % 2 == 0; + } + }; + + // Convenient wrappers for floating-point traits classes. + // In order to reduce the argument passing overhead, these classes should be as simple as + // possible (e.g., no inheritance, no private non-static data member, etc.; this is an + // unfortunate fact about common ABI convention). 
+
+    // NOTE(review): the template parameter/argument lists in this block were blank in the
+    // reviewed text and are restored from usage; confirm against the upstream Dragonbox header.
+    template <class T, class Traits = default_float_traits<T>>
+    struct float_bits;
+
+    template <class T, class Traits = default_float_traits<T>>
+    struct signed_significand_bits;
+
+    // Thin value wrapper around the raw bit pattern of a floating-point number.
+    // Every member simply forwards to the corresponding static function of Traits.
+    template <class T, class Traits>
+    struct float_bits {
+        using type = T;
+        using traits_type = Traits;
+        using carrier_uint = typename traits_type::carrier_uint;
+
+        carrier_uint u;
+
+        float_bits() = default;
+        constexpr explicit float_bits(carrier_uint bit_pattern) noexcept : u{bit_pattern} {}
+        constexpr explicit float_bits(T float_value) noexcept
+            : u{traits_type::float_to_carrier(float_value)} {}
+
+        constexpr T to_float() const noexcept { return traits_type::carrier_to_float(u); }
+
+        // Extract exponent bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. This function does not do bias adjustment.
+        constexpr unsigned int extract_exponent_bits() const noexcept {
+            return traits_type::extract_exponent_bits(u);
+        }
+
+        // Extract significand bits from a bit pattern.
+        // The result must be aligned to the LSB so that there is no additional zero paddings
+        // on the right. The result does not contain the implicit bit.
+        constexpr carrier_uint extract_significand_bits() const noexcept {
+            return traits_type::extract_significand_bits(u);
+        }
+
+        // Remove the exponent bits and extract significand bits together with the sign bit.
+        constexpr auto remove_exponent_bits(unsigned int exponent_bits) const noexcept {
+            return signed_significand_bits<type, traits_type>(
+                traits_type::remove_exponent_bits(u, exponent_bits));
+        }
+
+        // Obtain the actual value of the binary exponent from the extracted exponent bits.
+        static constexpr int binary_exponent(unsigned int exponent_bits) noexcept {
+            return traits_type::binary_exponent(exponent_bits);
+        }
+        constexpr int binary_exponent() const noexcept {
+            return binary_exponent(extract_exponent_bits());
+        }
+
+        // Obtain the actual value of the binary significand from the extracted significand bits
+        // and exponent bits.
+        static constexpr carrier_uint binary_significand(carrier_uint significand_bits,
+                                                         unsigned int exponent_bits) noexcept {
+            return traits_type::binary_significand(significand_bits, exponent_bits);
+        }
+        constexpr carrier_uint binary_significand() const noexcept {
+            return binary_significand(extract_significand_bits(), extract_exponent_bits());
+        }
+
+        constexpr bool is_nonzero() const noexcept { return traits_type::is_nonzero(u); }
+        constexpr bool is_positive() const noexcept { return traits_type::is_positive(u); }
+        constexpr bool is_negative() const noexcept { return traits_type::is_negative(u); }
+        constexpr bool is_finite(unsigned int exponent_bits) const noexcept {
+            return traits_type::is_finite(exponent_bits);
+        }
+        constexpr bool is_finite() const noexcept {
+            return traits_type::is_finite(extract_exponent_bits());
+        }
+        constexpr bool has_even_significand_bits() const noexcept {
+            return traits_type::has_even_significand_bits(u);
+        }
+    };
+
+    // Wrapper around a bit pattern whose exponent field has already been removed
+    // (the type returned by float_bits::remove_exponent_bits).
+    template <class T, class Traits>
+    struct signed_significand_bits {
+        using type = T;
+        using traits_type = Traits;
+        using carrier_uint = typename traits_type::carrier_uint;
+
+        carrier_uint u;
+
+        signed_significand_bits() = default;
+        constexpr explicit signed_significand_bits(carrier_uint bit_pattern) noexcept
+            : u{bit_pattern} {}
+
+        // Shift the obtained signed significand bits to the left by 1 to remove the sign bit.
+        constexpr carrier_uint remove_sign_bit_and_shift() const noexcept {
+            return traits_type::remove_sign_bit_and_shift(u);
+        }
+
+        constexpr bool is_positive() const noexcept { return traits_type::is_positive(u); }
+        constexpr bool is_negative() const noexcept { return traits_type::is_negative(u); }
+        constexpr bool has_all_zero_significand_bits() const noexcept {
+            return traits_type::has_all_zero_significand_bits(u);
+        }
+        constexpr bool has_even_significand_bits() const noexcept {
+            return traits_type::has_even_significand_bits(u);
+        }
+    };
+
+    namespace detail {
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Bit operation intrinsics.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace bits {
+            // Rotate n right by r bits (r is taken modulo the bit width).
+            // Most compilers should be able to optimize this into the ROR instruction.
+            // The left-shift count is masked as well: without the mask a rotation by a
+            // multiple of the width would shift by the full width, which is undefined behavior.
+            inline std::uint32_t rotr(std::uint32_t n, std::uint32_t r) noexcept {
+                r &= 31;
+                return (n >> r) | (n << ((32 - r) & 31));
+            }
+            inline std::uint64_t rotr(std::uint64_t n, std::uint32_t r) noexcept {
+                r &= 63;
+                return (n >> r) | (n << ((64 - r) & 63));
+            }
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////
+        // Utilities for wide unsigned integer arithmetic.
+        ////////////////////////////////////////////////////////////////////////////////////////
+
+        namespace wuint {
+            // Compilers might support built-in 128-bit integer types. However, it seems that
+            // emulating them with a pair of 64-bit integers actually produces a better code,
+            // so we avoid using those built-ins. That said, they are still useful for
+            // implementing 64-bit x 64-bit -> 128-bit multiplication.
+ + // clang-format off +#if defined(__SIZEOF_INT128__) + // To silence "error: ISO C++ does not support '__int128' for 'type name' + // [-Wpedantic]" +#if defined(__GNUC__) + __extension__ +#endif + using builtin_uint128_t = unsigned __int128; +#endif + // clang-format on + + struct uint128 { + uint128() = default; + + std::uint64_t high_; + std::uint64_t low_; + + constexpr uint128(std::uint64_t high, std::uint64_t low) noexcept + : high_{high}, low_{low} {} + + constexpr std::uint64_t high() const noexcept { return high_; } + constexpr std::uint64_t low() const noexcept { return low_; } + + uint128& operator+=(std::uint64_t n) & noexcept { +#if JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_addcll) + unsigned long long carry; + low_ = __builtin_addcll(low_, n, 0, &carry); + high_ = __builtin_addcll(high_, 0, carry, &carry); +#elif JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_ia32_addcarryx_u64) + unsigned long long result; + auto carry = __builtin_ia32_addcarryx_u64(0, low_, n, &result); + low_ = result; + __builtin_ia32_addcarryx_u64(carry, high_, 0, &result); + high_ = result; +#elif defined(_MSC_VER) && defined(_M_X64) + auto carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); +#else + auto sum = low_ + n; + high_ += (sum < low_ ? 1 : 0); + low_ = sum; +#endif + return *this; + } + }; + + static inline std::uint64_t umul64(std::uint32_t x, std::uint32_t y) noexcept { +#if defined(_MSC_VER) && defined(_M_IX86) + return __emulu(x, y); +#else + return x * std::uint64_t(y); +#endif + } + + // Get 128-bit result of multiplication of two 64-bit unsigned integers. 
+ JKJ_SAFEBUFFERS inline uint128 umul128(std::uint64_t x, std::uint64_t y) noexcept { +#if defined(__SIZEOF_INT128__) + auto result = builtin_uint128_t(x) * builtin_uint128_t(y); + return {std::uint64_t(result >> 64), std::uint64_t(result)}; +#elif defined(_MSC_VER) && defined(_M_X64) + uint128 result; + result.low_ = _umul128(x, y, &result.high_); + return result; +#else + auto a = std::uint32_t(x >> 32); + auto b = std::uint32_t(x); + auto c = std::uint32_t(y >> 32); + auto d = std::uint32_t(y); + + auto ac = umul64(a, c); + auto bc = umul64(b, c); + auto ad = umul64(a, d); + auto bd = umul64(b, d); + + auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + std::uint32_t(bd)}; +#endif + } + + JKJ_SAFEBUFFERS inline std::uint64_t umul128_upper64(std::uint64_t x, + std::uint64_t y) noexcept { +#if defined(__SIZEOF_INT128__) + auto result = builtin_uint128_t(x) * builtin_uint128_t(y); + return std::uint64_t(result >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + auto a = std::uint32_t(x >> 32); + auto b = std::uint32_t(x); + auto c = std::uint32_t(y >> 32); + auto d = std::uint32_t(y); + + auto ac = umul64(a, c); + auto bc = umul64(b, c); + auto ad = umul64(a, d); + auto bd = umul64(b, d); + + auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc); + + return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32); +#endif + } + + // Get upper 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit + // unsigned integer. + JKJ_SAFEBUFFERS inline uint128 umul192_upper128(std::uint64_t x, uint128 y) noexcept { + auto r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; + } + + // Get upper 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit + // unsigned integer. 
+ inline std::uint64_t umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept { +#if defined(__SIZEOF_INT128__) || (defined(_MSC_VER) && defined(_M_X64)) + return umul128_upper64(std::uint64_t(x) << 32, y); +#else + auto yh = std::uint32_t(y >> 32); + auto yl = std::uint32_t(y); + + auto xyh = umul64(x, yh); + auto xyl = umul64(x, yl); + + return xyh + (xyl >> 32); +#endif + } + + // Get lower 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit + // unsigned integer. + JKJ_SAFEBUFFERS inline uint128 umul192_lower128(std::uint64_t x, uint128 y) noexcept { + auto high = x * y.high(); + auto high_low = umul128(x, y.low()); + return {high + high_low.high(), high_low.low()}; + } + + // Get lower 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit + // unsigned integer. + inline std::uint64_t umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept { + return x * y; + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + // Some simple utilities for constexpr computation. + //////////////////////////////////////////////////////////////////////////////////////// + + template + constexpr Int compute_power(Int a) noexcept { + static_assert(k >= 0); + Int p = 1; + for (int i = 0; i < k; ++i) { + p *= a; + } + return p; + } + + template + constexpr int count_factors(UInt n) noexcept { + static_assert(a > 1); + int c = 0; + while (n % a == 0) { + n /= a; + ++c; + } + return c; + } + + //////////////////////////////////////////////////////////////////////////////////////// + // Utilities for fast/constexpr log computation. + //////////////////////////////////////////////////////////////////////////////////////// + + namespace log { + static_assert((-1 >> 1) == -1, "right-shift for signed integers must be arithmetic"); + + // Compute floor(e * c - s). 
+ enum class multiply : std::uint32_t {}; + enum class subtract : std::uint32_t {}; + enum class shift : std::size_t {}; + enum class min_exponent : std::int32_t {}; + enum class max_exponent : std::int32_t {}; + + template + constexpr int compute(int e) noexcept { + assert(std::int32_t(e_min) <= e && e <= std::int32_t(e_max)); + return int((std::int32_t(e) * std::int32_t(m) - std::int32_t(f)) >> std::size_t(k)); + } + + // For constexpr computation. + // Returns -1 when n = 0. + template + constexpr int floor_log2(UInt n) noexcept { + int count = -1; + while (n != 0) { + ++count; + n >>= 1; + } + return count; + } + + static constexpr int floor_log10_pow2_min_exponent = -2620; + static constexpr int floor_log10_pow2_max_exponent = 2620; + constexpr int floor_log10_pow2(int e) noexcept { + using namespace log; + return compute(e); + } + + static constexpr int floor_log2_pow10_min_exponent = -1233; + static constexpr int floor_log2_pow10_max_exponent = 1233; + constexpr int floor_log2_pow10(int e) noexcept { + using namespace log; + return compute(e); + } + + static constexpr int floor_log10_pow2_minus_log10_4_over_3_min_exponent = -2985; + static constexpr int floor_log10_pow2_minus_log10_4_over_3_max_exponent = 2936; + constexpr int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { + using namespace log; + return compute(e); + } + + static constexpr int floor_log5_pow2_min_exponent = -1831; + static constexpr int floor_log5_pow2_max_exponent = 1831; + constexpr int floor_log5_pow2(int e) noexcept { + using namespace log; + return compute(e); + } + + static constexpr int floor_log5_pow2_minus_log5_3_min_exponent = -3543; + static constexpr int floor_log5_pow2_minus_log5_3_max_exponent = 2427; + constexpr int floor_log5_pow2_minus_log5_3(int e) noexcept { + using namespace log; + return compute(e); + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + // Utilities for fast divisibility tests. 
+ //////////////////////////////////////////////////////////////////////////////////////// + + namespace div { + // Replace n by floor(n / 10^N). + // Returns true if and only if n is divisible by 10^N. + // Precondition: n <= 10^(N+1) + // !!It takes an in-out parameter!! + template + struct divide_by_pow10_info; + + template <> + struct divide_by_pow10_info<1> { + static constexpr std::uint32_t magic_number = 6554; + static constexpr int shift_amount = 16; + }; + + template <> + struct divide_by_pow10_info<2> { + static constexpr std::uint32_t magic_number = 656; + static constexpr int shift_amount = 16; + }; + + template + constexpr bool check_divisibility_and_divide_by_pow10(std::uint32_t& n) noexcept { + // Make sure the computation for max_n does not overflow. + static_assert(N + 1 <= log::floor_log10_pow2(31)); + assert(n <= compute_power(std::uint32_t(10))); + + using info = divide_by_pow10_info; + n *= info::magic_number; + + constexpr auto mask = std::uint32_t(std::uint32_t(1) << info::shift_amount) - 1; + bool result = ((n & mask) < info::magic_number); + + n >>= info::shift_amount; + return result; + } + + // Compute floor(n / 10^N) for small n and N. + // Precondition: n <= 10^(N+1) + template + constexpr std::uint32_t small_division_by_pow10(std::uint32_t n) noexcept { + // Make sure the computation for max_n does not overflow. + static_assert(N + 1 <= log::floor_log10_pow2(31)); + assert(n <= compute_power(std::uint32_t(10))); + + return (n * divide_by_pow10_info::magic_number) >> + divide_by_pow10_info::shift_amount; + } + + // Compute floor(n / 10^N) for small N. + // Precondition: n <= n_max + template + constexpr UInt divide_by_pow10(UInt n) noexcept { + static_assert(N >= 0); + + // Specialize for 32-bit division by 100. 
+ // Compiler is supposed to generate the identical code for just writing + // "n / 100", but for some reason MSVC generates an inefficient code + // (mul + mov for no apparent reason, instead of single imul), + // so we does this manually. + if constexpr (std::is_same_v && N == 2) { + return std::uint32_t(wuint::umul64(n, std::uint32_t(1374389535)) >> 37); + } + // Specialize for 64-bit division by 1000. + // Ensure that the correctness condition is met. + if constexpr (std::is_same_v && N == 3 && + n_max <= std::uint64_t(15534100272597517998ull)) { + return wuint::umul128_upper64(n, std::uint64_t(2361183241434822607ull)) >> 7; + } + else { + constexpr auto divisor = compute_power(UInt(10)); + return n / divisor; + } + } + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + // Return types for the main interface function. + //////////////////////////////////////////////////////////////////////////////////////// + + template + struct decimal_fp; + + template + struct decimal_fp { + using carrier_uint = UInt; + + carrier_uint significand; + int exponent; + }; + + template + struct decimal_fp { + using carrier_uint = UInt; + + carrier_uint significand; + int exponent; + bool is_negative; + }; + + template + struct decimal_fp { + using carrier_uint = UInt; + + carrier_uint significand; + int exponent; + bool may_have_trailing_zeros; + }; + + template + struct decimal_fp { + using carrier_uint = UInt; + + carrier_uint significand; + int exponent; + bool is_negative; + bool may_have_trailing_zeros; + }; + + template + using unsigned_decimal_fp = decimal_fp; + + template + using signed_decimal_fp = decimal_fp; + + + //////////////////////////////////////////////////////////////////////////////////////// + // Computed cache entries. 
+    ////////////////////////////////////////////////////////////////////////////////////////
+
+    namespace detail {
+        // NOTE(review): the template parameter list of the primary template and the argument
+        // of the specialization were blank in the reviewed text; restored from the 64-bit
+        // entry type and the exponent range. Confirm against the upstream Dragonbox header.
+        template <class FloatFormat>
+        struct cache_holder;
+
+        // Precomputed 64-bit table with one entry per k in [min_k, max_k]
+        // (78 entries = max_k - min_k + 1). The entries at k = 0, 1, 2 are
+        // 0x8000..., 0xa000..., 0xc800..., i.e. 10^k left-aligned to 64 bits.
+        template <>
+        struct cache_holder<ieee754_binary32> {
+            using cache_entry_type = std::uint64_t;
+            static constexpr int cache_bits = 64;
+            static constexpr int min_k = -31;
+            static constexpr int max_k = 46;
+            static constexpr cache_entry_type cache[] = {
+                0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, 0xfd87b5f28300ca0e,
+                0x9e74d1b791e07e49, 0xc612062576589ddb, 0xf79687aed3eec552, 0x9abe14cd44753b53,
+                0xc16d9a0095928a28, 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb,
+                0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, 0xe69594bec44de15c,
+                0x901d7cf73ab0acda, 0xb424dc35095cd810, 0xe12e13424bb40e14, 0x8cbccc096f5088cc,
+                0xafebff0bcb24aaff, 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd,
+                0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, 0xd1b71758e219652c,
+                0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, 0xcccccccccccccccd, 0x8000000000000000,
+                0xa000000000000000, 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000,
+                0xc350000000000000, 0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
+                0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000, 0xe8d4a51000000000,
+                0x9184e72a00000000, 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000,
+                0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
+                0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400, 0xd3c21bcecceda100,
+                0x84595161401484a0, 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940985,
+                0xa18f07d736b90be6, 0xc9f2c9cd04674edf, 0xfc6f7c4045812297, 0x9dc5ada82b70b59e,
+                0xc5371912364ce306, 0xf684df56c3e01bc7, 0x9a130b963a6c115d, 0xc097ce7bc90715b4,
+                0xf0bdc21abb48db21, 0x96769950b50d88f5, 0xbc143fa4e250eb32, 0xeb194f8e1ae525fe,
+                0x92efd1b8d0cf37bf, 0xb7abc627050305ae, 0xe596b7b0c643c71a, 0x8f7e32ce7bea5c70,
+                0xb35dbf821ae4f38c, 0xe0352f62a19e306f};
+        };
+
+
template <> + struct cache_holder { + using cache_entry_type = wuint::uint128; + static constexpr int cache_bits = 128; + static constexpr int min_k = -292; + static constexpr int max_k = 326; + static constexpr cache_entry_type cache[] = { + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, {0xf03d93eebc589f88, 0x793555ab7eba27cb}, + {0x96267c7535b763b5, 0x4bc1558b2f3458df}, {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 
0x465e15a979c1cadd}, {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, {0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, {0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, 
{0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, {0xe998d258869facd7, 0x2bd1a438703fc94c}, + {0x91ff83775423cc06, 0x7b6306a34627ddd0}, {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, {0xebdf661791d60f56, 0x111b495b3464ad22}, + {0x936b9fcebb25c995, 0xcab10dd900beec35}, {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, {0xe0f218b8d25088b8, 
0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, {0x894bc396ce5da772, 0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, {0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, {0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, {0xc5029163f384a931, 0x0a9e795e65d4df12}, 
+ {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + 
{0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, {0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, {0xc16d9a0095928a27, 0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 
0x88f4bb1ca6bcf585}, {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 0x0000000000000000}, {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, {0xe35fa931a0000000, 0x0000000000000000}, + {0x8e1bc9bf04000000, 0x0000000000000000}, {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, {0x84595161401484a0, 0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, 
{0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, {0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a5}, {0xa321f2d7226895c7, 0xaff72d52192b6a0e}, + {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764491}, {0xfee50b7025c36a08, 0x02f236d04753d5b5}, + {0x9f4f2726179a2245, 0x01d762422c946591}, {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb3}, {0x9b934c3b330c8577, 0x63cc55f49f88eb30}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fc}, {0xf316271c7fc3908a, 0x8bef464e3945ef7b}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ad}, {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea318}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bde}, {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6b}, + {0xb975d6b6ee39e436, 0xb3e2fd538e122b45}, {0xe7d34c64a9c85d44, 0x60dbbca87196b617}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31ce}, {0xb51d13aea4a488dd, 
0x6babab6398bdbe42}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd2}, {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca3}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcc}, {0xdd15fe86affad912, 0x49ef0eb713f39ebf}, + {0x8a2dbf142dfcc7ab, 0x6e3569326c784338}, {0xacb92ed9397bf996, 0x49c2c37f07965405}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be907}, {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a4}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0d}, {0xd2d80db02aabd62b, 0xf50a3fa490c30191}, + {0x83c7088e1aab65db, 0x792667c6da79e0fb}, {0xa4b8cab1a1563f52, 0x577001b891185939}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87}, {0x80b05e5ac60b6178, 0x544f8158315b05b5}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c722}, {0xc913936dd571c84c, 0x03bc3a19cd1e38ea}, + {0xfb5878494ace3a5f, 0x04ab48a04065c724}, {0x9d174b2dcec0e47b, 0x62eb0d64283f9c77}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8395}, {0xf5746577930d6500, 0xca8f44ec7ee3647a}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecc}, {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67f}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101f}, {0x95d04aee3b80ece5, 0xbba1f1d158724a13}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc98}, {0xea1575143cf97226, 0xf52d09d71a3293be}, + {0x924d692ca61be758, 0x593c2626705f9c57}, {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836d}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956448}, {0x8edf98b59a373fec, 0x4724bd4189bd5ead}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb658}, {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ee}, + {0x8b865b215899f46c, 0xbd79e0d20082ee75}, {0xae67f1e9aec07187, 0xecd8590680a3aa12}, + {0xda01ee641a708de9, 0xe80e6f4820cc9496}, {0x884134fe908658b2, 0x3109058d147fdcde}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd416}, {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91b}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb1}, {0xa6539930bf6bff45, 0x84db8346b786151d}, + {0xcfe87f7cef46ff16, 0xe612641865679a64}, {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07f}, + {0xa26da3999aef7749, 0xe3be5e330f38f09e}, {0xcb090c8001ab551c, 0x5cadf5bfd3072cc6}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f7}, {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afb}, 
+ {0xc646d63501a1511d, 0xb281e1fd541501b9}, {0xf7d88bc24209a565, 0x1f225a7ca91a4227}, + {0x9ae757596946075f, 0x3375788de9b06959}, {0xc1a12d2fc3978937, 0x0052d6b1641c83af}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49b}, {0x9745eb4d50ce6332, 0xf840b7ba963646e1}, + {0xbd176620a501fbff, 0xb650e5a93bc3d899}, {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebf}, + {0x93ba47c980e98cdf, 0xc66f336c36b10138}, {0xb8a8d9bbe123f017, 0xb80b0047445d4185}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e6}, {0x9043ea1ac7e41392, 0x87c89837ad68db30}, + {0xb454e4a179dd1877, 0x29babe4598c311fc}, {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67b}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660d}, {0xb01ae745b101e9e4, 0x5ec05dcff72e7f90}, + {0xdc21a1171d42645d, 0x76707543f4fa1f74}, {0x899504ae72497eba, 0x6a06494a791c53a9}, + {0xabfa45da0edbde69, 0x0487db9d17636893}, {0xd6f8d7509292d603, 0x45a9d2845d3c42b7}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3}, {0xa7f26836f282b732, 0x8e6cac7768d7141f}, + {0xd1ef0244af2364ff, 0x3207d795430cd927}, {0x8335616aed761f1f, 0x7f44e6bd49e807b9}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a7}, {0xcd036837130890a1, 0x36dba887c37a8c10}, + {0x802221226be55a64, 0xc2494954da2c978a}, {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6d}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc8}, {0xfa42a8b73abbf48c, 0xcb772339ba1f17fa}, + {0x9c69a97284b578d7, 0xff2a760414536efc}, {0xc38413cf25e2d70d, 0xfef5138519684abb}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d6a}, {0x98bf2f79d5993802, 0xef2f773ffbd97a62}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fb}, {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf39}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26184}, {0xba756174393d88df, 0x94f971119aeef9e5}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85e}, {0x91abb422ccb812ee, 0xac62e055c10ab33b}, + {0xb616a12b7fe617aa, 0x577b986b314d600a}, {0xe39c49765fdf9d94, 0xed5a7e85fda0b80c}, + {0x8e41ade9fbebc27d, 0x14588f13be847308}, {0xb1d219647ae6b31c, 0x596eb2d8ae258fc9}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bc}, {0x8aec23d680043bee, 0x25de7bb9480d5855}, + 
{0xada72ccc20054ae9, 0xaf561aa79a10ae6b}, {0xd910f7ff28069da4, 0x1b2ba1518094da05}, + {0x87aa9aff79042286, 0x90fb44d2f05d0843}, {0xa99541bf57452b28, 0x353a1607ac744a54}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce9}, {0x847c9b5d7c2e09b7, 0x69956135febada12}, + {0xa59bc234db398c25, 0x43fab9837e699096}, {0xcf02b2c21207ef2e, 0x94f967e45e03f4bc}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f6}, {0xa1ba1ba79e1632dc, 0x6462d92a69731733}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcff}, {0xfcb2cb35e702af78, 0x5cda735244c3d43f}, + {0x9defbf01b061adab, 0x3a0888136afa64a8}, {0xc56baec21c7a1916, 0x088aaa1845b8fdd1}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d46}, {0x9a3c2087a63f6399, 0x36ac54e2f678864c}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7de}, {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d6}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b326}, {0xbc4665b596706114, 0x873d5d9f0dde1fef}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7eb}, {0x9316ff75dd87cbd8, 0x09a7f12442d588f3}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb30}, {0xe5d3ef282a242e81, 0x8f1668c8a86da5fb}, + {0x8fa475791a569d10, 0xf96e017d694487bd}, {0xb38d92d760ec4455, 0x37c981dcc395a9ad}, + {0xe070f78d3927556a, 0x85bbe253f47b1418}, {0x8c469ab843b89562, 0x93956d7478ccec8f}, + {0xaf58416654a6babb, 0x387ac8d1970027b3}, {0xdb2e51bfe9d0696a, 0x06997b05fcc0319f}, + {0x88fcf317f22241e2, 0x441fece3bdf81f04}, {0xab3c2fddeeaad25a, 0xd527e81cad7626c4}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b075}, {0x85c7056562757456, 0xf6872d5667844e4a}, + {0xa738c6bebb12d16c, 0xb428f8ac016561dc}, {0xd106f86e69d785c7, 0xe13336d701beba53}, + {0x82a45b450226b39c, 0xecc0024661173474}, {0xa34d721642b06084, 0x27f002d7f95d0191}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f5}, {0xff290242c83396ce, 0x7e67047175a15272}, + {0x9f79a169bd203e41, 0x0f0062c6e984d387}, {0xc75809c42c684dd1, 0x52c07b78a3e60869}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a83}, {0x9bbcc7a142b17ccb, 0x88a66076400bb692}, + {0xc2abf989935ddbfe, 0x6acff893d00ea436}, {0xf356f7ebf83552fe, 0x0583f6b8c4124d44}, + {0x98165af37b2153de, 
0xc3727a337a8b704b}, {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5d}, + {0xeda2ee1c7064130c, 0x1162def06f79df74}, {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba9}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173693}, {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0438}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a3}, {0xb54d5e4a127f59c8, 0x2503beb6d00cab4c}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61e}, {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, + {0xb10d8e1456105dad, 0x7425a83e872c5f48}, {0xdd50f1996b947518, 0xd12f124e28f7771a}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa70}, {0xace73cbfdc0bfb7b, 0x636cc64d1001550c}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4f}, {0x8714a775e3e95c78, 0x65acfaec34810a72}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0e}, {0xd31045a8341ca07c, 0x1ede48111209a051}, + {0x83ea2b892091e44d, 0x934aed0aab460433}, {0xa4e4b66b68b65d60, 0xf81da84d56178540}, + {0xce1de40642e3f4b9, 0x36251260ab9d668f}, {0x80d2ae83e9ce78f3, 0xc1d72b7c6b42601a}, + {0xa1075a24e4421730, 0xb24cf65b8612f820}, {0xc94930ae1d529cfc, 0xdee033f26797b628}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b2}, {0x9d412e0806e88aa5, 0x8e1f289560ee864f}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e3}, {0xf5b5d7ec8acb58a2, 0xae10af696774b1dc}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef2a}, {0xbff610b0cc6edd3f, 0x17fd090a58d32af4}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b1}, {0x95f83d0a1fb69cd9, 0x4abdaf101564f98f}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f2}, {0xea53df5fd18d5513, 0x84c86189216dc5ee}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb5}, {0xb7118682dbb66a77, 0x3fbc8c33221dc2a2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, {0x8f05b1163ba6832d, 0x29cb4d87f2a7400f}, + {0xb2c71d5bca9023f8, 0x743e20e9ef511013}, {0xdf78e4b2bd342cf6, 0x914da9246b255417}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548f}, {0xae9672aba3d0c320, 0xa184ac2473b529b2}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741f}, {0x8865899617fb1871, 0x7e2fa67c7a658893}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab8}, {0xd51ea6fa85785631, 0x552a74227f3ea566}, + {0x8533285c936b35de, 0xd53a88958f872760}, 
{0xa67ff273b8460356, 0x8a892abaf368f138}, + {0xd01fef10a657842c, 0x2d2b7569b0432d86}, {0x8213f56a67f6b29b, 0x9c3b29620e29fc74}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b90}, {0xcb3f2f7642717713, 0x241c70a936219a74}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0111}, {0x9ec95d1463e8a506, 0xf4363804324a40ab}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d6}, {0xf81aa16fdc1b81da, 0xdd94b7868e94050b}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8327}, {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f1}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8ced}, {0x976e41088617ca01, 0xd5be0503e085d814}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e19}, {0xec9c459d51852ba2, 0xddf8e7d60ed1219f}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b504}, {0xb8da1662e7b00a17, 0x3d6a751f3b936244}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad5}, {0x906a617d450187e2, 0x27fb2b80668b24c6}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf7}, {0xe1a63853bbd26451, 0x5e7873f8a0396974}, + {0x8d07e33455637eb2, 0xdb0b487b6423e1e9}, {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda63}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fc}, {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9e}, + {0xac2820d9623bf429, 0x546345fa9fbdcd45}, {0xd732290fbacaf133, 0xa97c177947ad4096}, + {0x867f59a9d4bed6c0, 0x49ed8eabcccc485e}, {0xa81f301449ee8c70, 0x5c68f256bfff5a75}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3112}, {0x83585d8fd9c25db7, 0xc831fd53c5ff7eac}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e56}, {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35ec}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b4}, {0xa0555e361951c366, 0xd7e105bcc3326220}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa8}, {0xfa856334878fc150, 0xb14f98f6f0feb952}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d4}, {0xc3b8358109e84f07, 0x0a862f80ec4700c9}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fb}, {0x98e7e9cccfbd7dbd, 0x8038d51cb897789d}, + {0xbf21e44003acdd2c, 0xe0470a63e6bd56c4}, {0xeeea5d5004981478, 0x1858ccfce06cac75}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc9}, {0xbaa718e68396cffd, 0xd30560258f54e6bb}, + {0xe950df20247c83fd, 0x47c6b82ef32a206a}, {0x91d28b7416cdd27e, 
0x4cdc331d57fa5442}, + {0xb6472e511c81471d, 0xe0133fe4adf8e953}, {0xe3d8f9e563a198e5, 0x58180fddd97723a7}, + {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7649}, {0xb201833b35d63f73, 0x2cd2cc6551e513db}, + {0xde81e40a034bcf4f, 0xf8077f7ea65e58d2}, {0x8b112e86420f6191, 0xfb04afaf27faf783}, + {0xadd57a27d29339f6, 0x79c5db9af1f9b564}, {0xd94ad8b1c7380874, 0x18375281ae7822bd}, + {0x87cec76f1c830548, 0x8f2293910d0b15b6}, {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb23}, + {0xd433179d9c8cb841, 0x5fa60692a46151ec}, {0x849feec281d7f328, 0xdbc7c41ba6bcd334}, + {0xa5c7ea73224deff3, 0x12b9b522906c0801}, {0xcf39e50feae16bef, 0xd768226b34870a01}, + {0x81842f29f2cce375, 0xe6a1158300d46641}, {0xa1e53af46f801c53, 0x60495ae3c1097fd1}, + {0xca5e89b18b602368, 0x385bb19cb14bdfc5}, {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6}, + {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2}, {0xc5a05277621be293, 0xc7098b7305241886}, + {0xf70867153aa2db38, 0xb8cbee4fc66d1ea8}}; + }; + + // Compressed cache for double + struct compressed_cache_detail { + static constexpr int compression_ratio = 27; + static constexpr std::size_t compressed_table_size = + (cache_holder::max_k - cache_holder::min_k + + compression_ratio) / + compression_ratio; + + struct cache_holder_t { + wuint::uint128 table[compressed_table_size]; + }; + static constexpr cache_holder_t cache = [] { + cache_holder_t res{}; + for (std::size_t i = 0; i < compressed_table_size; ++i) { + res.table[i] = cache_holder::cache[i * compression_ratio]; + } + return res; + }(); + + struct pow5_holder_t { + std::uint64_t table[compression_ratio]; + }; + static constexpr pow5_holder_t pow5 = [] { + pow5_holder_t res{}; + std::uint64_t p = 1; + for (std::size_t i = 0; i < compression_ratio; ++i) { + res.table[i] = p; + p *= 5; + } + return res; + }(); + }; + } + + + //////////////////////////////////////////////////////////////////////////////////////// + // Policies. 
+ //////////////////////////////////////////////////////////////////////////////////////// + + namespace detail { + // Forward declare the implementation class. + template > + struct impl; + + namespace policy_impl { + // Sign policies. + namespace sign { + struct base {}; + + struct ignore : base { + using sign_policy = ignore; + static constexpr bool return_has_sign = false; + + template + static constexpr void handle_sign(SignedSignificandBits, ReturnType&) noexcept { + } + }; + + struct return_sign : base { + using sign_policy = return_sign; + static constexpr bool return_has_sign = true; + + template + static constexpr void handle_sign(SignedSignificandBits s, + ReturnType& r) noexcept { + r.is_negative = s.is_negative(); + } + }; + } + + // Trailing zero policies. + namespace trailing_zero { + struct base {}; + + struct ignore : base { + using trailing_zero_policy = ignore; + static constexpr bool report_trailing_zeros = false; + + template + static constexpr void on_trailing_zeros(ReturnType&) noexcept {} + + template + static constexpr void no_trailing_zeros(ReturnType&) noexcept {} + }; + + struct remove : base { + using trailing_zero_policy = remove; + static constexpr bool report_trailing_zeros = false; + + template + JKJ_FORCEINLINE static constexpr void + on_trailing_zeros(ReturnType& r) noexcept { + r.exponent += Impl::remove_trailing_zeros(r.significand); + } + + template + static constexpr void no_trailing_zeros(ReturnType&) noexcept {} + }; + + struct report : base { + using trailing_zero_policy = report; + static constexpr bool report_trailing_zeros = true; + + template + static constexpr void on_trailing_zeros(ReturnType& r) noexcept { + r.may_have_trailing_zeros = true; + } + + template + static constexpr void no_trailing_zeros(ReturnType& r) noexcept { + r.may_have_trailing_zeros = false; + } + }; + } + + // Decimal-to-binary rounding mode policies. 
+ namespace decimal_to_binary_rounding { + struct base {}; + + enum class tag_t { to_nearest, left_closed_directed, right_closed_directed }; + namespace interval_type { + struct symmetric_boundary { + static constexpr bool is_symmetric = true; + bool is_closed; + constexpr bool include_left_endpoint() const noexcept { return is_closed; } + constexpr bool include_right_endpoint() const noexcept { return is_closed; } + }; + struct asymmetric_boundary { + static constexpr bool is_symmetric = false; + bool is_left_closed; + constexpr bool include_left_endpoint() const noexcept { + return is_left_closed; + } + constexpr bool include_right_endpoint() const noexcept { + return !is_left_closed; + } + }; + struct closed { + static constexpr bool is_symmetric = true; + static constexpr bool include_left_endpoint() noexcept { return true; } + static constexpr bool include_right_endpoint() noexcept { return true; } + }; + struct open { + static constexpr bool is_symmetric = true; + static constexpr bool include_left_endpoint() noexcept { return false; } + static constexpr bool include_right_endpoint() noexcept { return false; } + }; + struct left_closed_right_open { + static constexpr bool is_symmetric = false; + static constexpr bool include_left_endpoint() noexcept { return true; } + static constexpr bool include_right_endpoint() noexcept { return false; } + }; + struct right_closed_left_open { + static constexpr bool is_symmetric = false; + static constexpr bool include_left_endpoint() noexcept { return false; } + static constexpr bool include_right_endpoint() noexcept { return true; } + }; + } + + struct nearest_to_even : base { + using decimal_to_binary_rounding_policy = nearest_to_even; + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::symmetric_boundary; + using shorter_interval_type = interval_type::closed; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return 
f(nearest_to_even{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(s.has_even_significand_bits()); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + struct nearest_to_odd : base { + using decimal_to_binary_rounding_policy = nearest_to_odd; + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::symmetric_boundary; + using shorter_interval_type = interval_type::open; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(nearest_to_odd{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(!s.has_even_significand_bits()); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + struct nearest_toward_plus_infinity : base { + using decimal_to_binary_rounding_policy = nearest_toward_plus_infinity; + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::asymmetric_boundary; + using shorter_interval_type = interval_type::asymmetric_boundary; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(nearest_toward_plus_infinity{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(!s.is_negative()); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(!s.is_negative()); + } + }; + struct nearest_toward_minus_infinity : base { + using decimal_to_binary_rounding_policy = nearest_toward_minus_infinity; + static constexpr auto tag = 
tag_t::to_nearest; + using normal_interval_type = interval_type::asymmetric_boundary; + using shorter_interval_type = interval_type::asymmetric_boundary; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(nearest_toward_minus_infinity{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(s.is_negative()); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits s, Func&& f) noexcept { + return f(s.is_negative()); + } + }; + struct nearest_toward_zero : base { + using decimal_to_binary_rounding_policy = nearest_toward_zero; + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::right_closed_left_open; + using shorter_interval_type = interval_type::right_closed_left_open; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(nearest_toward_zero{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + struct nearest_away_from_zero : base { + using decimal_to_binary_rounding_policy = nearest_away_from_zero; + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::left_closed_right_open; + using shorter_interval_type = interval_type::left_closed_right_open; + + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(nearest_away_from_zero{}); + } + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + template + JKJ_FORCEINLINE static constexpr auto + 
invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + + namespace detail { + struct nearest_always_closed { + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::closed; + using shorter_interval_type = interval_type::closed; + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + struct nearest_always_open { + static constexpr auto tag = tag_t::to_nearest; + using normal_interval_type = interval_type::open; + using shorter_interval_type = interval_type::open; + + template + JKJ_FORCEINLINE static constexpr auto + invoke_normal_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + template + JKJ_FORCEINLINE static constexpr auto + invoke_shorter_interval_case(SignedSignificandBits, Func&& f) noexcept { + return f(); + } + }; + } + + struct nearest_to_even_static_boundary : base { + using decimal_to_binary_rounding_policy = nearest_to_even_static_boundary; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.has_even_significand_bits()) { + return f(detail::nearest_always_closed{}); + } + else { + return f(detail::nearest_always_open{}); + } + } + }; + struct nearest_to_odd_static_boundary : base { + using decimal_to_binary_rounding_policy = nearest_to_odd_static_boundary; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.has_even_significand_bits()) { + return f(detail::nearest_always_open{}); + } + else { + return f(detail::nearest_always_closed{}); + } + } + }; + struct nearest_toward_plus_infinity_static_boundary : base { + using decimal_to_binary_rounding_policy = + nearest_toward_plus_infinity_static_boundary; + 
template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.is_negative()) { + return f(nearest_toward_zero{}); + } + else { + return f(nearest_away_from_zero{}); + } + } + }; + struct nearest_toward_minus_infinity_static_boundary : base { + using decimal_to_binary_rounding_policy = + nearest_toward_minus_infinity_static_boundary; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.is_negative()) { + return f(nearest_away_from_zero{}); + } + else { + return f(nearest_toward_zero{}); + } + } + }; + + namespace detail { + struct left_closed_directed { + static constexpr auto tag = tag_t::left_closed_directed; + }; + struct right_closed_directed { + static constexpr auto tag = tag_t::right_closed_directed; + }; + } + + struct toward_plus_infinity : base { + using decimal_to_binary_rounding_policy = toward_plus_infinity; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.is_negative()) { + return f(detail::left_closed_directed{}); + } + else { + return f(detail::right_closed_directed{}); + } + } + }; + struct toward_minus_infinity : base { + using decimal_to_binary_rounding_policy = toward_minus_infinity; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits s, + Func&& f) noexcept { + if (s.is_negative()) { + return f(detail::right_closed_directed{}); + } + else { + return f(detail::left_closed_directed{}); + } + } + }; + struct toward_zero : base { + using decimal_to_binary_rounding_policy = toward_zero; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(detail::left_closed_directed{}); + } + }; + struct away_from_zero : base { + using decimal_to_binary_rounding_policy = away_from_zero; + template + JKJ_FORCEINLINE static auto delegate(SignedSignificandBits, Func&& f) noexcept { + return f(detail::right_closed_directed{}); + } + }; + } + + // 
Binary-to-decimal rounding policies. + // (Always assumes nearest rounding modes.) + namespace binary_to_decimal_rounding { + struct base {}; + + enum class tag_t { do_not_care, to_even, to_odd, away_from_zero, toward_zero }; + + struct do_not_care : base { + using binary_to_decimal_rounding_policy = do_not_care; + static constexpr auto tag = tag_t::do_not_care; + + template + static constexpr bool prefer_round_down(ReturnType const&) noexcept { + return false; + } + }; + + struct to_even : base { + using binary_to_decimal_rounding_policy = to_even; + static constexpr auto tag = tag_t::to_even; + + template + static constexpr bool prefer_round_down(ReturnType const& r) noexcept { + return r.significand % 2 != 0; + } + }; + + struct to_odd : base { + using binary_to_decimal_rounding_policy = to_odd; + static constexpr auto tag = tag_t::to_odd; + + template + static constexpr bool prefer_round_down(ReturnType const& r) noexcept { + return r.significand % 2 == 0; + } + }; + + struct away_from_zero : base { + using binary_to_decimal_rounding_policy = away_from_zero; + static constexpr auto tag = tag_t::away_from_zero; + + template + static constexpr bool prefer_round_down(ReturnType const&) noexcept { + return false; + } + }; + + struct toward_zero : base { + using binary_to_decimal_rounding_policy = toward_zero; + static constexpr auto tag = tag_t::toward_zero; + + template + static constexpr bool prefer_round_down(ReturnType const&) noexcept { + return true; + } + }; + } + + // Cache policies. 
+ namespace cache { + struct base {}; + + struct full : base { + using cache_policy = full; + template + static constexpr typename cache_holder::cache_entry_type + get_cache(int k) noexcept { + assert(k >= cache_holder::min_k && + k <= cache_holder::max_k); + return cache_holder::cache[std::size_t( + k - cache_holder::min_k)]; + } + }; + + struct compact : base { + using cache_policy = compact; + template + static constexpr typename cache_holder::cache_entry_type + get_cache(int k) noexcept { + assert(k >= cache_holder::min_k && + k <= cache_holder::max_k); + + if constexpr (std::is_same_v) { + // Compute the base index. + auto const cache_index = + int(std::uint32_t(k - cache_holder::min_k) / + compressed_cache_detail::compression_ratio); + auto const kb = + cache_index * compressed_cache_detail::compression_ratio + + cache_holder::min_k; + auto const offset = k - kb; + + // Get the base cache. + auto const base_cache = + compressed_cache_detail::cache.table[cache_index]; + + if (offset == 0) { + return base_cache; + } + else { + // Compute the required amount of bit-shift. + auto const alpha = log::floor_log2_pow10(kb + offset) - + log::floor_log2_pow10(kb) - offset; + assert(alpha > 0 && alpha < 64); + + // Try to recover the real cache. 
+ auto const pow5 = compressed_cache_detail::pow5.table[offset]; + auto recovered_cache = wuint::umul128(base_cache.high(), pow5); + auto const middle_low = wuint::umul128(base_cache.low(), pow5); + + recovered_cache += middle_low.high(); + + auto const high_to_middle = recovered_cache.high() << (64 - alpha); + auto const middle_to_low = recovered_cache.low() << (64 - alpha); + + recovered_cache = wuint::uint128{ + (recovered_cache.low() >> alpha) | high_to_middle, + ((middle_low.low() >> alpha) | middle_to_low)}; + + assert(recovered_cache.low() + 1 != 0); + recovered_cache = {recovered_cache.high(), + recovered_cache.low() + 1}; + + return recovered_cache; + } + } + else { + // Just use the full cache for anything other than binary64 + return cache_holder::cache[std::size_t( + k - cache_holder::min_k)]; + } + } + }; + } + } + } + + namespace policy { + namespace sign { + inline constexpr auto ignore = detail::policy_impl::sign::ignore{}; + inline constexpr auto return_sign = detail::policy_impl::sign::return_sign{}; + } + + namespace trailing_zero { + inline constexpr auto ignore = detail::policy_impl::trailing_zero::ignore{}; + inline constexpr auto remove = detail::policy_impl::trailing_zero::remove{}; + inline constexpr auto report = detail::policy_impl::trailing_zero::report{}; + } + + namespace decimal_to_binary_rounding { + inline constexpr auto nearest_to_even = + detail::policy_impl::decimal_to_binary_rounding::nearest_to_even{}; + inline constexpr auto nearest_to_odd = + detail::policy_impl::decimal_to_binary_rounding::nearest_to_odd{}; + inline constexpr auto nearest_toward_plus_infinity = + detail::policy_impl::decimal_to_binary_rounding::nearest_toward_plus_infinity{}; + inline constexpr auto nearest_toward_minus_infinity = + detail::policy_impl::decimal_to_binary_rounding::nearest_toward_minus_infinity{}; + inline constexpr auto nearest_toward_zero = + detail::policy_impl::decimal_to_binary_rounding::nearest_toward_zero{}; + inline constexpr auto 
nearest_away_from_zero = + detail::policy_impl::decimal_to_binary_rounding::nearest_away_from_zero{}; + + inline constexpr auto nearest_to_even_static_boundary = + detail::policy_impl::decimal_to_binary_rounding::nearest_to_even_static_boundary{}; + inline constexpr auto nearest_to_odd_static_boundary = + detail::policy_impl::decimal_to_binary_rounding::nearest_to_odd_static_boundary{}; + inline constexpr auto nearest_toward_plus_infinity_static_boundary = + detail::policy_impl::decimal_to_binary_rounding:: + nearest_toward_plus_infinity_static_boundary{}; + inline constexpr auto nearest_toward_minus_infinity_static_boundary = + detail::policy_impl::decimal_to_binary_rounding:: + nearest_toward_minus_infinity_static_boundary{}; + + inline constexpr auto toward_plus_infinity = + detail::policy_impl::decimal_to_binary_rounding::toward_plus_infinity{}; + inline constexpr auto toward_minus_infinity = + detail::policy_impl::decimal_to_binary_rounding::toward_minus_infinity{}; + inline constexpr auto toward_zero = + detail::policy_impl::decimal_to_binary_rounding::toward_zero{}; + inline constexpr auto away_from_zero = + detail::policy_impl::decimal_to_binary_rounding::away_from_zero{}; + } + + namespace binary_to_decimal_rounding { + inline constexpr auto do_not_care = + detail::policy_impl::binary_to_decimal_rounding::do_not_care{}; + inline constexpr auto to_even = + detail::policy_impl::binary_to_decimal_rounding::to_even{}; + inline constexpr auto to_odd = + detail::policy_impl::binary_to_decimal_rounding::to_odd{}; + inline constexpr auto away_from_zero = + detail::policy_impl::binary_to_decimal_rounding::away_from_zero{}; + inline constexpr auto toward_zero = + detail::policy_impl::binary_to_decimal_rounding::toward_zero{}; + } + + namespace cache { + inline constexpr auto full = detail::policy_impl::cache::full{}; + inline constexpr auto compact = detail::policy_impl::cache::compact{}; + } + } + + namespace detail { + 
//////////////////////////////////////////////////////////////////////////////////////// + // The main algorithm. + //////////////////////////////////////////////////////////////////////////////////////// + + template + struct impl : private FloatTraits, private FloatTraits::format { + using format = typename FloatTraits::format; + using carrier_uint = typename FloatTraits::carrier_uint; + + using FloatTraits::carrier_bits; + using format::significand_bits; + using format::min_exponent; + using format::max_exponent; + using format::exponent_bias; + using format::decimal_digits; + + static constexpr int kappa = std::is_same_v ? 1 : 2; + static_assert(kappa >= 1); + static_assert(carrier_bits >= significand_bits + 2 + log::floor_log2_pow10(kappa + 1)); + + static constexpr int min_k = [] { + constexpr auto a = -log::floor_log10_pow2_minus_log10_4_over_3( + int(max_exponent - significand_bits)); + constexpr auto b = + -log::floor_log10_pow2(int(max_exponent - significand_bits)) + kappa; + return a < b ? a : b; + }(); + static_assert(min_k >= cache_holder::min_k); + + static constexpr int max_k = [] { + // We do invoke shorter_interval_case for exponent == min_exponent case, + // so we should not add 1 here. + constexpr auto a = -log::floor_log10_pow2_minus_log10_4_over_3( + int(min_exponent - significand_bits /*+ 1*/)); + constexpr auto b = + -log::floor_log10_pow2(int(min_exponent - significand_bits)) + kappa; + return a > b ? 
a : b; + }(); + static_assert(max_k <= cache_holder::max_k); + + using cache_entry_type = typename cache_holder::cache_entry_type; + static constexpr auto cache_bits = cache_holder::cache_bits; + + static constexpr int case_shorter_interval_left_endpoint_lower_threshold = 2; + static constexpr int case_shorter_interval_left_endpoint_upper_threshold = + 2 + + log::floor_log2( + compute_power< + count_factors<5>((carrier_uint(1) << (significand_bits + 2)) - 1) + 1>(10) / + 3); + + static constexpr int case_shorter_interval_right_endpoint_lower_threshold = 0; + static constexpr int case_shorter_interval_right_endpoint_upper_threshold = + 2 + + log::floor_log2( + compute_power< + count_factors<5>((carrier_uint(1) << (significand_bits + 1)) + 1) + 1>(10) / + 3); + + static constexpr int shorter_interval_tie_lower_threshold = + -log::floor_log5_pow2_minus_log5_3(significand_bits + 4) - 2 - significand_bits; + static constexpr int shorter_interval_tie_upper_threshold = + -log::floor_log5_pow2(significand_bits + 2) - 2 - significand_bits; + + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + //// The main algorithm assumes the input is a normal/subnormal finite number + + template + JKJ_SAFEBUFFERS static ReturnType + compute_nearest_normal(carrier_uint const two_fc, int const exponent, + AdditionalArgs... additional_args) noexcept { + ////////////////////////////////////////////////////////////////////// + // Step 1: Schubfach multiplier calculation + ////////////////////////////////////////////////////////////////////// + + ReturnType ret_value; + IntervalType interval_type{additional_args...}; + + // Compute k and beta. + int const minus_k = log::floor_log10_pow2(exponent) - kappa; + auto const cache = CachePolicy::template get_cache(-minus_k); + int const beta = exponent + log::floor_log2_pow10(-minus_k); + + // Compute zi and deltai. 
+ // 10^kappa <= deltai < 10^(kappa + 1) + auto const deltai = compute_delta(cache, beta); + // For the case of binary32, the result of integer check is not correct for + // 29711844 * 2^-82 + // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18 + // and 29711844 * 2^-81 + // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17, + // and they are the unique counterexamples. However, since 29711844 is even, + // this does not cause any problem for the endpoints calculations; it can only + // cause a problem when we need to perform integer check for the center. + // Fortunately, with these inputs, that branch is never executed, so we are fine. + auto const [zi, is_z_integer] = compute_mul((two_fc | 1) << beta, cache); + + + ////////////////////////////////////////////////////////////////////// + // Step 2: Try larger divisor; remove trailing zeros if necessary + ////////////////////////////////////////////////////////////////////// + + constexpr auto big_divisor = compute_power(std::uint32_t(10)); + constexpr auto small_divisor = compute_power(std::uint32_t(10)); + + // Using an upper bound on zi, we might be able to optimize the division + // better than the compiler; we are computing zi / big_divisor here. + ret_value.significand = + div::divide_by_pow10(zi); + auto r = std::uint32_t(zi - big_divisor * ret_value.significand); + + if (r < deltai) { + // Exclude the right endpoint if necessary. 
+ if (r == 0 && (is_z_integer & !interval_type.include_right_endpoint())) { + if constexpr (BinaryToDecimalRoundingPolicy::tag == + policy_impl::binary_to_decimal_rounding::tag_t::do_not_care) { + ret_value.significand *= 10; + ret_value.exponent = minus_k + kappa; + --ret_value.significand; + TrailingZeroPolicy::template no_trailing_zeros(ret_value); + return ret_value; + } + else { + --ret_value.significand; + r = big_divisor; + goto small_divisor_case_label; + } + } + } + else if (r > deltai) { + goto small_divisor_case_label; + } + else { + // r == deltai; compare fractional parts. + auto const [xi_parity, x_is_integer] = + compute_mul_parity(two_fc - 1, cache, beta); + + if (!(xi_parity | (x_is_integer & interval_type.include_left_endpoint()))) { + goto small_divisor_case_label; + } + } + ret_value.exponent = minus_k + kappa + 1; + + // We may need to remove trailing zeros. + TrailingZeroPolicy::template on_trailing_zeros(ret_value); + return ret_value; + + + ////////////////////////////////////////////////////////////////////// + // Step 3: Find the significand with the smaller divisor + ////////////////////////////////////////////////////////////////////// + + small_divisor_case_label: + TrailingZeroPolicy::template no_trailing_zeros(ret_value); + ret_value.significand *= 10; + ret_value.exponent = minus_k + kappa; + + if constexpr (BinaryToDecimalRoundingPolicy::tag == + policy_impl::binary_to_decimal_rounding::tag_t::do_not_care) { + // Normally, we want to compute + // ret_value.significand += r / small_divisor + // and return, but we need to take care of the case that the resulting + // value is exactly the right endpoint, while that is not included in the + // interval. + if (!interval_type.include_right_endpoint()) { + // Is r divisible by 10^kappa? + if (is_z_integer && div::check_divisibility_and_divide_by_pow10(r)) { + // This should be in the interval. 
+ ret_value.significand += r - 1; + } + else { + ret_value.significand += r; + } + } + else { + ret_value.significand += div::small_division_by_pow10(r); + } + } + else { + auto dist = r - (deltai / 2) + (small_divisor / 2); + bool const approx_y_parity = ((dist ^ (small_divisor / 2)) & 1) != 0; + + // Is dist divisible by 10^kappa? + bool const divisible_by_small_divisor = + div::check_divisibility_and_divide_by_pow10(dist); + + // Add dist / 10^kappa to the significand. + ret_value.significand += dist; + + if (divisible_by_small_divisor) { + // Check z^(f) >= epsilon^(f). + // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, + // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f). + // Since there are only 2 possibilities, we only need to care about the + // parity. Also, zi and r should have the same parity since the divisor is + // an even number. + auto const [yi_parity, is_y_integer] = + compute_mul_parity(two_fc, cache, beta); + if (yi_parity != approx_y_parity) { + --ret_value.significand; + } + else { + // If z^(f) >= epsilon^(f), we might have a tie + // when z^(f) == epsilon^(f), or equivalently, when y is an integer. + // For tie-to-up case, we can just choose the upper one. + if (BinaryToDecimalRoundingPolicy::prefer_round_down(ret_value) & + is_y_integer) { + --ret_value.significand; + } + } + } + } + return ret_value; + } + + template + JKJ_SAFEBUFFERS static ReturnType + compute_nearest_shorter(int const exponent, + AdditionalArgs... additional_args) noexcept { + ReturnType ret_value; + IntervalType interval_type{additional_args...}; + + // Compute k and beta. + int const minus_k = log::floor_log10_pow2_minus_log10_4_over_3(exponent); + int const beta = exponent + log::floor_log2_pow10(-minus_k); + + // Compute xi and zi. 
+ auto const cache = CachePolicy::template get_cache(-minus_k); + + auto xi = compute_left_endpoint_for_shorter_interval_case(cache, beta); + auto zi = compute_right_endpoint_for_shorter_interval_case(cache, beta); + + // If we don't accept the right endpoint and + // if the right endpoint is an integer, decrease it. + if (!interval_type.include_right_endpoint() && + is_right_endpoint_integer_shorter_interval(exponent)) { + --zi; + } + // If we don't accept the left endpoint or + // if the left endpoint is not an integer, increase it. + if (!interval_type.include_left_endpoint() || + !is_left_endpoint_integer_shorter_interval(exponent)) { + ++xi; + } + + // Try bigger divisor. + ret_value.significand = zi / 10; + + // If succeed, remove trailing zeros if necessary and return. + if (ret_value.significand * 10 >= xi) { + ret_value.exponent = minus_k + 1; + TrailingZeroPolicy::template on_trailing_zeros(ret_value); + return ret_value; + } + + // Otherwise, compute the round-up of y. + TrailingZeroPolicy::template no_trailing_zeros(ret_value); + ret_value.significand = compute_round_up_for_shorter_interval_case(cache, beta); + ret_value.exponent = minus_k; + + // When tie occurs, choose one of them according to the rule. + if (BinaryToDecimalRoundingPolicy::prefer_round_down(ret_value) && + exponent >= shorter_interval_tie_lower_threshold && + exponent <= shorter_interval_tie_upper_threshold) { + --ret_value.significand; + } + else if (ret_value.significand < xi) { + ++ret_value.significand; + } + return ret_value; + } + + template + JKJ_SAFEBUFFERS static ReturnType + compute_left_closed_directed(carrier_uint const two_fc, int exponent) noexcept { + ////////////////////////////////////////////////////////////////////// + // Step 1: Schubfach multiplier calculation + ////////////////////////////////////////////////////////////////////// + + ReturnType ret_value; + + // Compute k and beta. 
+ int const minus_k = log::floor_log10_pow2(exponent) - kappa; + auto const cache = CachePolicy::template get_cache(-minus_k); + int const beta = exponent + log::floor_log2_pow10(-minus_k); + + // Compute xi and deltai. + // 10^kappa <= deltai < 10^(kappa + 1) + auto const deltai = compute_delta(cache, beta); + auto [xi, is_x_integer] = compute_mul(two_fc << beta, cache); + + // Deal with the unique exceptional cases + // 29711844 * 2^-82 + // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18 + // and 29711844 * 2^-81 + // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17 + // for binary32. + if constexpr (std::is_same_v) { + if (exponent <= -80) { + is_x_integer = false; + } + } + + if (!is_x_integer) { + ++xi; + } + + ////////////////////////////////////////////////////////////////////// + // Step 2: Try larger divisor; remove trailing zeros if necessary + ////////////////////////////////////////////////////////////////////// + + constexpr auto big_divisor = compute_power(std::uint32_t(10)); + + // Using an upper bound on xi, we might be able to optimize the division + // better than the compiler; we are computing xi / big_divisor here. + ret_value.significand = + div::divide_by_pow10(xi); + auto r = std::uint32_t(xi - big_divisor * ret_value.significand); + + if (r != 0) { + ++ret_value.significand; + r = big_divisor - r; + } + + if (r > deltai) { + goto small_divisor_case_label; + } + else if (r == deltai) { + // Compare the fractional parts. + // This branch is never taken for the exceptional cases + // 2f_c = 29711482, e = -81 + // (6.1442649164096937243516663440523473127541365101933479309082... * 10^-18) + // and 2f_c = 29711482, e = -80 + // (1.2288529832819387448703332688104694625508273020386695861816... * 10^-17). 
+ auto const [zi_parity, is_z_integer] = + compute_mul_parity(two_fc + 2, cache, beta); + if (zi_parity || is_z_integer) { + goto small_divisor_case_label; + } + } + + // The ceiling is inside, so we are done. + ret_value.exponent = minus_k + kappa + 1; + TrailingZeroPolicy::template on_trailing_zeros(ret_value); + return ret_value; + + + ////////////////////////////////////////////////////////////////////// + // Step 3: Find the significand with the smaller divisor + ////////////////////////////////////////////////////////////////////// + + small_divisor_case_label: + ret_value.significand *= 10; + ret_value.significand -= div::small_division_by_pow10(r); + ret_value.exponent = minus_k + kappa; + TrailingZeroPolicy::template no_trailing_zeros(ret_value); + return ret_value; + } + + template + JKJ_SAFEBUFFERS static ReturnType + compute_right_closed_directed(carrier_uint const two_fc, int const exponent, + bool shorter_interval) noexcept { + ////////////////////////////////////////////////////////////////////// + // Step 1: Schubfach multiplier calculation + ////////////////////////////////////////////////////////////////////// + + ReturnType ret_value; + + // Compute k and beta. + int const minus_k = + log::floor_log10_pow2(exponent - (shorter_interval ? 1 : 0)) - kappa; + auto const cache = CachePolicy::template get_cache(-minus_k); + int const beta = exponent + log::floor_log2_pow10(-minus_k); + + // Compute zi and deltai. + // 10^kappa <= deltai < 10^(kappa + 1) + auto const deltai = + shorter_interval ? 
compute_delta(cache, beta - 1) : compute_delta(cache, beta); + carrier_uint const zi = compute_mul(two_fc << beta, cache).result; + + + ////////////////////////////////////////////////////////////////////// + // Step 2: Try larger divisor; remove trailing zeros if necessary + ////////////////////////////////////////////////////////////////////// + + constexpr auto big_divisor = compute_power(std::uint32_t(10)); + + // Using an upper bound on zi, we might be able to optimize the division better than + // the compiler; we are computing zi / big_divisor here. + ret_value.significand = + div::divide_by_pow10(zi); + auto const r = std::uint32_t(zi - big_divisor * ret_value.significand); + + if (r > deltai) { + goto small_divisor_case_label; + } + else if (r == deltai) { + // Compare the fractional parts. + if (!compute_mul_parity(two_fc - (shorter_interval ? 1 : 2), cache, beta) + .parity) { + goto small_divisor_case_label; + } + } + + // The floor is inside, so we are done. + ret_value.exponent = minus_k + kappa + 1; + TrailingZeroPolicy::template on_trailing_zeros(ret_value); + return ret_value; + + + ////////////////////////////////////////////////////////////////////// + // Step 3: Find the significand with the small divisor + ////////////////////////////////////////////////////////////////////// + + small_divisor_case_label: + ret_value.significand *= 10; + ret_value.significand += div::small_division_by_pow10(r); + ret_value.exponent = minus_k + kappa; + TrailingZeroPolicy::template no_trailing_zeros(ret_value); + return ret_value; + } + + // Remove trailing zeros from n and return the number of zeros removed. 
+ JKJ_FORCEINLINE static int remove_trailing_zeros(carrier_uint& n) noexcept { + assert(n != 0); + + if constexpr (std::is_same_v) { + constexpr auto mod_inv_5 = std::uint32_t(0xcccc'cccd); + constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 0; + while (true) { + auto q = bits::rotr(n * mod_inv_25, 2); + if (q <= std::numeric_limits::max() / 100) { + n = q; + s += 2; + } + else { + break; + } + } + auto q = bits::rotr(n * mod_inv_5, 1); + if (q <= std::numeric_limits::max() / 10) { + n = q; + s |= 1; + } + + return s; + } + else { + static_assert(std::is_same_v); + + // Divide by 10^8 and reduce to 32-bits if divisible. + // Since ret_value.significand <= (2^53 * 1000 - 1) / 1000 < 10^16, + // n is at most of 16 digits. + + // This magic number is ceil(2^90 / 10^8). + constexpr auto magic_number = std::uint64_t(12379400392853802749ull); + auto nm = wuint::umul128(n, magic_number); + + // Is n is divisible by 10^8? + if ((nm.high() & ((std::uint64_t(1) << (90 - 64)) - 1)) == 0 && + nm.low() < magic_number) { + // If yes, work with the quotient. + auto n32 = std::uint32_t(nm.high() >> (90 - 64)); + + constexpr auto mod_inv_5 = std::uint32_t(0xcccc'cccd); + constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 8; + while (true) { + auto q = bits::rotr(n32 * mod_inv_25, 2); + if (q <= std::numeric_limits::max() / 100) { + n32 = q; + s += 2; + } + else { + break; + } + } + auto q = bits::rotr(n32 * mod_inv_5, 1); + if (q <= std::numeric_limits::max() / 10) { + n32 = q; + s |= 1; + } + + n = n32; + return s; + } + + // If n is not divisible by 10^8, work with n itself. 
+ constexpr auto mod_inv_5 = std::uint64_t(0xcccc'cccc'cccc'cccd); + constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 0; + while (true) { + auto q = bits::rotr(n * mod_inv_25, 2); + if (q <= std::numeric_limits::max() / 100) { + n = q; + s += 2; + } + else { + break; + } + } + auto q = bits::rotr(n * mod_inv_5, 1); + if (q <= std::numeric_limits::max() / 10) { + n = q; + s |= 1; + } + + return s; + } + } + + static compute_mul_result compute_mul(carrier_uint u, + cache_entry_type const& cache) noexcept { + if constexpr (std::is_same_v) { + auto r = wuint::umul96_upper64(u, cache); + return {carrier_uint(r >> 32), carrier_uint(r) == 0}; + } + else { + static_assert(std::is_same_v); + auto r = wuint::umul192_upper128(u, cache); + return {r.high(), r.low() == 0}; + } + } + + static constexpr std::uint32_t compute_delta(cache_entry_type const& cache, + int beta) noexcept { + if constexpr (std::is_same_v) { + return std::uint32_t(cache >> (cache_bits - 1 - beta)); + } + else { + static_assert(std::is_same_v); + return std::uint32_t(cache.high() >> (carrier_bits - 1 - beta)); + } + } + + static compute_mul_parity_result compute_mul_parity(carrier_uint two_f, + cache_entry_type const& cache, + int beta) noexcept { + assert(beta >= 1); + assert(beta < 64); + + if constexpr (std::is_same_v) { + auto r = wuint::umul96_lower64(two_f, cache); + return {((r >> (64 - beta)) & 1) != 0, std::uint32_t(r >> (32 - beta)) == 0}; + } + else { + static_assert(std::is_same_v); + auto r = wuint::umul192_lower128(two_f, cache); + return {((r.high() >> (64 - beta)) & 1) != 0, + ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; + } + } + + static constexpr carrier_uint + compute_left_endpoint_for_shorter_interval_case(cache_entry_type const& cache, + int beta) noexcept { + if constexpr (std::is_same_v) { + return carrier_uint((cache - (cache >> (significand_bits + 2))) >> + (cache_bits - significand_bits - 1 - beta)); + } + else { + static_assert(std::is_same_v); + return 
(cache.high() - (cache.high() >> (significand_bits + 2))) >> + (carrier_bits - significand_bits - 1 - beta); + } + } + + static constexpr carrier_uint + compute_right_endpoint_for_shorter_interval_case(cache_entry_type const& cache, + int beta) noexcept { + if constexpr (std::is_same_v) { + return carrier_uint((cache + (cache >> (significand_bits + 1))) >> + (cache_bits - significand_bits - 1 - beta)); + } + else { + static_assert(std::is_same_v); + return (cache.high() + (cache.high() >> (significand_bits + 1))) >> + (carrier_bits - significand_bits - 1 - beta); + } + } + + static constexpr carrier_uint + compute_round_up_for_shorter_interval_case(cache_entry_type const& cache, + int beta) noexcept { + if constexpr (std::is_same_v) { + return (carrier_uint(cache >> (cache_bits - significand_bits - 2 - beta)) + 1) / + 2; + } + else { + static_assert(std::is_same_v); + return ((cache.high() >> (carrier_bits - significand_bits - 2 - beta)) + 1) / 2; + } + } + + static constexpr bool + is_right_endpoint_integer_shorter_interval(int exponent) noexcept { + return exponent >= case_shorter_interval_right_endpoint_lower_threshold && + exponent <= case_shorter_interval_right_endpoint_upper_threshold; + } + + static constexpr bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { + return exponent >= case_shorter_interval_left_endpoint_lower_threshold && + exponent <= case_shorter_interval_left_endpoint_upper_threshold; + } + }; + + + //////////////////////////////////////////////////////////////////////////////////////// + // Policy holder. + //////////////////////////////////////////////////////////////////////////////////////// + + namespace policy_impl { + // The library will specify a list of accepted kinds of policies and their defaults, and + // the user will pass a list of policies. The aim of helper classes/functions here is to + // do the following: + // 1. 
Check if the policy parameters given by the user are all valid; that means, + // each of them should be of the kinds specified by the library. + // If that's not the case, then the compilation fails. + // 2. Check if multiple policy parameters for the same kind is specified by the user. + // If that's the case, then the compilation fails. + // 3. Build a class deriving from all policies the user have given, and also from + // the default policies if the user did not specify one for some kinds. + // A policy belongs to a certain kind if it is deriving from a base class. + + // For a given kind, find a policy belonging to that kind. + // Check if there are more than one such policies. + enum class policy_found_info { not_found, unique, repeated }; + template + struct found_policy_pair { + using policy = Policy; + static constexpr auto found_info = info; + }; + + template + struct base_default_pair { + using base = Base; + + template + static constexpr FoundPolicyInfo get_policy_impl(FoundPolicyInfo) { + return {}; + } + template + static constexpr auto get_policy_impl(FoundPolicyInfo, FirstPolicy, + RemainingPolicies... remainings) { + if constexpr (std::is_base_of_v) { + if constexpr (FoundPolicyInfo::found_info == policy_found_info::not_found) { + return get_policy_impl( + found_policy_pair{}, + remainings...); + } + else { + return get_policy_impl( + found_policy_pair{}, + remainings...); + } + } + else { + return get_policy_impl(FoundPolicyInfo{}, remainings...); + } + } + + template + static constexpr auto get_policy(Policies... policies) { + return get_policy_impl( + found_policy_pair{}, + policies...); + } + }; + template + struct base_default_pair_list {}; + + // Check if a given policy belongs to one of the kinds specified by the library. 
+ template + constexpr bool check_policy_validity(Policy, base_default_pair_list<>) { + return false; + } + template + constexpr bool check_policy_validity( + Policy, + base_default_pair_list) { + return std::is_base_of_v || + check_policy_validity( + Policy{}, base_default_pair_list{}); + } + + template + constexpr bool check_policy_list_validity(BaseDefaultPairList) { + return true; + } + + template + constexpr bool check_policy_list_validity(BaseDefaultPairList, FirstPolicy, + RemainingPolicies... remaining_policies) { + return check_policy_validity(FirstPolicy{}, BaseDefaultPairList{}) && + check_policy_list_validity(BaseDefaultPairList{}, remaining_policies...); + } + + // Build policy_holder. + template + struct found_policy_pair_list { + static constexpr bool repeated = repeated_; + }; + + template + struct policy_holder : Policies... {}; + + template + constexpr auto + make_policy_holder_impl(base_default_pair_list<>, + found_policy_pair_list, + Policies...) { + return found_policy_pair_list{}; + } + + template + constexpr auto make_policy_holder_impl( + base_default_pair_list, + found_policy_pair_list, Policies... policies) { + using new_found_policy_pair = + decltype(FirstBaseDefaultPair::get_policy(policies...)); + + return make_policy_holder_impl( + base_default_pair_list{}, + found_policy_pair_list < repeated || + new_found_policy_pair::found_info == policy_found_info::repeated, + new_found_policy_pair, FoundPolicyPairs... > {}, policies...); + } + + template + constexpr auto convert_to_policy_holder(found_policy_pair_list, + RawPolicies...) { + return policy_holder{}; + } + + template + constexpr auto + convert_to_policy_holder(found_policy_pair_list, + RawPolicies... policies) { + return convert_to_policy_holder( + found_policy_pair_list{}, + typename FirstFoundPolicyPair::policy{}, policies...); + } + + template + constexpr auto make_policy_holder(BaseDefaultPairList, Policies... 
policies) { + static_assert(check_policy_list_validity(BaseDefaultPairList{}, Policies{}...), + "jkj::dragonbox: an invalid policy is specified"); + + using policy_pair_list = decltype(make_policy_holder_impl( + BaseDefaultPairList{}, found_policy_pair_list{}, policies...)); + + static_assert(!policy_pair_list::repeated, + "jkj::dragonbox: each policy should be specified at most once"); + + return convert_to_policy_holder(policy_pair_list{}); + } + } + } + + + //////////////////////////////////////////////////////////////////////////////////////// + // The interface function. + //////////////////////////////////////////////////////////////////////////////////////// + + template , class... Policies> + JKJ_FORCEINLINE JKJ_SAFEBUFFERS auto + to_decimal(signed_significand_bits signed_significand_bits, + unsigned int exponent_bits, Policies... policies) noexcept { + // Build policy holder type. + using namespace detail::policy_impl; + using policy_holder = decltype(make_policy_holder( + base_default_pair_list, + base_default_pair, + base_default_pair, + base_default_pair, + base_default_pair>{}, + policies...)); + + using return_type = + decimal_fp; + + return_type ret = policy_holder::delegate( + signed_significand_bits, + [exponent_bits, signed_significand_bits](auto interval_type_provider) { + using format = typename FloatTraits::format; + constexpr auto tag = decltype(interval_type_provider)::tag; + + auto two_fc = signed_significand_bits.remove_sign_bit_and_shift(); + auto exponent = int(exponent_bits); + + if constexpr (tag == decimal_to_binary_rounding::tag_t::to_nearest) { + // Is the input a normal number? + if (exponent != 0) { + exponent += format::exponent_bias - format::significand_bits; + + // Shorter interval case; proceed like Schubfach. + // One might think this condition is wrong, since when exponent_bits == 1 + // and two_fc == 0, the interval is actually regular. 
However, it turns out + // that this seemingly wrong condition is actually fine, because the end + // result is anyway the same. + // + // [binary32] + // (fc-1/2) * 2^e = 1.175'494'28... * 10^-38 + // (fc-1/4) * 2^e = 1.175'494'31... * 10^-38 + // fc * 2^e = 1.175'494'35... * 10^-38 + // (fc+1/2) * 2^e = 1.175'494'42... * 10^-38 + // + // Hence, shorter_interval_case will return 1.175'494'4 * 10^-38. + // 1.175'494'3 * 10^-38 is also a correct shortest representation that will + // be rejected if we assume shorter interval, but 1.175'494'4 * 10^-38 is + // closer to the true value so it doesn't matter. + // + // [binary64] + // (fc-1/2) * 2^e = 2.225'073'858'507'201'13... * 10^-308 + // (fc-1/4) * 2^e = 2.225'073'858'507'201'25... * 10^-308 + // fc * 2^e = 2.225'073'858'507'201'38... * 10^-308 + // (fc+1/2) * 2^e = 2.225'073'858'507'201'63... * 10^-308 + // + // Hence, shorter_interval_case will return 2.225'073'858'507'201'4 * + // 10^-308. This is indeed of the shortest length, and it is the unique one + // closest to the true value among valid representations of the same length. + static_assert(std::is_same_v || + std::is_same_v); + + if (two_fc == 0) { + return decltype(interval_type_provider)::invoke_shorter_interval_case( + signed_significand_bits, [exponent](auto... additional_args) { + return detail::impl:: + template compute_nearest_shorter< + return_type, + typename decltype(interval_type_provider):: + shorter_interval_type, + typename policy_holder::trailing_zero_policy, + typename policy_holder:: + binary_to_decimal_rounding_policy, + typename policy_holder::cache_policy>( + exponent, additional_args...); + }); + } + + two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1)); + } + // Is the input a subnormal number? + else { + exponent = format::min_exponent - format::significand_bits; + } + + return decltype(interval_type_provider)::invoke_normal_interval_case( + signed_significand_bits, [two_fc, exponent](auto... 
additional_args) { + return detail::impl:: + template compute_nearest_normal< + return_type, + typename decltype(interval_type_provider)::normal_interval_type, + typename policy_holder::trailing_zero_policy, + typename policy_holder::binary_to_decimal_rounding_policy, + typename policy_holder::cache_policy>(two_fc, exponent, + additional_args...); + }); + } + else if constexpr (tag == decimal_to_binary_rounding::tag_t::left_closed_directed) { + // Is the input a normal number? + if (exponent != 0) { + exponent += format::exponent_bias - format::significand_bits; + two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1)); + } + // Is the input a subnormal number? + else { + exponent = format::min_exponent - format::significand_bits; + } + + return detail::impl::template compute_left_closed_directed< + return_type, typename policy_holder::trailing_zero_policy, + typename policy_holder::cache_policy>(two_fc, exponent); + } + else { + static_assert(tag == decimal_to_binary_rounding::tag_t::right_closed_directed); + + bool shorter_interval = false; + + // Is the input a normal number? + if (exponent != 0) { + if (two_fc == 0 && exponent != 1) { + shorter_interval = true; + } + exponent += format::exponent_bias - format::significand_bits; + two_fc |= (decltype(two_fc)(1) << (format::significand_bits + 1)); + } + // Is the input a subnormal number? + else { + exponent = format::min_exponent - format::significand_bits; + } + + return detail::impl::template compute_right_closed_directed< + return_type, typename policy_holder::trailing_zero_policy, + typename policy_holder::cache_policy>(two_fc, exponent, shorter_interval); + } + }); + + policy_holder::handle_sign(signed_significand_bits, ret); + return ret; + } + + template , class... Policies> + JKJ_FORCEINLINE JKJ_SAFEBUFFERS auto to_decimal(Float x, Policies... 
policies) noexcept { + auto const br = float_bits(x); + auto const exponent_bits = br.extract_exponent_bits(); + auto const s = br.remove_exponent_bits(exponent_bits); + assert(br.is_finite()); + + return to_decimal(s, exponent_bits, policies...); + } +} + +#undef JKJ_FORCEINLINE +#undef JKJ_SAFEBUFFERS +#undef JKJ_DRAGONBOX_HAS_BUILTIN + +#endif diff --git a/server/dragonbox/dragonbox_to_chars.h b/server/dragonbox/dragonbox_to_chars.h new file mode 100644 index 0000000..ca5384f --- /dev/null +++ b/server/dragonbox/dragonbox_to_chars.h @@ -0,0 +1,108 @@ +// Copyright 2020-2022 Junekey Jeon +// +// The contents of this file may be used under the terms of +// the Apache License v2.0 with LLVM Exceptions. +// +// (See accompanying file LICENSE-Apache or copy at +// https://llvm.org/foundation/relicensing/LICENSE.txt) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + +#ifndef JKJ_HEADER_DRAGONBOX_TO_CHARS +#define JKJ_HEADER_DRAGONBOX_TO_CHARS + +#include "dragonbox.h" + +namespace jkj::dragonbox { + namespace to_chars_detail { + template + extern char* to_chars(typename FloatTraits::carrier_uint significand, int exponent, + char* buffer) noexcept; + + // Avoid needless ABI overhead incurred by tag dispatch. 
+ template + char* to_chars_n_impl(float_bits br, char* buffer) noexcept { + auto const exponent_bits = br.extract_exponent_bits(); + auto const s = br.remove_exponent_bits(exponent_bits); + + if (br.is_finite(exponent_bits)) { + if (s.is_negative()) { + *buffer = '-'; + ++buffer; + } + if (br.is_nonzero()) { + auto result = to_decimal( + s, exponent_bits, policy::sign::ignore, policy::trailing_zero::ignore, + typename PolicyHolder::decimal_to_binary_rounding_policy{}, + typename PolicyHolder::binary_to_decimal_rounding_policy{}, + typename PolicyHolder::cache_policy{}); + return to_chars_detail::to_chars(result.significand, + result.exponent, buffer); + } + else { + *buffer = '0'; + return buffer + 1; + } + } + else { + if (s.has_all_zero_significand_bits()) { + if (s.is_negative()) { + *buffer = '-'; + ++buffer; + } + std::memcpy(buffer, "Infinity", 8); + return buffer + 8; + } + else { + std::memcpy(buffer, "NaN", 3); + return buffer + 3; + } + } + } + } + + // Returns the next-to-end position + template , class... Policies> + char* to_chars_n(Float x, char* buffer, Policies... policies) noexcept { + using namespace jkj::dragonbox::detail::policy_impl; + using policy_holder = decltype(make_policy_holder( + base_default_pair_list, + base_default_pair, + base_default_pair>{}, + policies...)); + + return to_chars_detail::to_chars_n_impl(float_bits(x), + buffer); + } + + // Null-terminate and bypass the return value of fp_to_chars_n + template , class... Policies> + char* to_chars(Float x, char* buffer, Policies... policies) noexcept { + auto ptr = to_chars_n(x, buffer, policies...); + *ptr = '\0'; + return ptr; + } + + // Maximum required buffer size (excluding null-terminator) + template + inline constexpr std::size_t max_output_string_length = + std::is_same_v + ? 
+ // sign(1) + significand(9) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(2) + (1 + 9 + 1 + 1 + 1 + 2) + : + // format == ieee754_format::binary64 + // sign(1) + significand(17) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(3) + (1 + 17 + 1 + 1 + 1 + 3); +} + +#endif diff --git a/server/dragonbox/dragonbox_to_chars.hpp b/server/dragonbox/dragonbox_to_chars.hpp new file mode 100644 index 0000000..7199d74 --- /dev/null +++ b/server/dragonbox/dragonbox_to_chars.hpp @@ -0,0 +1,521 @@ +// Copyright 2020-2022 Junekey Jeon +// +// The contents of this file may be used under the terms of +// the Apache License v2.0 with LLVM Exceptions. +// +// (See accompanying file LICENSE-Apache or copy at +// https://llvm.org/foundation/relicensing/LICENSE.txt) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + +#pragma once + +#include "dragonbox_to_chars.h" + +#if defined(__GNUC__) || defined(__clang__) + #define JKJ_FORCEINLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) + #define JKJ_FORCEINLINE __forceinline +#else + #define JKJ_FORCEINLINE inline +#endif + +namespace jkj::dragonbox { + namespace to_chars_detail { + // These "//"'s are to prevent clang-format to ruin this nice alignment. 
+ // Thanks to reddit user u/mcmcc: + // https://www.reddit.com/r/cpp/comments/so3wx9/dragonbox_110_is_released_a_fast_floattostring/hw8z26r/?context=3 + static constexpr char radix_100_table[] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', // + '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', // + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', // + '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', // + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', // + '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', // + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', // + '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', // + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', // + '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', // + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', // + '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', // + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', // + '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', // + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', // + '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', // + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', // + '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', // + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', // + '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' // + }; + static constexpr char radix_100_head_table[] = { + '0', '.', '1', '.', '2', '.', '3', '.', '4', '.', // + '5', '.', '6', '.', '7', '.', '8', '.', '9', '.', // + '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // + '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // + '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // + '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // + '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // + '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // + '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // + '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // + '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', // + '5', '.', '5', '.', '5', '.', '5', '.', 
'5', '.', // + '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // + '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // + '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // + '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // + '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // + '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // + '9', '.', '9', '.', '9', '.', '9', '.', '9', '.', // + '9', '.', '9', '.', '9', '.', '9', '.', '9', '.' // + }; + + // These digit generation routines are inspired by James Anhalt's itoa algorithm: + // https://github.com/jeaiii/itoa + // The main idea is for given n, find y such that floor(10^k * y / 2^32) = n holds, + // where k is an appropriate integer depending on the length of n. + // For example, if n = 1234567, we set k = 6. In this case, we have + // floor(y / 2^32) = 1, + // floor(10^2 * ((10^0 * y) mod 2^32) / 2^32) = 23, + // floor(10^2 * ((10^2 * y) mod 2^32) / 2^32) = 45, and + // floor(10^2 * ((10^4 * y) mod 2^32) / 2^32) = 67. + // See https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ for more explanation. + + JKJ_FORCEINLINE static void print_9_digits(std::uint32_t s32, int& exponent, + char*& buffer) noexcept { + // -- IEEE-754 binary32 + // Since we do not cut trailing zeros in advance, s32 must be of 6~9 digits + // unless the original input was subnormal. + // In particular, when it is of 9 digits it shouldn't have any trailing zeros. + // -- IEEE-754 binary64 + // In this case, s32 must be of 7~9 digits unless the input is subnormal, + // and it shouldn't have any trailing zeros if it is of 9 digits. + if (s32 >= 1'0000'0000) { + // 9 digits. 
+ // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 + auto prod = s32 * std::uint64_t(1441151882); + prod >>= 25; + std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2); + + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + exponent += 8; + buffer += 10; + } + else if (s32 >= 100'0000) { + // 7 or 8 digits. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = s32 * std::uint64_t(281474978); + prod >>= 16; + auto two_digits = std::uint32_t(prod >> 32); + // If s32 is of 8 digits, increase the exponent by 7. + // Otherwise, increase it by 6. + exponent += (6 + unsigned(two_digits >= 10)); + + // Write the first digit and the decimal point. + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + // Remaining 6 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) { + // The number of characters actually written is: + // 1, if only the first digit is nonzero, which means that either s32 is of 7 + // digits or it is of 8 digits but the second digit is zero, or + // 3, otherwise. + // Note that buffer[2] is never zero if s32 is of 7 digits, because the input is + // never zero. + buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2); + } + else { + // At least one of the remaining 6 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. 
+ buffer += unsigned(two_digits >= 10); + + // Obtain the next two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2); + + // Remaining 4 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { + buffer += (3 + unsigned(buffer[3] > '0')); + } + else { + // At least one of the remaining 4 digits are nonzero. + + // Obtain the next two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2); + + // Remaining 2 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { + buffer += (5 + unsigned(buffer[5] > '0')); + } + else { + // Obtain the last two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2); + + buffer += (7 + unsigned(buffer[7] > '0')); + } + } + } + } + else if (s32 >= 1'0000) { + // 5 or 6 digits. + // 429497 = ceil(2^32 / 1'0000) + auto prod = s32 * std::uint64_t(429497); + auto two_digits = std::uint32_t(prod >> 32); + + // If s32 is of 6 digits, increase the exponent by 5. + // Otherwise, increase it by 4. + exponent += (4 + unsigned(two_digits >= 10)); + + // Write the first digit and the decimal point. + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + // Remaining 4 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. 
+ buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2); + } + else { + // At least one of the remaining 4 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. + buffer += unsigned(two_digits >= 10); + + // Obtain the next two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2); + + // Remaining 2 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { + buffer += (3 + unsigned(buffer[3] > '0')); + } + else { + // Obtain the last two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2); + + buffer += (5 + unsigned(buffer[5] > '0')); + } + } + } + else if (s32 >= 100) { + // 3 or 4 digits. + // 42949673 = ceil(2^32 / 100) + auto prod = s32 * std::uint64_t(42949673); + auto two_digits = std::uint32_t(prod >> 32); + + // If s32 is of 4 digits, increase the exponent by 3. + // Otherwise, increase it by 2. + exponent += (2 + int(two_digits >= 10)); + + // Write the first digit and the decimal point. + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + // Remaining 2 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. + buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2); + } + else { + // At least one of the remaining 2 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. + buffer += unsigned(two_digits >= 10); + + // Obtain the last two digits. 
+ prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2); + + buffer += (3 + unsigned(buffer[3] > '0')); + } + } + else { + // 1 or 2 digits. + // If s32 is of 2 digits, increase the exponent by 1. + exponent += int(s32 >= 10); + + // Write the first digit and the decimal point. + std::memcpy(buffer, radix_100_head_table + s32 * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[s32 * 2 + 1]; + + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. + buffer += (1 + (unsigned(s32 >= 10) & unsigned(buffer[2] > '0')) * 2); + } + } + + template <> + char* to_chars>(std::uint32_t s32, int exponent, + char* buffer) noexcept { + // Print significand. + print_9_digits(s32, exponent, buffer); + + // Print exponent and return + if (exponent < 0) { + std::memcpy(buffer, "E-", 2); + buffer += 2; + exponent = -exponent; + } + else if (exponent > 0) { + buffer[0] = 'E'; + buffer += 1; + } + else { + return buffer; + } + + if (exponent >= 10) { + std::memcpy(buffer, &radix_100_table[exponent * 2], 2); + buffer += 2; + } + else { + buffer[0] = char('0' + exponent); + buffer += 1; + } + + return buffer; + } + + template <> + char* to_chars>(std::uint64_t const significand, + int exponent, char* buffer) noexcept { + // Print significand by decomposing it into a 9-digit block and a 8-digit block. 
+ std::uint32_t first_block, second_block; + bool no_second_block; + + if (significand >= 1'0000'0000) { + first_block = std::uint32_t(significand / 1'0000'0000); + second_block = std::uint32_t(significand) - first_block * 1'0000'0000; + exponent += 8; + no_second_block = (second_block == 0); + } + else { + first_block = std::uint32_t(significand); + no_second_block = true; + } + + if (no_second_block) { + print_9_digits(first_block, exponent, buffer); + } + else { + // We proceed similarly to print_9_digits(), but since we do not need to remove + // trailing zeros, the procedure is a bit simpler. + if (first_block >= 1'0000'0000) { + // The input is of 17 digits, thus there should be no trailing zero at all. + // The first block is of 9 digits. + // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 + auto prod = first_block * std::uint64_t(1441151882); + prod >>= 25; + std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2); + + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + // The second block is of 8 digits. 
+ // 281474978 = ceil(2^48 / 100'0000) + 1 + prod = second_block * std::uint64_t(281474978); + prod >>= 16; + prod += 1; + std::memcpy(buffer + 10, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 12, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 14, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 16, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + exponent += 8; + buffer += 18; + } + else { + if (first_block >= 100'0000) { + // 7 or 8 digits. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = first_block * std::uint64_t(281474978); + prod >>= 16; + auto two_digits = std::uint32_t(prod >> 32); + + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + exponent += (6 + unsigned(two_digits >= 10)); + buffer += unsigned(two_digits >= 10); + + // Print remaining 6 digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + buffer += 8; + } + else if (first_block >= 1'0000) { + // 5 or 6 digits. + // 429497 = ceil(2^32 / 1'0000) + auto prod = first_block * std::uint64_t(429497); + auto two_digits = std::uint32_t(prod >> 32); + + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + exponent += (4 + unsigned(two_digits >= 10)); + buffer += unsigned(two_digits >= 10); + + // Print remaining 4 digits. 
+ prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + buffer += 6; + } + else if (first_block >= 100) { + // 3 or 4 digits. + // 42949673 = ceil(2^32 / 100) + auto prod = first_block * std::uint64_t(42949673); + auto two_digits = std::uint32_t(prod >> 32); + + std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2); + buffer[2] = radix_100_table[two_digits * 2 + 1]; + + exponent += (2 + unsigned(two_digits >= 10)); + buffer += unsigned(two_digits >= 10); + + // Print remaining 2 digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2); + + buffer += 4; + } + else { + // 1 or 2 digits. + std::memcpy(buffer, radix_100_head_table + first_block * 2, 2); + buffer[2] = radix_100_table[first_block * 2 + 1]; + + exponent += unsigned(first_block >= 10); + buffer += (2 + unsigned(first_block >= 10)); + } + + // Next, print the second block. + // The second block is of 8 digits, but we may have trailing zeros. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = second_block * std::uint64_t(281474978); + prod >>= 16; + prod += 1; + auto two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer, radix_100_table + two_digits * 2, 2); + + // Remaining 6 digits are all zero? + if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) { + buffer += (1 + unsigned(buffer[1] > '0')); + } + else { + // Obtain the next two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2); + + // Remaining 4 digits are all zero? 
+ if (std::uint32_t(prod) <= + std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { + buffer += (3 + unsigned(buffer[3] > '0')); + } + else { + // Obtain the next two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2); + + // Remaining 2 digits are all zero? + if (std::uint32_t(prod) <= + std::uint32_t((std::uint64_t(1) << 32) / 100)) { + buffer += (5 + unsigned(buffer[5] > '0')); + } + else { + // Obtain the last two digits. + prod = std::uint32_t(prod) * std::uint64_t(100); + two_digits = std::uint32_t(prod >> 32); + std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2); + buffer += (7 + unsigned(buffer[7] > '0')); + } + } + } + } + } + + // Print exponent and return + if (exponent < 0) { + std::memcpy(buffer, "E-", 2); + buffer += 2; + exponent = -exponent; + } + else if (exponent > 0) { + buffer[0] = 'E'; + buffer += 1; + } + else { + return buffer; + } + + if (exponent >= 100) { + // d1 = exponent / 10; d2 = exponent % 10; + // 6554 = ceil(2^16 / 10) + auto prod = std::uint32_t(exponent) * std::uint32_t(6554); + auto d1 = prod >> 16; + prod = std::uint16_t(prod) * std::uint32_t(5); // * 10 + auto d2 = prod >> 15; // >> 16 + std::memcpy(buffer, &radix_100_table[d1 * 2], 2); + buffer[2] = char('0' + d2); + buffer += 3; + } + else if (exponent >= 10) { + std::memcpy(buffer, &radix_100_table[exponent * 2], 2); + buffer += 2; + } + else { + buffer[0] = char('0' + exponent); + buffer += 1; + } + + return buffer; + } + } +} + diff --git a/server/gc.h b/server/gc.h new file mode 100644 index 0000000..7bc8d8d --- /dev/null +++ b/server/gc.h @@ -0,0 +1,63 @@ +#ifndef __AQ_USE_THREADEDGC__ +#include +class GC { +private:; + + size_t max_slots, + interval, forced_clean, + forceclean_timer = 0; + uint64_t max_size; + bool running, alive; +// ptr, dealloc, ref, sz + uint32_t threshould; + void *q, *q_back; + void* handle; + std::atomic slot_pos; + 
std::atomic alive_cnt; + std::atomic current_size; + volatile bool lock; + // maybe use volatile std::thread::id instead +protected: + void acquire_lock(); + void release_lock(); + void gc(); + void daemon(); + void start_deamon(); + void terminate_daemon(); + +public: + void reg(void* v, uint32_t sz = 1, + void(*f)(void*) = free + ); + + GC( + uint64_t max_size = 0xfffffff, uint32_t max_slots = 4096, + uint32_t interval = 10000, uint32_t forced_clean = 1000000, + uint32_t threshould = 64 //one seconds + ) : max_size(max_size), max_slots(max_slots), + interval(interval), forced_clean(forced_clean), + threshould(threshould) { + + start_deamon(); + GC::gc_handle = this; + } // 256 MB + + ~GC(){ + terminate_daemon(); + } + static GC* gc_handle; + constexpr static void(*_free) (void*) = free; +}; + +#else +class GC { +public: + GC(uint32_t) = default; + void reg( + void* v, uint32_t = 0, + void(*f)(void*) = free + ) const { f(v); } + static GC* gc; + constexpr static void(*_free) (void*) = free; +} +#endif diff --git a/server/gc.hpp b/server/gc.hpp deleted file mode 100644 index 4c66060..0000000 --- a/server/gc.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include -#include -#include -#include -class GC { - template - using vector = vector_type; - template - using tuple = std::tuple; - size_t current_size, max_size, interval, forced_clean; - bool running, alive; -// ptr, dealloc, ref, sz - vector> q; - std::thread handle; - void gc() - { - - } - void reg(void* v, uint32_t ref, uint32_t sz, - void(*f)(void*) = [](void* v) {free (v); }) { - current_size += sz; - if (current_size > max_size) - gc(); - q.push_back({ v, f }); - } - void daemon() { - using namespace std::chrono; - while (alive) { - if (running) { - gc(); - std::this_thread::sleep_for(microseconds(interval)); - } - else { - std::this_thread::sleep_for(10ms); - } - } - } - void start_deamon() { - handle = std::thread(&daemon); - alive = true; - } - void terminate_daemon() { - running = false; - alive = 
false; - using namespace std::chrono; - - if (handle.joinable()) { - std::this_thread::sleep_for(microseconds(1000 + std::max(static_cast(10000), interval))); - handle.join(); - } - } -}; \ No newline at end of file diff --git a/server/jeaiii_to_text.h b/server/jeaiii_to_text.h new file mode 100644 index 0000000..63fd9ee --- /dev/null +++ b/server/jeaiii_to_text.h @@ -0,0 +1,121 @@ +#pragma once +// Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa +using u32 = decltype(0xffffffff); +using u64 = decltype(0xffffffffffffffff); + +static_assert(u32(-1) > 0, "u32 must be unsigned"); +static_assert(u32(0xffffffff) + u32(1) == u32(0), "u32 must be 32 bits"); +static_assert(u64(-1) > 0, "u64 must be unsigned"); +static_assert(u64(0xffffffffffffffff) + u32(1) == u32(0), "u64 must be 64 bits"); + +constexpr auto digits_00_99 = + "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849" + "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899"; + +struct pair { char t, o; }; + +#define JEAIII_W(I, U) *(pair*)&b[I] = *(pair*)&digits_00_99[(U) * 2] +#define JEAIII_A(I, N) t = (u64(1) << (32 + N / 5 * N * 53 / 16)) / u32(1e##N) + 1 + N / 6 - N / 8, t *= u, t >>= N / 5 * N * 53 / 16, t += N / 6 * 4, JEAIII_W(I, t >> 32) +#define JEAIII_S(I) b[I] = char(u64(10) * u32(t) >> 32) + '0' +#define JEAIII_D(I) t = u64(100) * u32(t), JEAIII_W(I, t >> 32) + +#define JEAIII_C0(I) b[I] = char(u) + '0' +#define JEAIII_C1(I) JEAIII_W(I, u) +#define JEAIII_C2(I) JEAIII_A(I, 1), JEAIII_S(I + 2) +#define JEAIII_C3(I) JEAIII_A(I, 2), JEAIII_D(I + 2) +#define JEAIII_C4(I) JEAIII_A(I, 3), JEAIII_D(I + 2), JEAIII_S(I + 4) +#define JEAIII_C5(I) JEAIII_A(I, 4), JEAIII_D(I + 2), JEAIII_D(I + 4) +#define JEAIII_C6(I) JEAIII_A(I, 5), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_S(I + 6) +#define JEAIII_C7(I) JEAIII_A(I, 6), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I 
+ 6) +#define JEAIII_C8(I) JEAIII_A(I, 7), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_S(I + 8) +#define JEAIII_C9(I) JEAIII_A(I, 8), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_D(I + 8) + +#define JEAIII_L(N, A, B) u < u32(1e##N) ? A : B +#define JEAIII_L09(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(6, JEAIII_L(4, JEAIII_L(3, F(2), F(3)), JEAIII_L(5, F(4), F(5))), JEAIII_L(8, JEAIII_L(7, F(6), F(7)), JEAIII_L(9, F(8), F(9))))) +#define JEAIII_L03(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(3, F(2), F(3))) + +#define JEAIII_K(N) (JEAIII_C##N(0), b + N + 1) +#define JEAIII_KX(N) (JEAIII_C##N(0), u = x, JEAIII_C7(N + 1), b + N + 9) +#define JEAIII_KYX(N) (JEAIII_C##N(0), u = y, JEAIII_C7(N + 1), u = x, JEAIII_C7(N + 9), b + N + 17) + +template struct _cond { using type = F; }; +template struct _cond { using type = T; }; +template using cond = typename _cond::type; + +template inline char* to_text_from_integer(char* b, T i) +{ + u64 t = u64(i); + + if (i < T(0)) + t = u64(0) - t, b[0] = '-', ++b; + + u32 u = cond, unsigned char>, bool>(t); + + // if our input type fits in 32bits, or its value does, ctreat as 32bit (the line above ensures the compiler can still know the range limits of the input type) + // and optimize out cases for small integer types (if only c++ had a builtin way to get the unsigned type from a signed type) + if (sizeof(i) <= sizeof(u) || u == t) + return JEAIII_L09(JEAIII_K); + + u32 x = t % 100000000u; + u = u32(t /= 100000000u); + + // t / 10^8 (fits in 32 bit), t % 10^8 -> ~17.5 digits + if (u == t) + return JEAIII_L09(JEAIII_KX); + + // t / 10^16 (1-4 digits), t / 10^8 % 10^8, t % 10^8 + u32 y = t % 100000000u; + u = u32(t / 100000000u); + return JEAIII_L03(JEAIII_KYX); +} + +inline char* to_text(char text[], signed char i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], unsigned char i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], short i) { 
return to_text_from_integer(text, i); } +inline char* to_text(char text[], unsigned short i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], int i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], unsigned int i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], long i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], unsigned long i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], long long i) { return to_text_from_integer(text, i); } +inline char* to_text(char text[], unsigned long long i) { return to_text_from_integer(text, i); } + +// Copyright (c) 2022 Bill Sun + +//#if defined(SIZEOF___INT128) || (defined(SIZEOF___INT128_T) && defined(SIZEOF___UINT128_T)) +constexpr static __uint128_t _10_19 = 10000000000000000000ull, + _10_37 = _10_19*_10_19 / 10; + +template +char* jeaiii_i128(char* buf, T v){ + if constexpr (std::is_signed_v) { + if (v < 0){ + *(buf++) = '0'; + v = -v; + } + } + if (v > _10_37){ + uint8_t vv = uint8_t(v/_10_37); + // vv <<= 1; + // if (vv < 20) + // *buf ++ = digits_00_99[vv + 1]; + // else{ + // memcpy(buf, digits_00_99 + vv, 2); + // buf += 2; + // } + + *(buf++) = vv%10 + '0'; + vv/=10; + if (vv) { + *buf = *(buf-1); + *(buf++-1) = vv + '0'; + } + } + + if (v > _10_19) + buf = to_text(buf, uint64_t((v/_10_19) % _10_19)); + + buf = to_text(buf, uint64_t(v % _10_19)); + return buf; +} +// #endif diff --git a/server/io.cpp b/server/libaquery.cpp similarity index 59% rename from server/io.cpp rename to server/libaquery.cpp index 878c0b6..93a03da 100644 --- a/server/io.cpp +++ b/server/libaquery.cpp @@ -1,20 +1,20 @@ #include "pch_msc.hpp" #include "io.h" -#include "table.h" #include #include #include #include "utils.h" +#include "libaquery.h" #include char* gbuf = nullptr; void setgbuf(char* buf) { - static char* b = 0; - if (buf == 0) + static char* b = nullptr; + if (buf == nullptr) gbuf = b; else { 
gbuf = buf; @@ -63,6 +63,7 @@ T getInt(const char*& buf){ } return ret; } + template char* intToString(T val, char* buf){ @@ -275,6 +276,44 @@ inline const char* str(const bool& v) { return v ? "true" : "false"; } + +Context::Context() { + current.memory_map = new std::unordered_map; + init_session(); +} + +Context::~Context() { + auto memmap = (std::unordered_map*) this->current.memory_map; + delete memmap; +} + +void Context::init_session(){ + if (log_level == LOG_INFO){ + memset(&(this->current.stats), 0, sizeof(Session::Statistic)); + } + auto memmap = (std::unordered_map*) this->current.memory_map; + memmap->clear(); +} + +void Context::end_session(){ + auto memmap = (std::unordered_map*) this->current.memory_map; + for (auto& mem : *memmap) { + mem.second(mem.first); + } + memmap->clear(); +} + +void* Context::get_module_function(const char* fname){ + auto fmap = static_cast*> + (this->module_function_maps); + // printf("%p\n", fmap->find("mydiv")->second); + // for (const auto& [key, value] : *fmap){ + // printf("%s %p\n", key.c_str(), value); + // } + auto ret = fmap->find(fname); + return ret == fmap->end() ? 
nullptr : ret->second; +} + // template // inline void vector_type<_Ty>::out(uint32_t n, const char* sep) const // { @@ -288,3 +327,195 @@ inline const char* str(const bool& v) { // } // std::cout << ')'; // } + +#include "gc.h" +#include +#include +#ifndef __AQ_USE_THREADEDGC__ + +struct gcmemory_t{ + void* memory; + void (*deallocator)(void*); +}; + +using memoryqueue_t = gcmemory_t*; +void GC::acquire_lock() { + // auto this_tid = std::this_thread::get_id(); + // while(lock != this_tid) + // { + // while(lock != this_tid && lock != std::thread::id()) { + // std::this_thread::sleep_for(std::chrono::milliseconds(0)); + // } + // lock = this_tid; + // } +} + +void GC::release_lock(){ + // lock = std::thread::id(); +} + +void GC::gc() +{ + auto _q = static_cast(q); + auto _q_back = static_cast(q_back); + if (slot_pos == 0) + return; + auto t = _q; + lock = true; + while(alive_cnt != 0); + q = _q_back; + uint32_t _slot = slot_pos; + slot_pos = 0; + current_size = 0; + lock = false; + q_back = t; + + for(uint32_t i = 0; i < _slot; ++i){ + if (_q[i].memory != nullptr && _q[i].deallocator != nullptr) + _q[i].deallocator(_q[i].memory); + } + memset(_q, 0, sizeof(gcmemory_t) * _slot); + running = false; +} + +void GC::daemon() { + using namespace std::chrono; + + while (alive) { + if (running) { + if (current_size - max_size > 0 || + forceclean_timer > forced_clean) + { + gc(); + forceclean_timer = 0; + } + std::this_thread::sleep_for(microseconds(interval)); + forceclean_timer += interval; + } + else { + std::this_thread::sleep_for(10ms); + forceclean_timer += 10000; + } + } +} + +void GC::start_deamon() { + q = new gcmemory_t[max_slots << 1]; + q_back = new memoryqueue_t[max_slots << 1]; + lock = false; + slot_pos = 0; + current_size = 0; + alive_cnt = 0; + alive = true; + handle = new std::thread(&GC::daemon, this); +} + +void GC::terminate_daemon() { + running = false; + alive = false; + decltype(auto) _handle = static_cast(handle); + delete[] static_cast(q); + 
delete[] static_cast(q_back); + using namespace std::chrono; + std::this_thread::sleep_for(microseconds(1000 + std::max(static_cast(10000), interval))); + + if (_handle->joinable()) { + _handle->join(); + } + delete _handle; +} + +void GC::reg(void* v, uint32_t sz, void(*f)(void*)) { //~ 40ns expected v. free ~ 75ns + if (v == nullptr || f == nullptr) + return; + if (sz < threshould){ + f(v); + return; + } + auto _q = static_cast(q); + while(lock); + ++alive_cnt; + current_size += sz; + auto _slot = (slot_pos += 1); + _q[_slot] = {v, f}; + --alive_cnt; + running = true; +} + +#endif + +GC* GC::gc_handle = nullptr; + +#include "dragonbox/dragonbox_to_chars.hpp" + + +template<> +char* +aq_to_chars(void* value, char* buffer) { + return jkj::dragonbox::to_chars_n(*static_cast(value), buffer); +} +template<> +char* +aq_to_chars(void* value, char* buffer) { + return jkj::dragonbox::to_chars_n(*static_cast(value), buffer); +} + +template<> +inline char* +aq_to_chars(void* value, char* buffer) { + if (*static_cast(value)){ + memcpy(buffer, "true", 4); + return buffer + 4; + } + else{ + memcpy(buffer, "false", 5); + return buffer + 5; + } +} + +template<> +char* +aq_to_chars(void* value, char* buffer) { + const auto src = *static_cast(value); + const auto len = strlen(src); + memcpy(buffer, src, len); + return buffer + len; +} + +template<> +char* +aq_to_chars(void* value, char* buffer) { + const auto& src = *static_cast(value); + buffer = to_text(buffer, src.year); + *buffer++ = '-'; + buffer = to_text(buffer, src.month); + *buffer++ = '-'; + buffer = to_text(buffer, src.day); + return buffer; +} + +template<> +char* +aq_to_chars(void* value, char* buffer) { + const auto& src = *static_cast(value); + buffer = to_text(buffer, src.hours); + *buffer++ = ':'; + buffer = to_text(buffer, src.minutes); + *buffer++ = ':'; + buffer = to_text(buffer, src.seconds); + *buffer++ = ':'; + buffer = to_text(buffer, src.ms); + return buffer; +} + +template<> +char* +aq_to_chars(void* 
value, char* buffer) { + auto& src = *static_cast(value); + buffer = aq_to_chars(static_cast(&src.date), buffer); + *buffer++ = ' '; + buffer = aq_to_chars(static_cast(&src.time), buffer); + return buffer; +} + + diff --git a/server/libaquery.h b/server/libaquery.h index 551d205..cc0b5a9 100644 --- a/server/libaquery.h +++ b/server/libaquery.h @@ -1,8 +1,37 @@ #ifndef _AQUERY_H #define _AQUERY_H -#include "table.h" +#ifdef __INTELLISENSE__ + #define __AQUERY_ITC_USE_SEMPH__ + #define THREADING + #define __AQ_THREADED_GC__ +#endif + #include +#include +class aq_timer { +private: + std::chrono::high_resolution_clock::time_point now; +public: + aq_timer(){ + now = std::chrono::high_resolution_clock::now(); + } + void reset(){ + now = std::chrono::high_resolution_clock::now(); + } + long long elapsed(){ + long long ret = (std::chrono::high_resolution_clock::now() - now).count(); + reset(); + return ret; + } + long long lap() const{ + long long ret = (std::chrono::high_resolution_clock::now() - now).count(); + return ret; + } +}; + +#include "table.h" + enum Log_level { LOG_INFO, @@ -15,9 +44,16 @@ enum Backend_Type { BACKEND_MonetDB, BACKEND_MariaDB }; + +struct QueryStats{ + long long monet_time; + long long postproc_time; +}; struct Config{ - int running, new_query, server_mode, - backend_type, has_dll, exec_time, n_buffers; + int running, new_query, server_mode, + backend_type, has_dll, + n_buffers; + QueryStats stats; int buffer_sizes[]; }; @@ -47,7 +83,10 @@ struct Context{ #ifdef THREADING void* thread_pool; #endif - printf_type print = printf; +#ifdef __AQ_THREADED_GC__ + void* gc; +#endif + printf_type print = &printf; Context(); virtual ~Context(); template @@ -67,6 +106,8 @@ struct Context{ std::unordered_map cols; }; + + #ifdef _WIN32 #define __DLLEXPORT__ __declspec(dllexport) __stdcall #else @@ -76,4 +117,40 @@ struct Context{ #define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__ typedef void (*deallocator_t) (void*); + +#include +#include 
"jeaiii_to_text.h" + +template +inline std::enable_if_t, char *> +aq_to_chars(void* value, char* buffer) { + return to_text(buffer, *static_cast(value)); +} + +template +inline std::enable_if_t, char *> +aq_to_chars(void* value, char* buffer) { + return buffer; +} + +#ifdef __SIZEOF_INT128__ +template<> +inline char* +aq_to_chars<__int128_t>(void* value, char* buffer) { + return jeaiii_i128<__int128_t>(buffer, *static_cast<__int128_t*>(value)); +} + +template<> +inline char* +aq_to_chars<__uint128_t>(void* value, char* buffer) { + return jeaiii_i128<__uint128_t>(buffer, *static_cast<__uint128_t*>(value)); +} +#endif + +template<> char* aq_to_chars(void* , char*); +template<> char* aq_to_chars(void* , char*); +template<> char* aq_to_chars(void* , char*); +template<> char* aq_to_chars(void* , char*); +template<> char* aq_to_chars(void* , char*); +template<> char* aq_to_chars(void* , char*); #endif diff --git a/server/monetdb_conn.cpp b/server/monetdb_conn.cpp index b29f1a8..b3fbd1e 100644 --- a/server/monetdb_conn.cpp +++ b/server/monetdb_conn.cpp @@ -2,12 +2,14 @@ #include "libaquery.h" #include +#include #include "monetdb_conn.h" #include "monetdbe.h" #include "table.h" + #undef static_assert -const char* monetdbe_type_str[] = { +constexpr const char* monetdbe_type_str[] = { "monetdbe_bool", "monetdbe_int8_t", "monetdbe_int16_t", "monetdbe_int32_t", "monetdbe_int64_t", #ifdef HAVE_HGE "monetdbe_int128_t", @@ -20,7 +22,7 @@ const char* monetdbe_type_str[] = { "monetdbe_type_unknown" } ; -const unsigned char monetdbe_type_szs[] = { +inline constexpr static unsigned char monetdbe_type_szs[] = { sizeof(monetdbe_column_bool::null_value), sizeof(monetdbe_column_int8_t::null_value), sizeof(monetdbe_column_int16_t::null_value), sizeof(monetdbe_column_int32_t::null_value), sizeof(monetdbe_column_int64_t::null_value), @@ -36,7 +38,19 @@ const unsigned char monetdbe_type_szs[] = { 1 }; +namespace types{ + constexpr const Type_t monetdbe_type_aqtypes[] = { + ABOOL, AINT8, 
AINT16, AINT32, AINT64, +#ifdef HAVE_HGE + AINT128, +#endif + AUINT64, AFLOAT, ADOUBLE, ASTR, + // blob? + AINT64, + ADATE, ATIME, ATIMESTAMP, ERROR + }; +} Server::Server(Context* cxt){ if (cxt){ @@ -80,7 +94,7 @@ void Server::connect(Context *cxt){ else{ if(server) free(server); - this->server = 0; + this->server = nullptr; status = false; puts(ret == -1 ? "Allocation Error." : "Internal Database Error."); } @@ -103,7 +117,7 @@ void Server::exec(const char* q){ bool Server::haserror(){ if (last_error){ - last_error = 0; + last_error = nullptr; return true; } else{ @@ -111,12 +125,53 @@ bool Server::haserror(){ } } + +void Server::print_results(const char* sep, const char* end){ + + if (!haserror()){ + auto _res = static_cast (res); + const auto& ncols = _res->ncols; + monetdbe_column** cols = static_cast(malloc(sizeof(monetdbe_column*) * ncols)); + std::string* printf_string = new std::string[ncols]; + const char** col_data = static_cast (malloc(sizeof(char*) * ncols)); + uint8_t* szs = static_cast(alloca(ncols)); + std::string header_string = ""; + const char* err_msg = nullptr; + for(uint32_t i = 0; i < ncols; ++i){ + err_msg = monetdbe_result_fetch(_res, &cols[i], i); + printf_string[i] = + std::string(types::printf_str[types::monetdbe_type_aqtypes[cols[i]->type]]) + + (i < ncols - 1 ? 
sep : ""); + puts(printf_string[i].c_str()); + puts(monetdbe_type_str[cols[i]->type]); + col_data[i] = static_cast(cols[i]->data); + szs [i] = monetdbe_type_szs[cols[i]->type]; + header_string = header_string + cols[i]->name + sep + '|' + sep; + } + const size_t l_sep = strlen(sep) + 1; + if (header_string.size() - l_sep >= 0) + header_string.resize(header_string.size() - l_sep); + header_string += end + std::string(header_string.size(), '=') + end; + fputs(header_string.c_str(), stdout); + for(uint64_t i = 0; i < cnt; ++i){ + for(uint32_t j = 0; j < ncols; ++j){ + printf(printf_string[j].c_str(), *((void**)col_data[j])); + col_data[j] += szs[j]; + } + fputs(end, stdout); + } + free(cols); + delete[] printf_string; + free(col_data); + } +} + void Server::close(){ if(this->server){ auto server = static_cast(this->server); monetdbe_close(*(server)); free(server); - this->server = 0; + this->server = nullptr; } } @@ -130,7 +185,7 @@ void* Server::getCol(int col_idx){ auto _ret_col = static_cast(this->ret_col); cnt = _ret_col->count; printf("Dbg: Getting col %s, type: %s\n", - _ret_col->name, monetdbe_type_str[_ret_col->type]); + _ret_col->name, monetdbe_type_str[_ret_col->type]); return _ret_col->data; } else{ @@ -140,7 +195,7 @@ void* Server::getCol(int col_idx){ else{ puts("Error: No result."); } - return 0; + return nullptr; } Server::~Server(){ @@ -149,10 +204,10 @@ Server::~Server(){ bool Server::havehge() { #if defined(_MONETDBE_LIB_) and defined(HAVE_HGE) - puts("true"); + // puts("true"); return HAVE_HGE; #else - puts("false"); + // puts("false"); return false; #endif } diff --git a/server/monetdb_conn.h b/server/monetdb_conn.h index 467cb2c..9894218 100644 --- a/server/monetdb_conn.h +++ b/server/monetdb_conn.h @@ -22,6 +22,9 @@ struct Server{ void close(); bool haserror(); static bool havehge(); + void test(const char*); + void print_results(const char* sep = " ", const char* end = "\n"); + friend void print_monetdb_results(Server* srv, const char* sep, 
const char* end, int limit); ~Server(); }; diff --git a/server/server.cpp b/server/server.cpp index 2105545..6514093 100644 --- a/server/server.cpp +++ b/server/server.cpp @@ -1,47 +1,137 @@ #include "pch_msc.hpp" -#include "../csv.h" #include #include #include +#include #include "libaquery.h" #include "monetdb_conn.h" #ifdef THREADING #include "threading.h" #endif + #ifdef _WIN32 #include "winhelper.h" #else #include #include #include +#include + +// fast numeric to string conversion +#include "jeaiii_to_text.h" +#include "dragonbox/dragonbox_to_chars.h" + struct SharedMemory { + std::atomic a; int hFileMap; void* pData; - SharedMemory(const char* fname) { + explicit SharedMemory(const char* fname) { hFileMap = open(fname, O_RDWR, 0); if (hFileMap != -1) - pData = mmap(NULL, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0); + pData = mmap(nullptr, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0); else - pData = 0; + pData = nullptr; } - void FreeMemoryMap() { + void FreeMemoryMap() const { + // automatically unmapped in posix + } +}; +#ifndef __USE_STD_SEMAPHORE__ +#ifdef __APPLE__ +#include +class A_Semaphore { +private: + dispatch_semaphore_t native_handle; +public: + A_Semaphore(bool v = false) { + native_handle = dispatch_semaphore_create(v); + } + void acquire() { + // puts("acquire"); + dispatch_semaphore_wait(native_handle, DISPATCH_TIME_FOREVER); + } + void release() { + // puts("release"); + dispatch_semaphore_signal(native_handle); + } + ~A_Semaphore() { + } +}; +#else +#include +class A_Semaphore { +private: + sem_t native_handle; +public: + A_Semaphore(bool v = false) { + sem_init(&native_handle, v, 1); + } + void acquire() { + sem_wait(&native_handle); + } + void release() { + sem_post(&native_handle); + } + ~A_Semaphore() { + sem_destroy(&native_handle); + } +}; +#endif +#endif +#endif + +#ifdef __USE_STD_SEMAPHORE__ +#define __AQUERY_ITC_USE_SEMPH__ +#include +class A_Semaphore { +private: + std::binary_semaphore native_handle; +public: + 
A_Semaphore(bool v = false) { + native_handle = std::binary_semaphore(v); + } + void acquire() { + native_handle.acquire(); } + void release() { + native_handle.release(); + } + ~A_Semaphore() { } }; #endif -#include "aggregations.h" +#ifdef __AQUERY_ITC_USE_SEMPH__ +A_Semaphore prompt{ true }, engine{ false }; +#define PROMPT_ACQUIRE() prompt.acquire() +#define PROMPT_RELEASE() prompt.release() +#define ENGINE_ACQUIRE() engine.acquire() +#define ENGINE_RELEASE() engine.release() +#else +#define PROMPT_ACQUIRE() +#define PROMPT_RELEASE() std::this_thread::sleep_for(std::chrono::nanoseconds(0)) +#define ENGINE_ACQUIRE() +#define ENGINE_RELEASE() +#endif + typedef int (*code_snippet)(void*); typedef void (*module_init_fn)(Context*); -int test_main(); int n_recv = 0; char** n_recvd = nullptr; +__AQEXPORT__(void) wait_engine(){ + PROMPT_ACQUIRE(); +} + +__AQEXPORT__(void) wake_engine(){ + ENGINE_RELEASE(); +} + extern "C" void __DLLEXPORT__ receive_args(int argc, char**argv){ n_recv = argc; n_recvd = argv; @@ -71,42 +161,99 @@ __AQEXPORT__(bool) have_hge(){ #endif } -Context::Context() { - current.memory_map = new std::unordered_map; - init_session(); -} +using prt_fn_t = char* (*)(void*, char*); -Context::~Context() { - auto memmap = (std::unordered_map*) this->current.memory_map; - delete memmap; -} -void Context::init_session(){ - if (log_level == LOG_INFO){ - memset(&(this->current.stats), 0, sizeof(Session::Statistic)); - } - auto memmap = (std::unordered_map*) this->current.memory_map; - memmap->clear(); -} +constexpr prt_fn_t monetdbe_prtfns[] = { + aq_to_chars, aq_to_chars, aq_to_chars, aq_to_chars, + aq_to_chars, +#if __SIZEOF_INT128__ + aq_to_chars<__int128_t>, +#endif + aq_to_chars, aq_to_chars, aq_to_chars, + aq_to_chars, aq_to_chars, + aq_to_chars, aq_to_chars, aq_to_chars, + + // should be last: + aq_to_chars +}; + +#include "monetdbe.h" +inline constexpr static unsigned char monetdbe_type_szs[] = { + sizeof(monetdbe_column_bool::null_value), 
sizeof(monetdbe_column_int8_t::null_value), + sizeof(monetdbe_column_int16_t::null_value), sizeof(monetdbe_column_int32_t::null_value), + sizeof(monetdbe_column_int64_t::null_value), +#ifdef __SIZEOF_INT128__ + sizeof(monetdbe_column_int128_t::null_value), +#endif + sizeof(monetdbe_column_size_t::null_value), sizeof(monetdbe_column_float::null_value), + sizeof(monetdbe_column_double::null_value), + sizeof(monetdbe_column_str::null_value), sizeof(monetdbe_column_blob::null_value), + sizeof(monetdbe_data_date), sizeof(monetdbe_data_time), sizeof(monetdbe_data_timestamp), + + // should be last: + 1 +}; +constexpr uint32_t output_buffer_size = 65536; +void print_monetdb_results(Server* srv, const char* sep = " ", const char* end = "\n", + uint32_t limit = std::numeric_limits::max()) { + if (!srv->haserror() && srv->cnt && limit){ + char buffer[output_buffer_size]; + auto _res = static_cast (srv->res); + const auto& ncols = _res->ncols; + monetdbe_column** cols = static_cast(malloc(sizeof(monetdbe_column*) * ncols)); + prt_fn_t *prtfns = (prt_fn_t*) alloca(sizeof(prt_fn_t) * ncols); + char** col_data = static_cast (alloca(sizeof(char*) * ncols)); + uint8_t* szs = static_cast(alloca(ncols)); + std::string header_string = ""; + const char* err_msg = nullptr; + const size_t l_sep = strlen(sep); + const size_t l_end = strlen(end); + char* _buffer = buffer; + + for(uint32_t i = 0; i < ncols; ++i){ + err_msg = monetdbe_result_fetch(_res, &cols[i], i); + if(err_msg) { goto cleanup; } + col_data[i] = static_cast(cols[i]->data); + prtfns[i] = monetdbe_prtfns[cols[i]->type]; + szs [i] = monetdbe_type_szs[cols[i]->type]; + header_string = header_string + cols[i]->name + sep + '|' + sep; + } -void Context::end_session(){ - auto memmap = (std::unordered_map*) this->current.memory_map; - for (auto& mem : *memmap) { - mem.second(mem.first); + if(l_sep > 512 || l_end > 512) { + puts("Error: separator or end string too long"); + goto cleanup; + } + if (header_string.size() - l_sep - 1>= 
0) + header_string.resize(header_string.size() - l_sep - 1); + header_string += end + std::string(header_string.size(), '=') + end; + fputs(header_string.c_str(), stdout); + for(uint64_t i = 0; i < srv->cnt; ++i){ + for(uint32_t j = 0; j < ncols; ++j){ + //copy the field to buf + _buffer = prtfns[j](col_data[j], _buffer); + if (j != ncols - 1){ + memcpy(_buffer, sep, l_sep); + _buffer += l_sep; + } + col_data[j] += szs[j]; + } + memcpy(_buffer, end, l_end); + _buffer += l_end; + if(output_buffer_size - (_buffer - buffer) <= 1024){ + fwrite(buffer, 1, _buffer - buffer, stdout); + _buffer = buffer; + } + } + memcpy(_buffer, end, l_end); + _buffer += l_end; + if (_buffer != buffer) + fwrite(buffer, 1, _buffer - buffer, stdout); +cleanup: + free(cols); } - memmap->clear(); } -void* Context::get_module_function(const char* fname){ - auto fmap = static_cast*> - (this->module_function_maps); - // printf("%p\n", fmap->find("mydiv")->second); - // for (const auto& [key, value] : *fmap){ - // printf("%s %p\n", key.c_str(), value); - // } - auto ret = fmap->find(fname); - return ret == fmap->end() ? 
nullptr : ret->second; -} void initialize_module(const char* module_name, void* module_handle, Context* cxt){ auto _init_module = reinterpret_cast(dlsym(module_handle, "init_session")); @@ -119,15 +266,16 @@ void initialize_module(const char* module_name, void* module_handle, Context* cx } int dll_main(int argc, char** argv, Context* cxt){ + aq_timer timer; Config *cfg = reinterpret_cast(argv[0]); std::unordered_map user_module_map; - if (cxt->module_function_maps == 0) + if (cxt->module_function_maps == nullptr) cxt->module_function_maps = new std::unordered_map(); auto module_fn_map = static_cast*>(cxt->module_function_maps); auto buf_szs = cfg->buffer_sizes; - void** buffers = (void**)malloc(sizeof(void*) * cfg->n_buffers); + void** buffers = (void**) malloc (sizeof(void*) * cfg->n_buffers); for (int i = 0; i < cfg->n_buffers; i++) buffers[i] = static_cast(argv[i + 1]); @@ -135,19 +283,28 @@ int dll_main(int argc, char** argv, Context* cxt){ cxt->cfg = cfg; cxt->n_buffers = cfg->n_buffers; cxt->sz_bufs = buf_szs; - cxt->alt_server = NULL; - + if (cfg->backend_type == BACKEND_MonetDB && cxt->alt_server == nullptr) + { + auto alt_server = new Server(cxt); + alt_server->exec("SELECT '**** WELCOME TO AQUERY++! 
****';"); + puts(*(const char**)(alt_server->getCol(0))); + cxt->alt_server = alt_server; + } while(cfg->running){ + ENGINE_ACQUIRE(); if (cfg->new_query) { - void *handle = 0; - void *user_module_handle = 0; + cfg->stats.postproc_time = 0; + cfg->stats.monet_time = 0; + + void *handle = nullptr; + void *user_module_handle = nullptr; if (cfg->backend_type == BACKEND_MonetDB){ - if (cxt->alt_server == 0) + if (cxt->alt_server == nullptr) cxt->alt_server = new Server(cxt); Server* server = reinterpret_cast(cxt->alt_server); if(n_recv > 0){ if (cfg->backend_type == BACKEND_AQuery || cfg->has_dll) { - handle = dlopen("./dll.so", RTLD_LAZY); + handle = dlopen("./dll.so", RTLD_NOW); } for (const auto& module : user_module_map){ initialize_module(module.first.c_str(), module.second, cxt); @@ -159,14 +316,18 @@ int dll_main(int argc, char** argv, Context* cxt){ switch(n_recvd[i][0]){ case 'Q': // SQL query for monetdbe { + timer.reset(); server->exec(n_recvd[i] + 1); - printf("Exec Q%d: %s", i, n_recvd[i]); + cfg->stats.monet_time += timer.elapsed(); + // printf("Exec Q%d: %s", i, n_recvd[i]); } break; case 'P': // Postprocessing procedure if(handle && !server->haserror()) { code_snippet c = reinterpret_cast(dlsym(handle, n_recvd[i]+1)); + timer.reset(); c(cxt); + cfg->stats.postproc_time += timer.elapsed(); } break; case 'M': // Load Module @@ -193,12 +354,21 @@ int dll_main(int argc, char** argv, Context* cxt){ //printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? 
module_fn_map->find("mydiv")->second : nullptr); } break; + case 'O': + { + if(!server->haserror()){ + timer.reset(); + print_monetdb_results(server); + cfg->stats.postproc_time += timer.elapsed(); + } + } + break; case 'U': // Unload Module { auto mname = n_recvd[i] + 1; auto it = user_module_map.find(mname); if (user_module_handle == it->second) - user_module_handle = 0; + user_module_handle = nullptr; dlclose(it->second); user_module_map.erase(it); } @@ -207,8 +377,9 @@ int dll_main(int argc, char** argv, Context* cxt){ } if(handle) { dlclose(handle); - handle = 0; + handle = nullptr; } + printf("%lld, %lld", cfg->stats.monet_time, cfg->stats.postproc_time); cxt->end_session(); n_recv = 0; } @@ -217,7 +388,7 @@ int dll_main(int argc, char** argv, Context* cxt){ } else{ server->last_error = nullptr; - continue; + //goto finalize; } } @@ -230,9 +401,11 @@ int dll_main(int argc, char** argv, Context* cxt){ if (handle) dlclose(handle); cfg->new_query = 0; } - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + //puts(cfg->running? 
"true": "false"); +//finalize: + PROMPT_RELEASE(); } - + return 0; } @@ -263,20 +436,21 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) { #ifdef __AQ_BUILD_LAUNCHER__ return launcher(argc, argv); #endif - puts("running"); + // puts("running"); Context* cxt = new Context(); - cxt->log("%d %s\n", argc, argv[1]); + // cxt->log("%d %s\n", argc, argv[1]); #ifdef THREADING auto tp = new ThreadPool(); cxt->thread_pool = tp; #endif +#ifdef __AQ_THREADED_GC__ + cxt->gc_thread = new std::thread(gc_thread, cxt); +#endif const char* shmname; if (argc < 0) return dll_main(argc, argv, cxt); - else if (argc <= 1) - return test_main(); else shmname = argv[1]; SharedMemory shm = SharedMemory(shmname); @@ -310,56 +484,3 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) { return 0; } -#include "utils.h" -#include "table_ext_monetdb.hpp" -int test_main() -{ - Context* cxt = new Context(); - if (cxt->alt_server == 0) - cxt->alt_server = new Server(cxt); - Server* server = reinterpret_cast(cxt->alt_server); - - const char* qs[]= { - "QCREATE TABLE trade(stocksymbol INT, time INT, quantity INT, price INT);", - "QCOPY OFFSET 2 INTO trade FROM 'w:/gg/AQuery++/data/trade_numerical.csv' ON SERVER USING DELIMITERS ',';", - "QSELECT stocksymbol, (SUM((quantity * price)) / SUM(quantity)) AS weighted_average FROM trade GROUP BY stocksymbol ;", - "Pdll_5lYrMY", - "QSELECT stocksymbol, price FROM trade ORDER BY time ;", - "Pdll_4Sg6Ri", - "QSELECT stocksymbol, quantity, price FROM trade ORDER BY time ;", - "Pdll_5h4kL2", - "QSELECT stocksymbol, price FROM trade ORDER BY time ;", - "Pdll_7tEWCO", - "QSELECT query_c.weighted_moving_averages, query_c.stocksymbol FROM query_c;", - "Pdll_7FCPnF" - }; - n_recv = sizeof(qs)/(sizeof (char*)); - n_recvd = const_cast(qs); - void* handle = 0; - handle = dlopen("./dll.so", RTLD_LAZY); - cxt->init_session(); - for (int i = 0; i < n_recv; ++i) - { - //printf("%s, %d\n", n_recvd[i], n_recvd[i][0] == 'Q'); - switch (n_recvd[i][0]) { - case 
'Q': // SQL query for monetdbe - { - server->exec(n_recvd[i] + 1); - printf("Exec Q%d: %s\n", i, n_recvd[i]); - } - break; - case 'P': // Postprocessing procedure - if (handle && !server->haserror()) { - code_snippet c = reinterpret_cast(dlsym(handle, n_recvd[i] + 1)); - c(cxt); - } - break; - } - } - n_recv = 0; - - //static_assert(std::is_same_v()), std::integer_sequence>, ""); - - return 0; -} - diff --git a/server/table.h b/server/table.h index 56c7a4b..af26ae7 100644 --- a/server/table.h +++ b/server/table.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "io.h" #include "hasher.h" @@ -74,7 +75,16 @@ public: this->container = (_Ty*)container; this->name = name; } - template