Merge pull request #5 from sunyinqi0508/dev

Merge from dev branch
dev
sunyinqi0508 2 years ago committed by GitHub
commit c4e92abf61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

2
.gitignore vendored

@ -51,12 +51,14 @@ k
**/Debug
**/Release
test*.c*
data/benchmark
*.csv
!test.csv
!test2.csv
!moving_avg.csv
!nyctx100.csv
!network.csv
!test_complex.csv
*.out
*.asm
!mmw.so

@ -1,25 +1,34 @@
OS_SUPPORT =
MonetDB_LIB =
MonetDB_INC =
Threading =
Defines =
CXXFLAGS = --std=c++1z
ifeq ($(AQ_DEBUG), 1)
OPTFLAGS = -g3
OPTFLAGS = -g3 -fsanitize=address -fsanitize=leak
LINKFLAGS =
else
OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector
LINKFLAGS = -flto -s
endif
LINKFLAGS = -flto # + $(AQ_LINK_FLAG)
SHAREDFLAGS = -shared
FPIC = -fPIC
COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc)
_COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc)
COMPILER = $(strip $(_COMPILER))
LIBTOOL = ar rcs
USELIB_FLAG = -Wl,--whole-archive,libaquery.a -Wl,-no-whole-archive
LIBAQ_SRC = server/server.cpp server/monetdb_conn.cpp server/io.cpp
LIBAQ_OBJ = server.o monetdb_conn.o io.o
LIBAQ_SRC = server/monetdb_conn.cpp server/libaquery.cpp
LIBAQ_OBJ = monetdb_conn.o libaquery.o
SEMANTIC_INTERPOSITION = -fno-semantic-interposition
RANLIB = ranlib
_LINKER_BINARY = $(shell `$(CXX) -print-prog-name=ld` -v 2>&1 | grep -q LLVM && echo lld || echo ld)
LINKER_BINARY = $(strip $(_LINKER_BINARY))
ifeq ($(LINKER_BINARY), ld)
LINKER_FLAGS = -Wl,--allow-multiple-definition
else
LINKER_FLAGS =
endif
ifeq ($(COMPILER), clang )
ifeq ($(COMPILER), clang)
CLANG_GE_10 = $(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \>= 10)
ifneq ($(CLANG_GE_10), 1)
SEMANTIC_INTERPOSITION =
@ -49,7 +58,7 @@ ifeq ($(OS),Windows_NT)
MonetDB_LIB += msc-plugin/monetdbe.dll
MonetDB_INC += -Imonetdb/msvc
LIBTOOL = gcc-ar rcs
ifeq ($(COMPILER), clang )
ifeq ($(COMPILER), clang)
FPIC =
endif
else
@ -61,7 +70,7 @@ else
USELIB_FLAG = -Wl,-force_load
MonetDB_LIB += -L$(shell brew --prefix monetdb)/lib
MonetDB_INC += -I$(shell brew --prefix monetdb)/include/monetdb
ifeq ($(COMPILER), clang )
ifeq ($(COMPILER), clang)
LIBTOOL = libtool -static -o
endif
ifneq ($(UNAME_M),arm64)
@ -79,43 +88,65 @@ endif
ifeq ($(THREADING),1)
LIBAQ_SRC += server/threading.cpp
LIBAQ_OBJ += threading.o
Threading += -DTHREADING
Defines += -DTHREADING
endif
ifeq ($(AQUERY_ITC_USE_SEMPH), 1)
Defines += -D__AQUERY_ITC_USE_SEMPH__
endif
SHAREDFLAGS += $(FPIC)
CXXFLAGS += $(OPTFLAGS) $(Defines) $(MonetDB_INC)
BINARYFLAGS = $(CXXFLAGS) $(LINKFLAGS) $(MonetDB_LIB)
SHAREDFLAGS += $(FPIC) $(BINARYFLAGS)
# Default goal: print a short usage guide followed by the resolved build
# variables. Nothing is built here; every line is a parse-time $(info ...).
# The leading `$"` on indented lines expands to nothing (it references a
# variable named `"`, presumably never defined) and appears to exist only to
# protect the text that follows from $(info)'s leading-whitespace stripping.
.PHONY: info
info:
	$(info This makefile script is used in AQuery to automatically build required libraries and executables.)
	$(info Run it manually only for debugging purposes.)
	$(info Targets (built by `make <target>`):)
	$(info $" pch: generate precompiled header)
	$(info $" libaquery: build static library (libaquery.a))
	$(info $" server.so: build execution engine)
	$(info $" snippet: build generated query snippet)
	$(info $" server_uselib: build execution engine using shared library and pch)
	$(info $" snippet_uselib: build generated query snippet using shared library and pch)
	$(info $" docker: build docker image with name aquery)
	$(info $" launcher: build launcher for aquery ./aq)
	$(info $" clean: remove all generated binaries and caches)
	$(info )
	$(info Variables:)
	$(info $" OPTFLAGS: $(OPTFLAGS))
	$(info $" OS_SUPPORT: $(OS_SUPPORT))
	$(info $" OS: $(OS))
	$(info $" Defines: $(Defines))
	$(info $" LIBTOOL: $(LIBTOOL))
	$(info $" MonetDB_INC: $(MonetDB_INC))
	$(info $" COMPILER: $(COMPILER))
	$(info $" CXX: $(CXX))
	$(info $" LINKER_BINARY: $(LINKER_BINARY))
	$(info $" LINKER_FLAGS: $(LINKER_FLAGS))
pch:
$(CXX) -x c++-header server/pch.hpp $(FPIC) $(MonetDB_INC) $(OPTFLAGS) $(CXXFLAGS) $(Threading)
libaquery.a:
$(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(OS_SUPPORT) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) &&\
$(CXX) -x c++-header server/pch.hpp $(FPIC) $(CXXFLAGS)
libaquery:
$(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(CXXFLAGS) &&\
$(LIBTOOL) libaquery.a $(LIBAQ_OBJ) &&\
$(RANLIB) libaquery.a
warmup:
$(CXX) msc-plugin/dummy.cpp libaquery.a $(SHAREDFLAGS) -o dll.so
server.bin:
$(CXX) $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o server.bin
$(CXX) $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o server.bin
launcher:
$(CXX) -D__AQ_BUILD_LAUNCHER__ $(LIBAQ_SRC) $(LINKFLAGS) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(CXXFLAGS) -o aq
$(CXX) -D__AQ_BUILD_LAUNCHER__ server/server.cpp $(LIBAQ_SRC) $(OS_SUPPORT) $(BINARYFLAGS) -o aq
server.so:
# $(CXX) -z muldefs server/server.cpp server/monetdb_conn.cpp -fPIC -shared $(OS_SUPPORT) monetdb/msvc/monetdbe.dll --std=c++1z -O3 -march=native -o server.so -I./monetdb/msvc
$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(Threading) $(MonetDB_INC) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so
$(CXX) $(PCHFLAGS) $(LIBAQ_SRC) server/server.cpp $(OS_SUPPORT) $(SHAREDFLAGS) -o server.so
server_uselib:
$(CXX) $(SHAREDFLAGS) $(USELIB_FLAG),libaquery.a $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o server.so
$(CXX) $(LINKER_FLAGS) server/server.cpp libaquery.a $(SHAREDFLAGS) -o server.so
snippet:
$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(MonetDB_INC) $(MonetDB_LIB) $(Threading) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so
$(CXX) $(PCHFLAGS) out.cpp $(LIBAQ_SRC) $(SHAREDFLAGS) -o dll.so
snippet_uselib:
$(CXX) $(SHAREDFLAGS) $(PCHFLAGS) out.cpp libaquery.a $(MonetDB_INC) $(Threading) $(MonetDB_LIB) $(OPTFLAGS) $(LINKFLAGS) $(CXXFLAGS) -o dll.so
$(CXX) $(PCHFLAGS) out.cpp libaquery.a $(SHAREDFLAGS) -o dll.so
docker:
docker build -t aquery .

@ -226,9 +226,38 @@ DROP TABLE my_table IF EXISTS
- File name can also be absolute path.
- See `data/q1.sql` for more information
## Combine Queries
- `UNION ALL` is a bag union of two query results with same schema. e.g.
```
SELECT * FROM table1 UNION ALL SELECT * FROM table2
```
- `EXCEPT` clause will return the difference of two query results, e.g. `SELECT * FROM table1 EXCEPT SELECT * FROM table2`.
## Delete Data:
- Use a query like `DELETE FROM <table_name> [WHERE <conditions>]` to delete the rows of a table that match the conditions.
## Performance Measurement
- Execution time can be recorded using the `stats` command described above.
- `stats` command without any argument will show the execution time of all queries executed so far.
- `stats reset` will reset the timer for total execution time printed by `stats` command above.
- `stats on` will show execution time for every following query until a `stats off` command is received.
## MonetDB Passthrough for Hybrid Engine
AQuery++ supports MonetDB passthrough for hybrid engine. Simply put standard SQL queries inside a \<sql> \</sql> block. <br>
Queries inside a \<sql> block must be separated by semicolons. They are sent to MonetDB directly, so they must be written in the MonetDB dialect rather than the AQuery dialect. Please refer to the [MonetDB documentation](https://www.monetdb.org/documentation-Sep2022/user-guide/sql-summary/) for more information.
For example:
```
CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
INSERT INTO my_table VALUES(10, 20, "example"), (20, 30, "example2")
<sql>
INSERT INTO my_table VALUES(10, 20, "example3");
CREATE INDEX idx1 ON my_table(c1);
</sql>
SELECT * FROM my_table WHERE c1 > 10
```
## Built-in functions:
- `avg[s]`: average of a column. `avgs(col), avgs(w, col)` is rolling and moving average with window `w` of the column `col`.
- `var[s]`, `stddev[s]`: [moving/rolling] **population** variance, standard deviation.
@ -250,7 +279,7 @@ DROP TABLE my_table IF EXISTS
- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB, and everything else is executed by a post-processing module that is generated in C++ by the AQuery++ compiler and then compiled and executed.
- AQuery Library: A set of header based libraries that provide column arithmetic and operations inspired by array programming languages like kdb. This library is used by C++ post-processor code which can significantly reduce the complexity of generated code, reducing compile time while maintaining the best performance. The set of libraries can also be used by UDFs as well as User modules which makes it easier for users to write simple but powerful extensions.
# Roadmap
- [x] SQL Parser -> AQuery Parser (Front End)
- [x] AQuery-C++ Compiler (Back End)

@ -2,7 +2,7 @@
## GLOBAL CONFIGURATION FLAGS
version_string = '0.4.9a'
version_string = '0.5.3a'
add_path_to_ldpath = True
rebuild_backend = False
run_backend = True
@ -11,6 +11,9 @@ cygroot = 'c:/msys64/usr/bin'
msbuildroot = ''
os_platform = 'unknown'
build_driver = 'Auto'
compilation_output = True
## END GLOBAL CONFIGURATION FLAGS
def init_config():
global __config_initialized__, os_platform, msbuildroot, build_driver
@ -21,7 +24,8 @@ def init_config():
import os
from engine.utils import add_dll_dir
# os.environ['CXX'] = 'C:/Program Files/LLVM/bin/clang.exe'
# os.environ['THREADING'] = '1'
os.environ['THREADING'] = '1'
os.environ['AQUERY_ITC_USE_SEMPH'] = '1'
if ('__config_initialized__' not in globals() or
not __config_initialized__):

@ -243,8 +243,8 @@ RESERVED = MatchFirst([
WITHIN,
INTO,
])
L_INLINE = Literal("<k>").suppress()
R_INLINE = Literal("</k>").suppress()
L_INLINE = Literal("<sql>").suppress()
R_INLINE = Literal("</sql>").suppress()
LBRACE = Literal("{").suppress()
RBRACE = Literal("}").suppress()
LSB = Literal("[").suppress()

@ -8,6 +8,7 @@
#
from sre_parse import WHITESPACE
from mo_parsing.helpers import restOfLine
from mo_parsing.infix import delimited_list
from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
@ -65,7 +66,7 @@ def parser(literal_string, ident, sqlserver=False):
var_name = ~RESERVED + ident
inline_kblock = (L_INLINE + SkipTo(R_INLINE, include=True))("c")
inline_sqlblock = (L_INLINE + SkipTo(R_INLINE, include=True))("sql")
# EXPRESSIONS
expr = Forward()
column_type, column_definition, column_def_references = get_column_type(
@ -568,8 +569,9 @@ def parser(literal_string, ident, sqlserver=False):
| assign("comment", EQ + literal_string)
| assign("default character set", EQ + var_name)
| assign("default charset", EQ + var_name)
)
+ Optional(AS.suppress() + infix_notation(query, [])("query"))
)
+ Optional(AS.suppress() + query("query"))
# investigate why infix_notation(query, []) eats up the rest of queries
)("create_table")
create_view = (
@ -655,7 +657,8 @@ def parser(literal_string, ident, sqlserver=False):
) / to_json_call
load_data = (
keyword("data") ("file_type")
Optional(keyword("complex")("complex"))
+ keyword("data") ("file_type")
+ keyword("infile")("loc")
+ literal_string ("file")
+ INTO
@ -667,6 +670,12 @@ def parser(literal_string, ident, sqlserver=False):
+ keyword("by").suppress()
+ literal_string ("term")
)
+ Optional(
keyword("element").suppress()
+ keyword("terminated").suppress()
+ keyword("by").suppress()
+ literal_string ("ele")
)
)
module_func_def = (
@ -716,7 +725,7 @@ def parser(literal_string, ident, sqlserver=False):
)("stmts"), ";")
other_stmt = (
inline_kblock
inline_sqlblock
| udf
) ("stmts")

@ -16,6 +16,7 @@ class checksums:
server : Optional[Union[bytes, bool]] = None
sources : Optional[Union[Dict[str, bytes], bool]] = None
env : str = ''
def calc(self, compiler_name, libaquery_a = 'libaquery.a' ,
pch_hpp_gch = 'server/pch.hpp.gch',
server = 'server.so'
@ -24,7 +25,8 @@ class checksums:
self.env = (aquery_config.os_platform +
machine() +
aquery_config.build_driver +
compiler_name
compiler_name +
aquery_config.version_string
)
for key in self.__dict__.keys():
try:
@ -71,14 +73,14 @@ class checksums:
class build_manager:
sourcefiles = [
'build.py', 'Makefile',
'server/server.cpp', 'server/io.cpp',
'server/server.cpp', 'server/libaquery.cpp',
'server/monetdb_conn.cpp', 'server/threading.cpp',
'server/winhelper.cpp'
]
headerfiles = ['server/aggregations.h', 'server/hasher.h', 'server/io.h',
'server/libaquery.h', 'server/monetdb_conn.h', 'server/pch.hpp',
'server/table.h', 'server/threading.h', 'server/types.h', 'server/utils.h',
'server/winhelper.h', 'server/gc.hpp', 'server/vector_type.hpp',
'server/winhelper.h', 'server/gc.h', 'server/vector_type.hpp',
'server/table_ext_monetdb.hpp'
]
@ -92,6 +94,9 @@ class build_manager:
return False
def build(self, stdout = sys.stdout, stderr = sys.stderr):
ret = True
if not aquery_config.compilation_output:
stdout = nullstream
stderr = nullstream
for c in self.build_cmd:
if c:
try: # only last success matters
@ -100,6 +105,8 @@ class build_manager:
ret = False
pass
return ret
def warmup(self):
return True
class MakefileDriver(DriverBase):
def __init__(self, mgr : 'build_manager') -> None:
@ -111,9 +118,9 @@ class build_manager:
mgr.cxx = os.environ['CXX']
if 'AQ_DEBUG' not in os.environ:
os.environ['AQ_DEBUG'] = '0' if mgr.OptimizationLv else '1'
def libaquery_a(self):
self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery.a']]
self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery']]
return self.build()
def pch(self):
self.build_cmd = [['rm', 'server/pch.hpp.gch'], ['make', 'pch']]
@ -166,6 +173,10 @@ class build_manager:
self.build_cmd = [[aquery_config.msbuildroot, loc, self.opt, self.platform]]
return self.build()
def warmup(self):
self.build_cmd = [['make', 'warmup']]
return self.build()
#class PythonDriver(DriverBase):
# def __init__(self, mgr : 'build_manager') -> None:
# super().__init__(mgr)
@ -221,6 +232,9 @@ class build_manager:
current.calc(self.cxx, libaquery_a)
with open('.cached', 'wb') as cache_sig:
cache_sig.write(pickle.dumps(current))
self.driver.warmup()
else:
if aquery_config.os_platform == 'mac':
os.system('./arch-check.sh')

61
csv.h

@ -1,4 +1,4 @@
// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>, 2022 Bill Sun
// License: BSD-3
//
// All rights reserved.
@ -49,6 +49,7 @@
#include <cerrno>
#include <istream>
#include <limits>
#include "server/vector_type.hpp"
namespace io{
////////////////////////////////////////////////////////////////////////////
@ -974,8 +975,7 @@ namespace io{
return;
}
x = 10*x+y;
}else
throw error::no_digit();
}
++col;
}
}
@ -1005,8 +1005,7 @@ namespace io{
return;
}
x = 10*x-y;
}else
throw error::no_digit();
}
++col;
}
return;
@ -1080,19 +1079,37 @@ namespace io{
}
x *= base;
}
}else{
if(*col != '\0')
throw error::no_digit();
}
if(is_neg)
x = -x;
}
template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
// Parses one CSV cell holding a sep2-separated list and appends every element
// to `x` (existing contents of `x` are kept).
//
// The cell is tokenised in place: a NUL is written temporarily at the start of
// the next element, the current element is parsed, and the overwritten byte is
// put back before moving on. Because the NUL lands *after* the separator and
// any blanks that follow it, each element is handed to the scalar parser with
// its trailing separator/whitespace still attached -- the scalar parsers must
// therefore tolerate trailing non-numeric characters.
template<class overflow_policy, class T, char sep2 = ';'>
void parse_vector(char* col, vector_type<T>& x) {
    // Characters skipped between two elements.
    const auto is_gap = [](char c) {
        return c == ' ' || c == '\t' || c == sep2 || c == '\r' || c == '\n';
    };

    for (char* token = col; *token != '\0'; ) {
        // Advance to the separator (or the end of the cell) ...
        char* cut = token;
        while (*cut != '\0' && *cut != sep2)
            ++cut;
        // ... then past the separator and any blanks, to the next element.
        while (is_gap(*cut))
            ++cut;

        // Terminate the current element temporarily so it can be parsed alone.
        const char saved = *cut;
        *cut = '\0';

        T element;
        ::io::detail::parse<overflow_policy>(token, element);
        x.emplace_back(element);

        // Undo the in-place edit and continue with the next element.
        *cut = saved;
        token = cut;
    }
}
template<class overflow_policy, class T>
void parse(char*col, T&x){
// Mute unused variable compiler warning
@ -1108,6 +1125,7 @@ namespace io{
}
template<unsigned column_count,
char sep2 = ';',
class trim_policy = trim_chars<' ', '\t'>,
class quote_policy = no_quote_escape<','>,
class overflow_policy = throw_on_overflow,
@ -1234,7 +1252,23 @@ namespace io{
parse_helper(r+1, cols...);
}
// Overload selected when the next output column is a vector_type<T>:
// parses cell `r` of the current row into `t` via parse_vector (elements
// separated by sep2), then recurses on the remaining columns starting at r+1.
template<class T, class ...ColType>
void parse_helper(std::size_t r, vector_type<T>&t, ColType&...cols){
// A null row[r] is skipped; `t` is left untouched in that case.
if(row[r]){
// Two nested try blocks, each of which annotates and rethrows: the inner
// handler attaches the cell text, the outer one attaches the column name.
// Presumably this lets an error deriving from both bases receive both
// annotations -- TODO confirm against the error class hierarchy.
try{
try{
::io::detail::parse_vector<overflow_policy, T, sep2>(row[r], t);
}catch(error::with_column_content&err){
err.set_column_content(row[r]);
throw;
}
}catch(error::with_column_name&err){
err.set_column_name(column_names[r].c_str());
throw;
}
}
// Continue with the next column regardless of whether this cell was present.
parse_helper(r+1, cols...);
}
public:
template<class ...ColType>
bool read_row(ColType& ...cols){
@ -1269,5 +1303,12 @@ namespace io{
}
};
}
// CSV reader configuration used by AQuery.
//   column_count - number of columns per row
//   sep1         - column separator, passed to no_quote_escape
//   sep2         - forwarded as the reader's sep2 parameter
// Cells are trimmed of spaces and tabs, overflow is ignored, and the
// empty_line_comment policy is used.
template <unsigned column_count, char sep1 = ',', char sep2 = ';'>
using AQCSVReader = io::CSVReader<
    column_count,
    sep2,
    io::trim_chars<' ', '\t'>,
    io::no_quote_escape<sep1>,
    io::ignore_overflow,
    io::empty_line_comment
>;
#endif

@ -1,11 +1,21 @@
a, b, c, d
1,1,2,2
2,1,2,2
2,4,3,4
1,2,2,2
1,2,3,4
4,2,1,4
2,1,3,4
2,1,3,3
2,1,1,2
1,2,3,4
3,2,4,2
1,2,3,3
3,2,1,2
2,1,2,2
2,1,4,2
3,3,4,4
2,2,3,1
2,3,4,4
2,4,1,2
3,4,1,2
2,3,2,2
1,2,3,1

1 a b c d
2 1 1 2 2
3 2 1 2 2
4 2 4 3 4
5 1 2 2 2
6 1 2 3 4
7 4 2 1 4
8 2 1 3 4 3
9 2 1 1 2
10 1 2 3 4
11 3 2 4 2
12 1 2 3 3
13 3 2 1 2
14 2 1 2 4 2
15 3 3 4 4
16 2 2 3 1
17 2 3 4 4
18 2 4 1 2
19 3 4 1 2
20 2 3 2 2
21 1 2 3 1

@ -0,0 +1,6 @@
a,b,c
5e-3, 3;4 ;5e-3;6.32,7
1,2,3
4,5;6;7;8;9, 0
3 ,2 ; 4; 5.7; -.3; 5., 6
-3.12312,-4E+7;67456746744567;75,4
1 a b c
2 5e-3 3;4 ;5e-3;6.32 7
3 1 2 3
4 4 5;6;7;8;9 0
5 3 2 ; 4; 5.7; -.3; 5. 6
6 -3.12312 -4E+7;67456746744567;75 4

@ -151,5 +151,5 @@ int gen_stock_data(int argc, char* argv[]){
}
// Program entry point: runs gen_stock_data exactly once with the command-line
// arguments and reports its result as the process exit status.
int main(int argc, char* argv[]){
    return gen_stock_data(argc, argv);
}

@ -1,8 +1,9 @@
from copy import deepcopy
from engine.utils import base62uuid, defval
from aquery_config import have_hge
from typing import Dict, List
from aquery_config import have_hge
from engine.utils import base62uuid, defval
type_table: Dict[str, "Types"] = {}
class Types:
@ -65,10 +66,10 @@ class Types:
return self.sqlname
@staticmethod
def decode(aquery_type : str, vector_type:str = 'ColRef') -> "Types":
if (aquery_type.startswith('vec')):
def decode(aquery_type : str, vector_type:str = 'vector_type') -> "Types":
if (aquery_type.lower().startswith('vec')):
return VectorT(Types.decode(aquery_type[3:]), vector_type)
return type_table[aquery_type]
return type_table[aquery_type.lower()]
class TypeCollection:
def __init__(self, sz, deftype, fptype = None, utype = None, *, collection = None) -> None:
@ -121,7 +122,7 @@ class VectorT(Types):
return f'{self.vector_type}<{self.inner_type.name}>'
@property
def sqlname(self) -> str:
    # Vector columns are declared as HUGEINT on the SQL side, i.e. a
    # 128-bit (16-byte) integer slot per value.
    return 'HUGEINT'
@property
def cname(self) -> str:
return f'{self.vector_type}<{self.inner_type.cname}>'
@ -142,7 +143,7 @@ fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT
temporal_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', DateT, TimeT, TimeStampT)
builtin_types : Dict[str, Types] = {
'string' : StrT,
**_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT),
**_ty_make_dict('t.sqlname.lower()', AnyT, TextT, VarcharT, HgeT),
**int_types, **fp_types, **temporal_types}
def get_int128_support():
@ -294,7 +295,7 @@ opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call
# monetdb wont extend int division to fp type
# opdiv = OperatorBase('div', 2, fp(auto_extension), cname = '/', sqlname = '/', call = binary_op_behavior)
opdiv = OperatorBase('div', 2, auto_extension, cname = '/', sqlname = '/', call = binary_op_behavior)
opmul = OperatorBase('mul', 2, fp(auto_extension), cname = '*', sqlname = '*', call = binary_op_behavior)
opmul = OperatorBase('mul', 2, auto_extension, cname = '*', sqlname = '*', call = binary_op_behavior)
opsub = OperatorBase('sub', 2, auto_extension, cname = '-', sqlname = '-', call = binary_op_behavior)
opmod = OperatorBase('mod', 2, auto_extension_int, cname = '%', sqlname = '%', call = binary_op_behavior)
opneg = OperatorBase('neg', 1, as_is, cname = '-', sqlname = '-', call = unary_op_behavior)
@ -323,10 +324,14 @@ fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', ca
#fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior)
fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior)
fnavg = OperatorBase('avg', 1, lfp_return, cname = 'avg', sqlname = 'AVG', call = fn_behavior)
fnvar = OperatorBase('var', 1, lfp_return, cname = 'var', sqlname = 'VAR_POP', call = fn_behavior)
fnstd = OperatorBase('stddev', 1, lfp_return, cname = 'stddev', sqlname = 'STDDEV_POP', call = fn_behavior)
fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlname = 'MAXS', call = windowed_fn_behavor)
fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor)
fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor)
fnavgs = OperatorBase('avgs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'avgs', sqlname = 'AVGS', call = windowed_fn_behavor)
fnvars = OperatorBase('vars', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'vars', sqlname = 'VARS', call = windowed_fn_behavor)
fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor)
fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior)
fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior)
# special
@ -360,8 +365,14 @@ builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow)
builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs,
fnmins, fndeltas, fnratios, fnlast,
fnfirst, fnsums, fnavgs, fncnt,
fnpack, fntrunc, fnprev, fnnext)
fnpack, fntrunc, fnprev, fnnext,
fnvar, fnvars, fnstd, fnstds)
user_module_func = {}
builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical,
**builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib,
**user_module_func}
type_table = {**builtin_types, **type_table}
# Additional Aliases for type names
type_table['boolean'] = BoolT

@ -1,6 +1,6 @@
from collections import OrderedDict
from collections.abc import MutableMapping, Mapping
import uuid
from collections import OrderedDict
from collections.abc import Mapping, MutableMapping
lower_alp = 'abcdefghijklmnopqrstuvwxyz'
upper_alp = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@ -107,6 +107,8 @@ def defval(val, default):
# escape must be readonly
from typing import Mapping, Set
def remove_last(pattern : str, string : str, escape : Set[str] = set()) -> str:
idx = string.rfind(pattern)
if idx == -1:
@ -126,9 +128,11 @@ class _Counter:
return cnt
import re
ws = re.compile(r'\s+')
import os
def add_dll_dir(dll: str):
import sys
if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt':
@ -144,3 +148,13 @@ def clamp(val, minval, maxval):
def escape_qoutes(string : str):
    """Backslash-escape every single quote in `string` that is not already escaped.

    A quote counts as already escaped when the character immediately before it
    is a backslash; such quotes are left untouched.

    A negative lookbehind is used instead of matching the preceding character,
    so the match never consumes anything but the quote itself. This keeps
    adjacent quotes (e.g. `''`) and a quote at the very start of the string
    from being missed.

    Returns the escaped copy of `string`.
    """
    return re.sub(r"(?<!\\)'", r"\\'", string)
def get_innermost(sl):
if sl and type(sl) is dict:
if 'literal' in sl and type(sl['literal']) is str:
return f"'{get_innermost(sl['literal'])}'"
return get_innermost(next(iter(sl.values()), None))
elif sl and type(sl) is list:
return get_innermost(sl[0])
else:
return sl

@ -221,7 +221,7 @@
<ItemGroup>
<ClInclude Include="..\csv.h" />
<ClInclude Include="..\server\aggregations.h" />
<ClInclude Include="..\server\gc.hpp" />
<ClInclude Include="..\server\gc.h" />
<ClInclude Include="..\server\hasher.h" />
<ClInclude Include="..\server\io.h" />
<ClInclude Include="..\server\libaquery.h" />
@ -238,7 +238,7 @@
<ItemGroup>
<ClCompile Include="..\server\server.cpp" />
<ClCompile Include="..\server\winhelper.cpp" />
<ClCompile Include="..\server\io.cpp" />
<ClCompile Include="..\server\libaquery.cpp" />
<ClCompile Include="..\server\monetdb_conn.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

@ -1,4 +1,5 @@
import aquery_config
help_message = '''\
======================================================
AQUERY COMMANDLINE HELP
@ -82,31 +83,31 @@ if __name__ == '__main__':
import os
from dataclasses import dataclass
import atexit
import ctypes
import enum
import time
import mmap
import os
# import dbconn
import re
import subprocess
import sys
import threading
import time
from dataclasses import dataclass
from typing import Callable, List, Optional
import numpy as np
from mo_parsing import ParseException
import aquery_parser as parser
import engine
import engine.projection
import engine.ddl
import engine.projection
import reconstruct as xengine
import subprocess
import mmap
import sys
from engine.utils import base62uuid
import atexit
import threading
import ctypes
import numpy as np
from engine.utils import ws
from engine.utils import add_dll_dir
from engine.utils import nullstream
from build import build_manager
from engine.utils import add_dll_dir, base62uuid, nullstream, ws
## CLASSES BEGIN
class RunType(enum.Enum):
@ -159,9 +160,11 @@ class QueryStats:
class Config:
__all_attrs__ = ['running', 'new_query', 'server_mode',
'backend_type', 'has_dll',
'postproc_time', 'sql_time',
'n_buffers'
'n_buffers',
]
__i64_attrs__ = [
'monetdb_time', 'postproc_time'
]
__init_attributes__ = False
@staticmethod
@ -170,26 +173,42 @@ class Config:
from functools import partial
for _i, attr in enumerate(Config.__all_attrs__):
if not hasattr(Config, attr):
setattr(Config, attr, property(partial(Config.getter, i = _i), partial(Config.setter, i = _i)))
setattr(Config, attr, property(
partial(Config.getter, i = _i), partial(Config.setter, i = _i)
))
for _i, attr in enumerate(Config.__i64_attrs__):
if not hasattr(Config, attr):
setattr(Config, attr, property(
partial(Config.i64_getter, i = _i), partial(Config.i64_setter, i = _i)
))
Config.__init_attributes__ = True
def __init__(self, mode, nq = 0, n_bufs = 0, bf_szs = []) -> None:
Config.__init_self__()
self.int_size = 4
self.n_attrib = len(Config.__all_attrs__)
self.buf = bytearray((self.n_attrib + n_bufs) * self.int_size)
self.np_buf = np.ndarray(shape=(self.n_attrib), buffer=self.buf, dtype=np.int32)
self.buf = bytearray((self.n_attrib + n_bufs) * 4 +
len(self.__i64_attrs__) * 8
)
self.np_buf = np.ndarray(shape = (self.n_attrib), buffer = self.buf, dtype = np.int32)
self.np_i64buf = np.ndarray(shape = len(self.__i64_attrs__), buffer = self.buf,
dtype = np.int64, offset = 4 * len(self.__all_attrs__))
self.new_query = nq
self.server_mode = mode.value
self.running = 1
self.backend_type = Backend_Type.BACKEND_AQuery.value
self.backend_type = Backend_Type.BACKEND_MonetDB.value
self.has_dll = 0
self.n_buffers = n_bufs
self.monetdb_time = 0
self.postproc_time = 0
def getter (self, *, i):
return self.np_buf[i]
def setter(self, v, *, i):
self.np_buf[i] = v
def i64_getter (self, *, i):
return self.np_i64buf[i]
def i64_setter(self, v, *, i):
self.np_i64buf[i] = v
def set_bufszs(self, buf_szs):
for i in range(min(len(buf_szs), self.n_buffers)):
@ -208,6 +227,8 @@ class PromptState():
test_parser = True
server_mode: RunType = RunType.Threaded
server_bin = 'server.bin' if server_mode == RunType.IPC else 'server.so'
wait_engine = lambda: None
wake_engine = lambda: None
set_ready = lambda: None
get_ready = lambda: None
server_status = lambda: False
@ -298,12 +319,14 @@ def init_threaded(state : PromptState):
if aquery_config.run_backend:
server_so = ctypes.CDLL('./'+state.server_bin)
state.send = server_so['receive_args']
state.wait_engine = server_so['wait_engine']
state.wake_engine = server_so['wake_engine']
aquery_config.have_hge = server_so['have_hge']()
if aquery_config.have_hge != 0:
from engine.types import get_int128_support
get_int128_support()
state.th = threading.Thread(target=server_so['main'], args=(-1, ctypes.POINTER(ctypes.c_char_p)(state.cfg.c)), daemon=True)
state.th.start()
state.th.start()
def init_prompt() -> PromptState:
aquery_config.init_config()
@ -336,6 +359,8 @@ def init_prompt() -> PromptState:
rm = lambda: None
def __set_ready():
state.cfg.new_query = 1
state.wake_engine()
state.set_ready = __set_ready
state.get_ready = lambda: aquery_config.run_backend and state.cfg.new_query
if aquery_config.run_backend:
@ -374,14 +399,23 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
payload = None
keep = True
cxt = engine.initialize()
parser.parse('SELECT "**** WELCOME TO AQUERY++! ****";')
# state.currstats = QueryStats()
# state.need_print = False
while running():
try:
if state.server_status():
state.init()
state.init(state)
# *** busy waiting ***
# while state.get_ready():
# time.sleep(.00001)
while state.get_ready():
time.sleep(.00001)
state.wait_engine()
if state.need_print:
print(f'MonetDB Time: {state.cfg.monetdb_time/10**9}, '
f'PostProc Time: {state.cfg.postproc_time/10**9}')
state.cfg.monetdb_time = state.cfg.postproc_time = 0
state.currstats.print(state.stats, need_print=state.need_print)
try:
og_q : str = next()
@ -407,7 +441,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
for t in cxt.tables:
lst_cols = []
for c in t.columns:
lst_cols.append(f'{c.name} : {c.type}')
lst_cols.append(f'{c.name} : {c.type.name}')
print(f'{t.table_name} ({", ".join(lst_cols)})')
continue
elif q.startswith('help'):
@ -498,17 +532,17 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
rm(state)
exit()
elif q.startswith('sh'):
from distutils.spawn import find_executable
from shutil import which
qs = re.split(r'[ \t]', q)
shells = ('zsh', 'bash', 'sh', 'fish', 'cmd', 'pwsh', 'powershell', 'csh', 'tcsh', 'ksh')
shell_path = ''
if len(qs) > 1 and qs[1] in shells:
shell_path = find_executable(qs[1])
shell_path = which(qs[1])
if shell_path:
os.system(shell_path)
else:
for sh in shells:
shell_path = find_executable(sh)
shell_path = which(sh)
if shell_path:
os.system(shell_path)
break
@ -575,7 +609,7 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
state.stats.print(clear = False)
continue
trimed = ws.sub(' ', og_q).split(' ')
if trimed[0].lower().startswith('f'):
if len(trimed) > 1 and trimed[0].lower().startswith('fi') or trimed[0].lower() == 'f':
fn = 'stock.a' if len(trimed) <= 1 or len(trimed[1]) == 0 \
else trimed[1]
try:
@ -605,7 +639,8 @@ def prompt(running = lambda:True, next = lambda:input('> '), state = None):
print("\nBye.")
raise
except ValueError as e:
import code, traceback
import code
import traceback
__stdin = os.dup(0)
raise_exception = True
sh = code.InteractiveConsole({**globals(), **locals()})

@ -1,4 +1,5 @@
from reconstruct.ast import Context, ast_node
saved_cxt = None
def initialize(cxt = None, keep = False):

@ -1,12 +1,14 @@
from binascii import Error
from copy import deepcopy
from dataclasses import dataclass
from enum import Enum, auto
from typing import Set, Tuple, Dict, Union, List, Optional
from typing import Dict, List, Optional, Set, Tuple, Union
from engine.types import *
from engine.utils import enlist, base62uuid, base62alp, get_legal_name
from reconstruct.storage import Context, TableInfo, ColRef
from engine.utils import (base62alp, base62uuid, enlist, get_innermost,
get_legal_name)
from reconstruct.storage import ColRef, Context, TableInfo
class ast_node:
header = []
types = dict()
@ -28,8 +30,8 @@ class ast_node:
def emit(self, code):
self.context.emit(code)
def add(self, code):
self.sql += code + ' '
def add(self, code, sp = ' '):
self.sql += code + sp
def addc(self, code):
self.ccode += code + '\n'
@ -51,26 +53,60 @@ class ast_node:
self.context.sql_end()
from reconstruct.expr import expr, fastscan
class SubqType(Enum):
WITH = auto()
FROM = auto()
PROJECTION = auto()
FILTER = auto()
GROUPBY = auto()
ORDERBY = auto()
NONE = auto()
class projection(ast_node):
name = 'projection'
first_order = 'select'
def __init__(self,
parent : Optional["ast_node"],
node,
context : Optional[Context] = None,
force_use_spgb : bool = False,
subq_type: SubqType = SubqType.NONE
):
self.force_use_spgb = force_use_spgb
self.subq_type = subq_type
super().__init__(parent, node, context)
def init(self, _):
# skip default init
pass
def produce(self, node):
self.add('SELECT')
self.has_postproc = False
self.has_postproc = 'into' in node
if 'select' in node:
p = node['select']
self.distinct = False
elif 'select_distinct' in node:
p = node['select_distinct']
self.distinct = True
if 'with' in node:
with_table = node['with']['name']
with_table_name = tuple(with_table.keys())[0]
with_table_cols = tuple(with_table.values())[0]
self.with_clause = projection(self, node['with']['value'], subq_type=SubqType.WITH)
self.with_clause.out_table.add_alias(with_table_name)
for new_name, col in zip(with_table_cols, self.with_clause.out_table.columns):
col.rename(new_name)
self.with_clause.out_table.contextname_cpp
# in monetdb, in cxt
else:
self.with_clause = None
self.limit = None
if 'limit' in node:
self.limit = node['limit']
self.projections = p if type(p) is list else [p]
if self.parent is None:
self.context.sql_begin()
@ -99,8 +135,9 @@ class projection(ast_node):
if type(self.datasource) is join:
self.datasource.process_join_conditions()
if 'groupby' in node:
self.context.special_gb = groupby.check_special(self, node['groupby'])
self.context.special_gb = self.force_use_spgb
if 'groupby' in node: # if groupby clause contains special stuff
self.context.special_gb |= groupby.check_special(self, node['groupby'])
def consume(self, node):
# deal with projections
@ -158,6 +195,11 @@ class projection(ast_node):
this_type = [c.type for c in _datasource]
compound = [c.compound for c in _datasource]
proj_expr = [expr(self, c.name) for c in _datasource]
for pe in proj_expr:
if pe.is_ColExpr:
pe.cols_mentioned = {pe.raw_col}
else:
pe.cols_mentioned = set()
else:
y = lambda x:x
count = lambda : 'count(*)'
@ -203,8 +245,14 @@ class projection(ast_node):
self.out_table.add_cols(cols, new = False)
self.proj_map = proj_map
if 'groupby' in node:
self.group_node = groupby(self, node['groupby'])
if self.group_node.terminate:
self.context.abandon_query()
projection(self.parent, node, self.context, True, subq_type=self.subq_type)
return
if self.group_node.use_sp_gb:
self.has_postproc = True
else:
@ -223,7 +271,11 @@ class projection(ast_node):
self.var_table[col.name] = offset
for n in (col.table.alias):
self.var_table[f'{n}.'+col.name] = offset
# monetdb doesn't support select into table
# if 'into' in node:
# self.into_stub = f'{{INTOSTUB{base62uuid(20)}}}'
# self.add(self.into_stub, '')
def finialize(astnode:ast_node):
if(astnode is not None):
self.add(astnode.sql)
@ -235,6 +287,9 @@ class projection(ast_node):
if self.col_ext or self.group_node and self.group_node.use_sp_gb:
self.has_postproc = True
if self.group_node and self.group_node.use_sp_gb :
self.group_node.dedicated_glist
...
o = self.assumptions
if 'orderby' in node:
o.extend(enlist(node['orderby']))
@ -258,7 +313,6 @@ class projection(ast_node):
# cpp module codegen
self.context.has_dll = True
# extract typed-columns from result-set
vid2cname = [0]*len(self.var_table)
self.pyname2cname = dict()
@ -338,28 +392,36 @@ class projection(ast_node):
)
else:
# for funcs evaluate f_i(x, ...)
self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>() = {val[1]};')
self.context.emitc(f'{self.out_table.contextname_cpp}->get_col<{key}>().initfrom({val[1]}, "{cols[i].name}");')
# print out col_is
if 'into' not in node:
self.context.emitc(f'print(*{self.out_table.contextname_cpp});')
if 'into' not in node and self.subq_type == SubqType.NONE:
if self.limit is None:
self.context.emitc(f'print(*{self.out_table.contextname_cpp});')
else:
self.context.emitc(f'{self.out_table.contextname_cpp}->printall(" ","\\n", nullptr, nullptr, {self.limit});')
if self.outfile and self.has_postproc:
self.outfile.finalize()
if 'into' in node:
self.context.emitc(select_into(self, node['into']).ccode)
self.has_postproc = True
if not self.distinct:
self.finalize()
def finalize(self):
self.context.emitc(f'puts("done.");')
if self.parent is None:
self.context.sql_end()
if self.outfile and not self.has_postproc:
self.context.abandon_postproc()
else:
if self.has_postproc:
self.context.has_dll = True
self.context.postproc_end(self.postproc_fname)
else:
self.context.ccode = ''
if self.limit != 0 and not self.outfile:
self.context.direct_output()
class select_distinct(projection):
first_order = 'select_distinct'
@ -367,18 +429,18 @@ class select_distinct(projection):
super().consume(node)
if self.has_postproc:
self.context.emitc(
f'{self.out_table.table_name}->distinct();'
f'{self.out_table.contextname_cpp}->distinct();'
)
self.finalize()
class select_into(ast_node):
def init(self, node):
def init(self, _):
if isinstance(self.parent, projection):
if self.context.has_dll:
# has postproc put back to monetdb
self.produce = self.produce_cpp
else:
self.produce = self.produce_sql
# if self.parent.has_postproc:
# # has postproc put back to monetdb
self.produce = self.produce_cpp
# else:
# self.produce = self.produce_sql
else:
raise ValueError('parent must be projection')
@ -390,7 +452,8 @@ class select_into(ast_node):
self.ccode = f'{self.parent.out_table.contextname_cpp}->monetdb_append_table(cxt->alt_server, \"{node.lower()}\");'
def produce_sql(self, node):
self.sql = f' INTO {node}'
self.context.sql = self.context.sql.replace(
self.parent.into_stub, f'INTO {node}', 1)
class orderby(ast_node):
@ -409,7 +472,7 @@ class orderby(ast_node):
o_str += ' ' + 'DESC'
o_list.append(o_str)
self.add(', '.join(o_list))
class scan(ast_node):
class Position(Enum):
@ -586,6 +649,10 @@ class groupby(ast_node):
return True
return False
# Initialise the group-by node before produce() runs.
def init(self, _):
# `terminate` starts out False. produce() flips it to True when it finds that
# the special (post-processed) group-by path is required although the parent
# projection was not built with force_use_spgb; the parent projection checks
# this flag, abandons the current query and rebuilds itself with the flag on.
self.terminate = False
super().init(_)
def produce(self, node):
if not isinstance(self.parent, projection):
raise ValueError('groupby can only be used in projection')
@ -593,8 +660,10 @@ class groupby(ast_node):
node = enlist(node)
o_list = []
self.refs = set()
self.gb_cols = set()
# dedicated_glist -> cols populated for special group by
self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = []
self.use_sp_gb = False
self.use_sp_gb = self.parent.force_use_spgb
for g in node:
self.datasource.rec = set()
g_expr = expr(self, g['value'])
@ -610,7 +679,24 @@ class groupby(ast_node):
if 'sort' in g and f'{g["sort"]}'.lower() == 'desc':
g_str = g_str + ' ' + 'DESC'
o_list.append(g_str)
if g_expr.is_ColExpr:
self.gb_cols.add(g_expr.raw_col)
else:
self.gb_cols.add(g_expr.sql)
for projs in self.parent.proj_map.values():
if self.use_sp_gb:
break
if (projs[2].is_compound and
not ((projs[2].is_ColExpr and projs[2].raw_col in self.gb_cols) or
projs[2].sql in self.gb_cols)
) and (not self.parent.force_use_spgb):
self.use_sp_gb = True
break
if self.use_sp_gb and not self.parent.force_use_spgb:
self.terminate = True
return
if not self.use_sp_gb:
self.dedicated_gb = None
self.add(', '.join(o_list))
@ -916,38 +1002,64 @@ class insert(ast_node):
name = 'insert'
first_order = name
def init(self, node):
values = node['query']
complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit']
if any([kw in values for kw in complex_query_kw]):
values['into'] = node['insert']
proj_cls = (select_distinct
if 'select_distinct' in values
else projection)
proj_cls(None, values, self.context)
self.produce = lambda*_:None
self.spawn = lambda*_:None
self.consume = lambda*_:None
if 'query' in node:
values = node['query']
complex_query_kw = ['from', 'where', 'groupby', 'having', 'orderby', 'limit']
if any([kw in values for kw in complex_query_kw]):
values['into'] = node['insert']
proj_cls = (select_distinct
if 'select_distinct' in values
else projection)
proj_cls(None, values, self.context)
self.produce = lambda*_:None
self.spawn = lambda*_:None
self.consume = lambda*_:None
else:
super().init(node)
def produce(self, node):
values = node['query']['select']
keys = []
if 'query' in node:
if 'select' in node['query']:
values = enlist(node['query']['select'])
if 'columns' in node:
keys = node['columns']
values = [v['value'] for v in values]
elif 'union_all' in node['query']:
values = [[v['select']['value']] for v in node['query']['union_all']]
if 'columns' in node:
keys = node['columns']
else:
values = enlist(node['values'])
_vals = []
for v in values:
if isinstance(v, dict):
keys = v.keys()
v = list(v.values())
v = [f"'{vv}'" if type(vv) is str else vv for vv in v]
_vals.append(v)
values = _vals
keys = f'({", ".join(keys)})' if keys else ''
tbl = node['insert']
self.sql = f'INSERT INTO {tbl} VALUES('
self.sql = f'INSERT INTO {tbl}{keys} VALUES'
# if len(values) != table.n_cols:
# raise ValueError("Column Mismatch")
values = [values] if isinstance(values, list) and not isinstance(values[0], list) else values
list_values = []
for i, s in enumerate(enlist(values)):
if 'value' in s:
list_values.append(f"{s['value']}")
else:
# subquery, dispatch to select astnode
pass
self.sql += ', '.join(list_values) + ')'
for l in values:
inner_list_values = []
for s in enlist(l):
if type(s) is dict and 'value' in s:
s = s['value']
inner_list_values.append(f"{get_innermost(s)}")
list_values.append(f"({', '.join(inner_list_values)})")
self.sql += ', '.join(list_values)
class delete_table(ast_node):
class delete_from(ast_node):
name = 'delete'
first_order = name
def init(self, node):
@ -959,6 +1071,31 @@ class delete_table(ast_node):
if 'where' in node:
self.sql += filter(self, node['where']).sql
class union_all(ast_node):
    """Set operation that glues several SELECT queries together in plain SQL.

    Each member of ``node[self.name]`` is compiled by ``projection``; the
    generated SQL fragments are then joined with ``sql_name``.  Subclasses
    only need to override ``name``, ``first_order`` and ``sql_name``.
    """
    name = 'union_all'
    first_order = name
    sql_name = 'UNION ALL'

    def produce(self, node):
        """Compile every member query and join their SQL with ``sql_name``.

        Raises:
            NotImplementedError: if a member is not a plain ``select`` query,
                or if any member needs post-processing (i.e. cannot be
                expressed as standard SQL).
        """
        queries = node[self.name]
        generated_queries : List[projection] = []
        for q in queries:
            if 'select' not in q:
                # Previously such a member was left as None and blew up later
                # with an AttributeError on `.sql`; fail with a clear message.
                raise NotImplementedError(
                    f"{self.sql_name} only support standard sql for now")
            generated_queries.append(projection(self, q))

        if any(q.has_postproc for q in generated_queries):
            raise NotImplementedError(
                f"{self.sql_name} only support standard sql for now")

        self.sql = f' {self.sql_name} '.join([q.sql for q in generated_queries])

    def consume(self, node):
        """Run the default consume step, then queue a direct-output request."""
        super().consume(node)
        self.context.direct_output()
# EXCEPT set operation. Reuses union_all's produce/consume unchanged: the member
# queries are read from node['except'] and their SQL is joined with ' EXCEPT '.
class except_clause(union_all):
name = 'except'
first_order = name
sql_name = 'EXCEPT'
class load(ast_node):
name="load"
first_order = name
@ -967,6 +1104,9 @@ class load(ast_node):
if node['load']['file_type'] == 'module':
self.produce = self.produce_module
self.module = True
elif 'complex' in node['load']:
self.produce = self.produce_cpp
self.consume = lambda *_: None
elif self.context.dialect == 'MonetDB':
self.produce = self.produce_monetdb
else:
@ -998,7 +1138,7 @@ class load(ast_node):
self.context.queries.append(f'F{fname}')
ret_type = VoidT
if 'ret_type' in f:
ret_type = Types.decode(f['ret_type'])
ret_type = Types.decode(f['ret_type'], vector_type='vector_type')
nargs = 0
arglist = ''
if 'vars' in f:
@ -1008,7 +1148,7 @@ class load(ast_node):
nargs = len(arglist)
arglist = ', '.join(arglist)
# create c++ stub
cpp_stub = f'{ret_type.cname} (*{fname})({arglist}) = nullptr;'
cpp_stub = f'{"vectortype_cstorage" if isinstance(ret_type, VectorT) else ret_type.cname} (*{fname})({arglist}) = nullptr;'
self.context.module_stubs += cpp_stub + '\n'
self.context.module_map[fname] = cpp_stub
#registration for parser
@ -1035,7 +1175,56 @@ class load(ast_node):
self.sql = f'{s1} \'{p}\' {s2} '
if 'term' in node:
self.sql += f' {s3} \'{node["term"]["literal"]}\''
# Generate a C++ post-processing routine that loads a CSV file into an existing
# table. The emitted code: (1) selects the table's current contents, (2) wraps
# the returned columns in a TableInfo, (3) appends every CSV row to those
# columns, and (4) writes the result back with monetdb_append_table.
# `node` is the full statement; only node['load'] is used
# (keys read: 'table', 'file', optional 'term' and 'ele').
def produce_cpp(self, node):
self.context.has_dll = True
self.context.headers.add('"csv.h"')
node = node['load']
# Randomised names keep the generated symbols from colliding.
self.postproc_fname = 'ld_' + base62uuid(5)
self.context.postproc_begin(self.postproc_fname)
table:TableInfo = self.context.tables_byname[node['table']]
# NOTE(review): self.sql already ends with ';' and emit() appends another one,
# so the emitted statement ends in ';;' -- confirm the server tolerates this.
self.sql = F"SELECT {', '.join([c.name for c in table.columns])} FROM {table.table_name};"
self.emit(self.sql+';\n')
self.context.sql_end()
# Row count of the SELECT result as exposed by the server object.
length_name = 'len_' + base62uuid(6)
self.context.emitc(f'auto {length_name} = server->cnt;')
out_typenames = [t.type.cname for t in table.columns]
outtable_col_nameslist = ', '.join([f'"{c.name}"' for c in table.columns])
self.outtable_col_names = 'names_' + base62uuid(4)
self.context.emitc(f'const char* {self.outtable_col_names}[] = {{{outtable_col_nameslist}}};')
self.out_table = 'tbl_' + base62uuid(4)
self.context.emitc(f'auto {self.out_table} = new TableInfo<{",".join(out_typenames)}>("{table.table_name}", {self.outtable_col_names});')
# Bind one C++ reference per column and seed it from the SELECT result.
# This also records the generated name on the ColRef (c.cxt_name).
for i, c in enumerate(table.columns):
c.cxt_name = 'c_' + base62uuid(6)
self.context.emitc(f'decltype(auto) {c.cxt_name} = {self.out_table}->get_col<{i}>();')
self.context.emitc(f'{c.cxt_name}.initfrom({length_name}, server->getCol({i}), "{table.columns[i].name}");')
csv_reader_name = 'csv_reader_' + base62uuid(6)
col_types = [c.type.cname for c in table.columns]
col_tmp_names = ['tmp_'+base62uuid(8) for _ in range(len(table.columns))]
#col_names = ','.join([f'"{c.name}"' for c in table.columns])
# Field / element delimiters default to ',' and ';'.
# NOTE(review): only the first character after strip() is used, and strip()[0]
# raises IndexError for a whitespace-only delimiter (e.g. a literal tab).
term_field = ',' if 'term' not in node else node['term']['literal']
term_ele = ';' if 'ele' not in node else node['ele']['literal']
self.context.emitc(f'AQCSVReader<{len(col_types)}, \'{term_field.strip()[0]}\', \'{term_ele.strip()[0]}\'> {csv_reader_name}("{node["file"]["literal"]}");')
# self.context.emitc(f'{csv_reader_name}.read_header(io::ignore_extra_column, {col_names});')
# presumably skips the CSV header row -- TODO confirm against AQCSVReader
self.context.emitc(f'{csv_reader_name}.next_line();')
# One temporary per column to receive the parsed fields of each row.
for t, n in zip(col_types, col_tmp_names):
self.context.emitc(f'{t} {n};')
self.context.emitc(f'while({csv_reader_name}.read_row({",".join(col_tmp_names)})) {{ \n')
for i, c in enumerate(table.columns):
# self.context.emitc(f'print({col_tmp_names[i]});')
self.context.emitc(f'{c.cxt_name}.emplace_back({col_tmp_names[i]});')
self.context.emitc('}')
# self.context.emitc(f'print(*{self.out_table});')
self.context.emitc(f'{self.out_table}->monetdb_append_table(cxt->alt_server, "{table.table_name}");')
self.context.postproc_end(self.postproc_fname)
class outfile(ast_node):
name="_outfile"
def __init__(self, parent, node, context = None, *, sql = None):
@ -1062,6 +1251,13 @@ class outfile(ast_node):
filename = node['loc']['literal'] if 'loc' in node else node['literal']
import os
p = os.path.abspath('.').replace('\\', '/') + '/' + filename
# The output path is about to be (re)written by COPY ... INTO. If a file is
# already there, tell the user and remove it first. The warning is only
# meaningful when the file exists, and it must be an f-string so the actual
# path is shown instead of the literal text "{p}".
if os.path.exists(p):
    print(f'Warning: file {p} exists and will be overwritten')
    try:
        os.remove(p)
    except OSError:
        # Best effort: report and carry on so the COPY statement is still issued.
        print(f'Error: file {p} exists and cannot be removed')
self.sql = f'COPY {self.parent.sql} INTO \'{p}\''
d = ','
e = '\\n'
@ -1137,7 +1333,7 @@ class udf(ast_node):
def produce(self, node):
from engine.utils import get_legal_name, check_legal_name
from engine.utils import check_legal_name, get_legal_name
node = node[self.name]
# register udf
self.agg = 'Agg' in node
@ -1232,7 +1428,7 @@ class udf(ast_node):
def consume(self, node):
from engine.utils import get_legal_name, check_legal_name
from engine.utils import check_legal_name, get_legal_name
node = node[self.name]
if 'params' in node:
@ -1339,7 +1535,25 @@ class udf(ast_node):
return udf.ReturnPattern.elemental_return
else:
return udf.ReturnPattern.bulk_return
class passthru_sql(ast_node):
    """Forward raw SQL text to the backend without building an AST for it.

    The text in ``node['sql']`` is cut into statements at every ``;`` that is
    not inside a single- or double-quoted string.  Each non-empty statement is
    queued on ``context.queries`` as ``'Q<statement>;'``; a statement starting
    with ``select`` (case-insensitive) is followed by an ``'O'`` entry.

    ``ast_node.__init__`` is intentionally not invoked here.
    """
    name = 'sql'
    first_order = name
    import re
    # One match = a maximal run of non-';' characters and quoted strings.
    seprator = re.compile(r'''((?:[^;"']|"[^"]*"|'[^']*')+)''')

    def __init__(self, _, node, context:Context):
        for fragment in passthru_sql.seprator.split(node['sql']):
            statement = fragment.strip(' \t\n\r;')
            if not statement:
                # separators and whitespace-only pieces produced by split()
                continue
            context.queries.append('Q' + statement + ';')
            if statement.lower().startswith('select'):
                context.queries.append('O')
class user_module_function(OperatorBase):
def __init__(self, name, nargs, ret_type, context : Context):
super().__init__(name, nargs, lambda *_: ret_type, call=fn_behavior)
@ -1355,4 +1569,5 @@ def include(objs):
import sys
include(sys.modules[__name__])

@ -1,7 +1,8 @@
from typing import Optional, Set
from engine.types import *
from reconstruct.ast import ast_node
from reconstruct.storage import ColRef, Context
from engine.types import *
# TODO: Decouple expr and upgrade architecture
# C_CODE : get ccode/sql code?
@ -31,6 +32,7 @@ class expr(ast_node):
def __init__(self, parent, node, *, c_code = None, supress_undefined = False):
from reconstruct.ast import projection, udf
# gen2 expr have multi-passes
# first pass parse json into expr tree
# generate target code in later passes upon need
@ -78,7 +80,7 @@ class expr(ast_node):
ast_node.__init__(self, parent, node, None)
def init(self, _):
from reconstruct.ast import projection, _tmp_join_union
from reconstruct.ast import _tmp_join_union, projection
parent = self.parent
self.is_compound = parent.is_compound if type(parent) is expr else False
if type(parent) in [projection, expr, _tmp_join_union]:
@ -88,11 +90,13 @@ class expr(ast_node):
self.udf_map = parent.context.udf_map
self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max', 'last', 'first', 'prev', 'next']
self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max',
'last', 'first', 'prev', 'next', 'var',
'stddev']
def produce(self, node):
from engine.utils import enlist
from reconstruct.ast import udf
from reconstruct.ast import udf, projection
if type(node) is dict:
if 'literal' in node:
@ -166,8 +170,17 @@ class expr(ast_node):
special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
"maxs", "mins", "avgs", "sums", "deltas", "last", "first",
"ratios", "pack", "truncate"]
if self.context.special_gb:
"stddevs", "vars", "ratios", "pack", "truncate"]
if (
self.context.special_gb
or
(
type(self.root.parent) is projection
and
self.root.parent.force_use_spgb
)
):
special_func = [*special_func, *self.ext_aggfuncs]
if key in special_func and not self.is_special:
@ -333,7 +346,8 @@ class expr(ast_node):
self.type = ByteT
elif type(node) is float:
self.type = DoubleT
self.sql = f'{{"CAST({node} AS DOUBLE)" if not c_code else "{node}f"}}'
def finalize(self, override = False):
from reconstruct.ast import udf
if self.codebuf is None or override:

@ -1,12 +1,14 @@
from typing import Dict, List, Set
from engine.types import *
from engine.utils import CaseInsensitiveDict, base62uuid, enlist
from typing import List, Dict, Set
class ColRef:
def __init__(self, _ty, cobj, table:'TableInfo', name, id, compound = False, _ty_args = None):
self.type : Types = AnyT
if type(_ty) is str:
self.type = builtin_types[_ty.lower()]
self.type = Types.decode(_ty)
if _ty_args:
self.type = self.type(enlist(_ty_args))
elif type(_ty) is Types:
@ -17,6 +19,7 @@ class ColRef:
self.alias = set()
self.id = id # position in table
self.compound = compound # compound field (list as a field)
self.cxt_name = ''
# e.g. order by, group by, filter by expressions
self.__arr__ = (_ty, cobj, table, name, id)
@ -42,6 +45,14 @@ class ColRef:
alias = table_name
return f'{alias}.{self.get_name()}'
def rename(self, name):
    """Give this column a new name and re-register it in its table.

    The old name is dropped from ``self.alias`` and from the owning table's
    ``columns_byname`` index (if present); the column is then indexed under
    ``name``.  Returns ``self`` so calls can be chained.
    """
    previous = self.name
    self.alias.discard(previous)
    registry = self.table.columns_byname
    registry.pop(previous, None)
    self.name = name
    registry[name] = self
    return self
def __getitem__(self, key):
if type(key) is str:
return getattr(self, key)
@ -94,6 +105,17 @@ class TableInfo:
return
self.cxt.tables_byname[alias] = self
self.alias.add(alias)
def rename(self, name):
    """Change the table's primary name inside its context.

    If ``name`` is already registered in ``cxt.tables_byname`` an error is
    printed and nothing changes.  Otherwise the old name is removed from the
    context registry and from ``self.alias``, and the new name is added to
    both.  Always returns ``None``.
    """
    registry = self.cxt.tables_byname
    if name in registry.keys():
        print(f"Error: table name {name} already exists")
        return

    old_name = self.table_name
    registry.pop(old_name, None)
    self.alias.discard(old_name)

    self.table_name = name
    registry[name] = self
    self.alias.add(name)
def parse_col_names(self, colExpr) -> ColRef:
parsedColExpr = colExpr.split('.')
@ -134,6 +156,7 @@ class Context:
self.queries = []
self.module_init_loc = 0
self.special_gb = False
self.has_dll = False
def __init__(self):
self.tables_byname = dict()
@ -147,7 +170,6 @@ class Context:
self.udf_agg_map = dict()
self.use_columnstore = False
self.print = print
self.has_dll = False
self.dialect = 'MonetDB'
self.is_msvc = False
self.have_hge = False
@ -223,6 +245,14 @@ class Context:
self.queries.append('P' + proc_name)
self.finalize_query()
# Throw away everything generated for the current query: both the SQL text and
# the C++ code buffers are cleared before finalize_query() is called.
def abandon_query(self):
self.sql = ''
self.ccode = ''
self.finalize_query()
# Queue an 'O' entry on the query list.
# NOTE(review): presumably tells the executor to print the result of the
# preceding SQL query directly -- confirm against the consumer of self.queries.
def direct_output(self):
self.queries.append('O')
def abandon_postproc(self):
self.ccode = ''
self.finalize_query()

@ -5,14 +5,13 @@
struct minEval{
double value;
double values;
int* values;
double eval;
long left; // how many on its left
double* record;
long max;
long** count;
long* sorted; // sorted d
};
minEval giniSparse(double** data, long* result, long* d, long size, long col, long classes, long* totalT){

@ -1,5 +1,11 @@
# Optimisation flags consumed by the `irf` recipe.
# Release settings are the default; pass DEBUG=1 (e.g. `make DEBUG=1 irf`)
# for an unoptimised build with debug info and the leak/address sanitizers.
# (The initial declaration used to be misspelled `OPT_FLASG`.)
OPT_FLAGS =
ifneq ($(DEBUG), 1)
OPT_FLAGS = -Ofast -march=native -flto -DNDEBUG
else
OPT_FLAGS = -g3 -D_DEBUG -fsanitize=leak -fsanitize=address
endif
example:
$(CXX) -shared -fPIC example.cpp aquery_mem.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../test.so
irf:
$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition -Ofast -march=native -flto --std=c++1z -o ../libirf.so
$(CXX) -shared -fPIC RF.cpp irf.cpp incrementalDecisionTree.cpp aquery_mem.cpp Evaluation.cpp -fno-semantic-interposition $(OPT_FLAGS) --std=c++1z -o ../libirf.so
all: example

@ -4,9 +4,6 @@
#include "../server/table.h"
DecisionTree* dt = nullptr;
long pt = 0;
double** data = nullptr;
long* result = nullptr;
__AQEXPORT__(bool) newtree(int height, long f, ColRef<int> sparse, double forget, long maxf, long noclasses, Evaluation e, long r, long rb){
if(sparse.size!=f)return 0;
@ -19,14 +16,13 @@ __AQEXPORT__(bool) newtree(int height, long f, ColRef<int> sparse, double forget
return 1;
}
__AQEXPORT__(bool) additem(ColRef<double>X, long y, long size){
long j = 0;
if(size>0){
free(data);
free(result);
pt = 0;
data=(double**)malloc(size*sizeof(double*));
result=(long*)malloc(size*sizeof(long));
__AQEXPORT__(bool) fit(ColRef<ColRef<double>> X, ColRef<int> y){
if(X.size != y.size)return 0;
double** data = (double**)malloc(X.size*sizeof(double*));
long* result = (long*)malloc(y.size*sizeof(long));
for(long i=0; i<X.size; i++){
data[i] = X.container[i].container;
result[i] = y.container[i];
}
data[pt] = (double*)malloc(X.size*sizeof(double));
for(j=0; j<X.size; j++){
@ -36,19 +32,32 @@ __AQEXPORT__(bool) additem(ColRef<double>X, long y, long size){
pt ++;
return 1;
}
__AQEXPORT__(bool) fit(){
if(pt<=0)return 0;
dt->fit(data, result, pt);
return 1;
__AQEXPORT__(bool) fit(vector_type<vector_type<double>> v, vector_type<long> res){
double** data = (double**)malloc(v.size*sizeof(double*));
for(int i = 0; i < v.size; ++i)
data[i] = v.container[i].container;
dt->fit(data, res.container, v.size);
return true;
}
__AQEXPORT__(ColRef_storage) predict(){
int* result = (int*)malloc(pt*sizeof(int));
for(long i=0; i<pt; i++){
result[i]=dt->Test(data[i], dt->DTree);
}
__AQEXPORT__(vectortype_cstorage) predict(vector_type<vector_type<double>> v){
int* result = (int*)malloc(v.size*sizeof(int));
return ColRef_storage(new ColRef_storage(result, pt, 0, "prediction", 0), 1, 0, "prediction", 0);
for(long i=0; i<v.size; i++){
result[i]=dt->Test(v.container[i].container, dt->DTree);
//printf("%d ", result[i]);
}
auto container = (vector_type<int>*)malloc(sizeof(vector_type<int>));
container->size = v.size;
container->capacity = 0;
container->container = result;
// container->out(10);
// ColRef<vector_type<int>>* col = (ColRef<vector_type<int>>*)malloc(sizeof(ColRef<vector_type<int>>));
auto ret = vectortype_cstorage{.container = container, .size = 1, .capacity = 0};
// col->initfrom(ret, "sibal");
// print(*col);
return ret;
//return true;
}

@ -1,6 +1,6 @@
debug:
g++ -g3 -O0 server/server.cpp server/io.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread
g++ -g3 -O0 server/server.cpp server/libaquery.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread
test:
g++ --std=c++1z -g3 -O0 server.cpp io.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread
g++ --std=c++1z -g3 -O0 server.cpp libaquery.cpp -o a.out -Wall -Wextra -Wpedantic -lpthread

@ -202,6 +202,102 @@ decayed_t<VT, types::GetFPType<types::GetLongType<T>>> avgw(uint32_t w, const VT
return ret;
}
// Windowed variance (or standard deviation when `sd` is true) of `arr`.
// Returns a vector of the same length as `arr`.
//
// NOTE(review): as written, the first loop below runs all the way to `len`,
// so the sliding-window loop that follows never executes and `w` has no
// effect on the result: every ret[i] is the population variance of
// arr[0..i] (same values as vars()). If the first loop is meant to stop at
// `w`, note that the second loop would then read arr[i - w - 1], which
// underflows for i == w. Needs confirmation of the intended window semantics.
template<class T, template<typename ...> class VT, bool sd = false>
decayed_t<VT, types::GetFPType<types::GetLongType<T>>> varw(uint32_t w, const VT<T>& arr) {
using FPType = types::GetFPType<types::GetLongType<T>>;
const uint32_t& len = arr.size;
decayed_t<VT, FPType> ret(len);
uint32_t i = 0;
// running sum of the elements seen so far
types::GetLongType<T> s{};
// the window can never be larger than the input
w = w > len ? len : w;
// EnX: running mean, MnX: running sum of squared deviations from the mean
FPType EnX {}, MnX{};
if (len) {
s = arr[0];
MnX = 0;
EnX = arr[0];
// variance of a single element is 0
ret[i++] = 0;
}
// Incremental (Welford-style) update: M += (x - old_mean) * (x - new_mean).
for (; i < len; ++i){
s += arr[i];
FPType _EnX = s / (FPType)(i + 1);
MnX += (arr[i] - EnX) * (arr[i] - _EnX);
EnX = _EnX;
ret[i] = MnX / (FPType)(i + 1);
// the previous slot is final now, so it can be converted to a std-dev
if constexpr(sd) ret[i-1] = sqrt(ret[i-1]);
}
const float rw = 1.f / (float)w;
s *= rw;
// Sliding-window update; unreachable at present because i == len here.
for (; i < len; ++i){
const auto dw = arr[i] - arr[i - w - 1];
const auto sw = arr[i] + arr[i - w - 1];
const auto dex = dw * rw;
ret[i] = ret[i-1] - dex*(s + s + dex - sw);
if constexpr(sd) ret[i-1] = sqrt(ret[i-1]);
s += dex;
}
// the last slot has not been converted by the loops above
if constexpr(sd)
if(i)
ret[i-1] = sqrt(ret[i-1]);
return ret;
}
// Population variance of all elements of `arr` (divides by the element
// count, matching the last value produced by the running variant vars()).
// Returns 0 for an empty input.
//
// The previous implementation seeded the accumulators with arr[0] and then
// started the loop at index 0 again, so the first element was counted twice
// and the divisor was len + 1. The computation now uses the same incremental
// mean / squared-deviation update as vars(), which also avoids overflow and
// cancellation in "sum of squares minus square of sum".
template<class T, template<typename ...> class VT>
types::GetFPType<types::GetLongType<decays<T>>> var(const VT<T>& arr) {
    typedef types::GetFPType<types::GetLongType<decays<T>>> FPType;
    const uint32_t& len = arr.size;
    if (!len)
        return FPType{};

    FPType mean{};  // running mean of arr[0..i]
    FPType m2{};    // running sum of squared deviations from the mean
    for (uint32_t i = 0; i < len; ++i) {
        const FPType x = (FPType)arr[i];
        const FPType delta = x - mean;
        mean += delta / (FPType)(i + 1);
        m2 += delta * (x - mean);
    }
    return m2 / (FPType)len;
}
// Running population variance: ret[i] is the variance of arr[0..i]
// (ret[0] == 0). With `sd == true` every entry except ret[0] is replaced by
// its square root, i.e. the running standard deviation.
//
// A leftover debug printf was removed from the loop: it wrote to stdout for
// every element and passed a floating-point value to "%ld", which is
// undefined behaviour. The local `_EnX` was renamed because identifiers
// starting with an underscore followed by an upper-case letter are reserved.
template<class T, template<typename ...> class VT, bool sd = false>
decayed_t<VT, types::GetFPType<types::GetLongType<T>>> vars(const VT<T>& arr) {
    typedef types::GetFPType<types::GetLongType<T>> FPType;
    const uint32_t& len = arr.size;
    decayed_t<VT, FPType> ret(len);
    uint32_t i = 0;
    types::GetLongType<T> s{};  // running sum
    FPType MnX{};               // running sum of squared deviations
    FPType EnX{};               // running mean
    if (len) {
        s = arr[0];
        MnX = 0;
        EnX = arr[0];
        ret[i++] = 0;           // variance of a single element
    }
    for (; i < len; ++i){
        s += arr[i];
        const FPType newMean = s / (FPType)(i + 1);
        // incremental update: M += (x - old_mean) * (x - new_mean)
        MnX += (arr[i] - EnX) * (arr[i] - newMean);
        EnX = newMean;
        ret[i] = MnX / (FPType)(i + 1);
        if constexpr(sd) ret[i] = sqrt(ret[i]);
    }
    return ret;
}
// Standard deviation of all elements of `arr`: the square root of var(arr).
template<class T, template<typename ...> class VT>
types::GetFPType<types::GetLongType<decays<T>>> stddev(const VT<T>& arr) {
return sqrt(var(arr));
}
// Running standard deviation: vars() instantiated with sd = true, so each
// running variance is square-rooted as it is produced.
template<class T, template<typename ...> class VT>
decayed_t<VT, types::GetFPType<types::GetLongType<T>>> stddevs(const VT<T>& arr) {
return vars<T, VT, true>(arr);
}
// Windowed standard deviation: varw() instantiated with sd = true; `w` is
// forwarded unchanged as the window size.
template<class T, template<typename ...> class VT>
decayed_t<VT, types::GetFPType<types::GetLongType<T>>> stddevw(uint32_t w, const VT<T>& arr) {
return varw<T, VT, true>(w, arr);
}
// use getSignedType
template<class T, template<typename ...> class VT>
decayed_t<VT, T> deltas(const VT<T>& arr) {
@ -251,26 +347,33 @@ T first(const VT<T>& arr) {
}
#define __DEFAULT_AGGREGATE_FUNCTION__(NAME, RET) \
template <class T> constexpr inline T NAME(const T& v) { return RET; }
template <class T> constexpr T NAME(const T& v) { return RET; }
// non-aggreation count. E.g. SELECT COUNT(col) from table;
template <class T> constexpr inline T count(const T& v) { return 1; }
template <class T> constexpr inline T max(const T& v) { return v; }
template <class T> constexpr inline T min(const T& v) { return v; }
template <class T> constexpr inline T avg(const T& v) { return v; }
template <class T> constexpr inline T sum(const T& v) { return v; }
template <class T> constexpr inline T maxw(uint32_t, const T& v) { return v; }
template <class T> constexpr inline T minw(uint32_t, const T& v) { return v; }
template <class T> constexpr inline T avgw(uint32_t, const T& v) { return v; }
template <class T> constexpr inline T sumw(uint32_t, const T& v) { return v; }
template <class T> constexpr inline T ratiow(uint32_t, const T& v) { return 1; }
template <class T> constexpr inline T maxs(const T& v) { return v; }
template <class T> constexpr inline T mins(const T& v) { return v; }
template <class T> constexpr inline T avgs(const T& v) { return v; }
template <class T> constexpr inline T sums(const T& v) { return v; }
template <class T> constexpr inline T last(const T& v) { return v; }
template <class T> constexpr inline T prev(const T& v) { return v; }
template <class T> constexpr inline T aggnext(const T& v) { return v; }
template <class T> constexpr inline T daltas(const T& v) { return 0; }
template <class T> constexpr inline T ratios(const T& v) { return 1; }
template <class T> constexpr T count(const T&) { return 1; }
template <class T> constexpr T var(const T&) { return 0; }
template <class T> constexpr T vars(const T&) { return 0; }
template <class T> constexpr T varw(uint32_t, const T&) { return 0; }
template <class T> constexpr T stddev(const T&) { return 0; }
template <class T> constexpr T stddevs(const T&) { return 0; }
template <class T> constexpr T stddevw(uint32_t, const T&) { return 0; }
template <class T> constexpr T max(const T& v) { return v; }
template <class T> constexpr T min(const T& v) { return v; }
template <class T> constexpr T avg(const T& v) { return v; }
template <class T> constexpr T sum(const T& v) { return v; }
template <class T> constexpr T maxw(uint32_t, const T& v) { return v; }
template <class T> constexpr T minw(uint32_t, const T& v) { return v; }
template <class T> constexpr T avgw(uint32_t, const T& v) { return v; }
template <class T> constexpr T sumw(uint32_t, const T& v) { return v; }
template <class T> constexpr T ratiow(uint32_t, const T&) { return 1; }
template <class T> constexpr T maxs(const T& v) { return v; }
template <class T> constexpr T mins(const T& v) { return v; }
template <class T> constexpr T avgs(const T& v) { return v; }
template <class T> constexpr T sums(const T& v) { return v; }
template <class T> constexpr T last(const T& v) { return v; }
template <class T> constexpr T prev(const T& v) { return v; }
template <class T> constexpr T aggnext(const T& v) { return v; }
template <class T> constexpr T daltas(const T&) { return 0; }
template <class T> constexpr T ratios(const T&) { return 1; }

File diff suppressed because it is too large Load Diff

@ -0,0 +1,108 @@
// Copyright 2020-2022 Junekey Jeon
//
// The contents of this file may be used under the terms of
// the Apache License v2.0 with LLVM Exceptions.
//
// (See accompanying file LICENSE-Apache or copy at
// https://llvm.org/foundation/relicensing/LICENSE.txt)
//
// Alternatively, the contents of this file may be used under the terms of
// the Boost Software License, Version 1.0.
// (See accompanying file LICENSE-Boost or copy at
// https://www.boost.org/LICENSE_1_0.txt)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
#ifndef JKJ_HEADER_DRAGONBOX_TO_CHARS
#define JKJ_HEADER_DRAGONBOX_TO_CHARS
#include "dragonbox.h"
namespace jkj::dragonbox {
namespace to_chars_detail {
template <class Float, class FloatTraits>
extern char* to_chars(typename FloatTraits::carrier_uint significand, int exponent,
char* buffer) noexcept;
// Avoid needless ABI overhead incurred by tag dispatch.
// Writes the textual form of the value held in `br` to `buffer` and returns a
// pointer one past the last character written. No null terminator is added.
//   finite, non-zero : optional '-', then the digits produced by to_chars()
//                      from the to_decimal() result
//   finite, zero     : optional '-', then "0"
//   infinity         : optional '-', then "Infinity"
//   otherwise        : "NaN" (no sign is written)
template <class PolicyHolder, class Float, class FloatTraits>
char* to_chars_n_impl(float_bits<Float, FloatTraits> br, char* buffer) noexcept {
auto const exponent_bits = br.extract_exponent_bits();
auto const s = br.remove_exponent_bits(exponent_bits);
if (br.is_finite(exponent_bits)) {
if (s.is_negative()) {
*buffer = '-';
++buffer;
}
if (br.is_nonzero()) {
// The sign was already emitted above, so to_decimal is told to ignore it.
auto result = to_decimal<Float, FloatTraits>(
s, exponent_bits, policy::sign::ignore, policy::trailing_zero::ignore,
typename PolicyHolder::decimal_to_binary_rounding_policy{},
typename PolicyHolder::binary_to_decimal_rounding_policy{},
typename PolicyHolder::cache_policy{});
return to_chars_detail::to_chars<Float, FloatTraits>(result.significand,
result.exponent, buffer);
}
else {
*buffer = '0';
return buffer + 1;
}
}
else {
// Non-finite: all-zero significand bits means infinity, anything else NaN.
if (s.has_all_zero_significand_bits()) {
if (s.is_negative()) {
*buffer = '-';
++buffer;
}
// 8 == strlen("Infinity"); the terminator is deliberately not copied
std::memcpy(buffer, "Infinity", 8);
return buffer + 8;
}
else {
// 3 == strlen("NaN")
std::memcpy(buffer, "NaN", 3);
return buffer + 3;
}
}
}
}
// Writes the textual form of x into buffer (no terminating '\0') and returns the
// position one past the last character written.
// Any `policies` passed by the caller are combined with three defaults
// (nearest_to_even decimal-to-binary rounding, to_even binary-to-decimal rounding,
// full cache) through make_policy_holder; the resulting type drives to_chars_n_impl.
template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
char* to_chars_n(Float x, char* buffer, Policies... policies) noexcept {
using namespace jkj::dragonbox::detail::policy_impl;
using policy_holder = decltype(make_policy_holder(
base_default_pair_list<base_default_pair<decimal_to_binary_rounding::base,
decimal_to_binary_rounding::nearest_to_even>,
base_default_pair<binary_to_decimal_rounding::base,
binary_to_decimal_rounding::to_even>,
base_default_pair<cache::base, cache::full>>{},
policies...));
return to_chars_detail::to_chars_n_impl<policy_holder>(float_bits<Float, FloatTraits>(x),
buffer);
}
// Same as to_chars_n, but additionally stores a terminating '\0' after the text.
// The returned pointer addresses that terminator.
template <class Float, class FloatTraits = default_float_traits<Float>, class... Policies>
char* to_chars(Float x, char* buffer, Policies... policies) noexcept {
    char* const end = to_chars_n<Float, FloatTraits>(x, buffer, policies...);
    end[0] = '\0';
    return end;
}
// Upper bound on the number of characters produced for one value, not counting a
// null terminator. Each sum is:
//   sign + significand digits + decimal point + exponent marker + exponent sign
//   + exponent digits
// with 9 significand / 2 exponent digits for ieee754_binary32 and 17 / 3 for
// every other format (i.e. binary64).
template <class FloatFormat>
inline constexpr std::size_t max_output_string_length =
    std::is_same_v<FloatFormat, ieee754_binary32> ? std::size_t(1 + 9 + 1 + 1 + 1 + 2)
                                                  : std::size_t(1 + 17 + 1 + 1 + 1 + 3);
}
#endif

@ -0,0 +1,521 @@
// Copyright 2020-2022 Junekey Jeon
//
// The contents of this file may be used under the terms of
// the Apache License v2.0 with LLVM Exceptions.
//
// (See accompanying file LICENSE-Apache or copy at
// https://llvm.org/foundation/relicensing/LICENSE.txt)
//
// Alternatively, the contents of this file may be used under the terms of
// the Boost Software License, Version 1.0.
// (See accompanying file LICENSE-Boost or copy at
// https://www.boost.org/LICENSE_1_0.txt)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
#pragma once
#include "dragonbox_to_chars.h"
// JKJ_FORCEINLINE: strongest inlining request the compiler offers.
// GCC/Clang get `inline` plus the always_inline attribute, MSVC gets __forceinline,
// and any other compiler falls back to plain `inline`.
#if defined(__GNUC__) || defined(__clang__)
#define JKJ_FORCEINLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define JKJ_FORCEINLINE __forceinline
#else
#define JKJ_FORCEINLINE inline
#endif
namespace jkj::dragonbox {
namespace to_chars_detail {
// These "//"'s are to prevent clang-format to ruin this nice alignment.
// Thanks to reddit user u/mcmcc:
// https://www.reddit.com/r/cpp/comments/so3wx9/dragonbox_110_is_released_a_fast_floattostring/hw8z26r/?context=3
// Two-character decimal spelling of every n in 0..99: the pair for n lives at
// indices 2*n (tens digit) and 2*n + 1 (ones digit), so a two-digit group can be
// emitted with a single 2-byte memcpy.
static constexpr char radix_100_table[] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', //
'0', '5', '0', '6', '0', '7', '0', '8', '0', '9', //
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', //
'1', '5', '1', '6', '1', '7', '1', '8', '1', '9', //
'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', //
'2', '5', '2', '6', '2', '7', '2', '8', '2', '9', //
'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', //
'3', '5', '3', '6', '3', '7', '3', '8', '3', '9', //
'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', //
'4', '5', '4', '6', '4', '7', '4', '8', '4', '9', //
'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', //
'5', '5', '5', '6', '5', '7', '5', '8', '5', '9', //
'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', //
'6', '5', '6', '6', '6', '7', '6', '8', '6', '9', //
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', //
'7', '5', '7', '6', '7', '7', '7', '8', '7', '9', //
'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', //
'8', '5', '8', '6', '8', '7', '8', '8', '8', '9', //
'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', //
'9', '5', '9', '6', '9', '7', '9', '8', '9', '9' //
};
// For every n in 0..99, the pair at indices 2*n and 2*n + 1 is the leading decimal
// digit of n followed by '.', i.e. the first two characters of "d.ddd..." output.
// For n >= 10 the second digit of n is not stored here; callers fetch it from
// radix_100_table.
static constexpr char radix_100_head_table[] = {
'0', '.', '1', '.', '2', '.', '3', '.', '4', '.', //
'5', '.', '6', '.', '7', '.', '8', '.', '9', '.', //
'1', '.', '1', '.', '1', '.', '1', '.', '1', '.', //
'1', '.', '1', '.', '1', '.', '1', '.', '1', '.', //
'2', '.', '2', '.', '2', '.', '2', '.', '2', '.', //
'2', '.', '2', '.', '2', '.', '2', '.', '2', '.', //
'3', '.', '3', '.', '3', '.', '3', '.', '3', '.', //
'3', '.', '3', '.', '3', '.', '3', '.', '3', '.', //
'4', '.', '4', '.', '4', '.', '4', '.', '4', '.', //
'4', '.', '4', '.', '4', '.', '4', '.', '4', '.', //
'5', '.', '5', '.', '5', '.', '5', '.', '5', '.', //
'5', '.', '5', '.', '5', '.', '5', '.', '5', '.', //
'6', '.', '6', '.', '6', '.', '6', '.', '6', '.', //
'6', '.', '6', '.', '6', '.', '6', '.', '6', '.', //
'7', '.', '7', '.', '7', '.', '7', '.', '7', '.', //
'7', '.', '7', '.', '7', '.', '7', '.', '7', '.', //
'8', '.', '8', '.', '8', '.', '8', '.', '8', '.', //
'8', '.', '8', '.', '8', '.', '8', '.', '8', '.', //
'9', '.', '9', '.', '9', '.', '9', '.', '9', '.', //
'9', '.', '9', '.', '9', '.', '9', '.', '9', '.' //
};
// These digit generation routines are inspired by James Anhalt's itoa algorithm:
// https://github.com/jeaiii/itoa
// The main idea is for given n, find y such that floor(10^k * y / 2^32) = n holds,
// where k is an appropriate integer depending on the length of n.
// For example, if n = 1234567, we set k = 6. In this case, we have
// floor(y / 2^32) = 1,
// floor(10^2 * ((10^0 * y) mod 2^32) / 2^32) = 23,
// floor(10^2 * ((10^2 * y) mod 2^32) / 2^32) = 45, and
// floor(10^2 * ((10^4 * y) mod 2^32) / 2^32) = 67.
// See https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ for more explanation.
// Writes s32 at `buffer` as "d" or "d.ddd...": first digit, then (if more digits
// follow) a decimal point and the remaining digits.
//  - `buffer` is advanced past the characters that count as written.
//  - `exponent` is increased by (number of decimal digits of s32) - 1.
//  - In every branch except the 9-digit one, trailing zeros are dropped; a
//    character may be stored beyond the final `buffer` position and is simply
//    left for the caller to overwrite.
JKJ_FORCEINLINE static void print_9_digits(std::uint32_t s32, int& exponent,
char*& buffer) noexcept {
// -- IEEE-754 binary32
// Since we do not cut trailing zeros in advance, s32 must be of 6~9 digits
// unless the original input was subnormal.
// In particular, when it is of 9 digits it shouldn't have any trailing zeros.
// -- IEEE-754 binary64
// In this case, s32 must be of 7~9 digits unless the input is subnormal,
// and it shouldn't have any trailing zeros if it is of 9 digits.
if (s32 >= 1'0000'0000) {
// 9 digits.
// 1441151882 = ceil(2^57 / 1'0000'0000) + 1
auto prod = s32 * std::uint64_t(1441151882);
prod >>= 25;
std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
exponent += 8;
buffer += 10;
}
else if (s32 >= 100'0000) {
// 7 or 8 digits.
// 281474978 = ceil(2^48 / 100'0000) + 1
auto prod = s32 * std::uint64_t(281474978);
prod >>= 16;
auto two_digits = std::uint32_t(prod >> 32);
// If s32 is of 8 digits, increase the exponent by 7.
// Otherwise, increase it by 6.
exponent += (6 + unsigned(two_digits >= 10));
// Write the first digit and the decimal point.
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
// This third character may be overwritten later but we don't care.
buffer[2] = radix_100_table[two_digits * 2 + 1];
// Remaining 6 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) {
// The number of characters actually written is:
// 1, if only the first digit is nonzero, which means that either s32 is of 7
// digits or it is of 8 digits but the second digit is zero, or
// 3, otherwise.
// Note that buffer[2] is never zero if s32 is of 7 digits, because the input is
// never zero.
buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
}
else {
// At least one of the remaining 6 digits are nonzero.
// After this adjustment, now the first destination becomes buffer + 2.
buffer += unsigned(two_digits >= 10);
// Obtain the next two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
// Remaining 4 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
buffer += (3 + unsigned(buffer[3] > '0'));
}
else {
// At least one of the remaining 4 digits are nonzero.
// Obtain the next two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
// Remaining 2 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
buffer += (5 + unsigned(buffer[5] > '0'));
}
else {
// Obtain the last two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2);
buffer += (7 + unsigned(buffer[7] > '0'));
}
}
}
}
else if (s32 >= 1'0000) {
// 5 or 6 digits.
// 429497 = ceil(2^32 / 1'0000)
auto prod = s32 * std::uint64_t(429497);
auto two_digits = std::uint32_t(prod >> 32);
// If s32 is of 6 digits, increase the exponent by 5.
// Otherwise, increase it by 4.
exponent += (4 + unsigned(two_digits >= 10));
// Write the first digit and the decimal point.
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
// This third character may be overwritten later but we don't care.
buffer[2] = radix_100_table[two_digits * 2 + 1];
// Remaining 4 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
// The number of characters actually written is 1 or 3, similarly to the case of
// 7 or 8 digits.
buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
}
else {
// At least one of the remaining 4 digits are nonzero.
// After this adjustment, now the first destination becomes buffer + 2.
buffer += unsigned(two_digits >= 10);
// Obtain the next two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
// Remaining 2 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
buffer += (3 + unsigned(buffer[3] > '0'));
}
else {
// Obtain the last two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
buffer += (5 + unsigned(buffer[5] > '0'));
}
}
}
else if (s32 >= 100) {
// 3 or 4 digits.
// 42949673 = ceil(2^32 / 100)
auto prod = s32 * std::uint64_t(42949673);
auto two_digits = std::uint32_t(prod >> 32);
// If s32 is of 4 digits, increase the exponent by 3.
// Otherwise, increase it by 2.
exponent += (2 + int(two_digits >= 10));
// Write the first digit and the decimal point.
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
// This third character may be overwritten later but we don't care.
buffer[2] = radix_100_table[two_digits * 2 + 1];
// Remaining 2 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) {
// The number of characters actually written is 1 or 3, similarly to the case of
// 7 or 8 digits.
buffer += (1 + (unsigned(two_digits >= 10) & unsigned(buffer[2] > '0')) * 2);
}
else {
// At least one of the remaining 2 digits are nonzero.
// After this adjustment, now the first destination becomes buffer + 2.
buffer += unsigned(two_digits >= 10);
// Obtain the last two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
buffer += (3 + unsigned(buffer[3] > '0'));
}
}
else {
// 1 or 2 digits.
// If s32 is of 2 digits, increase the exponent by 1.
exponent += int(s32 >= 10);
// Write the first digit and the decimal point.
std::memcpy(buffer, radix_100_head_table + s32 * 2, 2);
// This third character may be overwritten later but we don't care.
buffer[2] = radix_100_table[s32 * 2 + 1];
// The number of characters actually written is 1 or 3, similarly to the case of
// 7 or 8 digits.
buffer += (1 + (unsigned(s32 >= 10) & unsigned(buffer[2] > '0')) * 2);
}
}
// float specialization: prints the significand with print_9_digits (which also
// adjusts `exponent`), then appends the exponent as "E<n>" or "E-<n>" using one or
// two digits. Nothing is appended when the adjusted exponent is zero.
// Returns the position one past the last character written.
template <>
char* to_chars<float, default_float_traits<float>>(std::uint32_t s32, int exponent,
                                                   char* buffer) noexcept {
    print_9_digits(s32, exponent, buffer);

    if (exponent == 0) {
        return buffer;
    }

    // Exponent marker, followed by the sign for negative exponents only.
    *buffer++ = 'E';
    if (exponent < 0) {
        *buffer++ = '-';
        exponent = -exponent;
    }

    if (exponent < 10) {
        *buffer++ = char('0' + exponent);
    }
    else {
        std::memcpy(buffer, radix_100_table + exponent * 2, 2);
        buffer += 2;
    }
    return buffer;
}
// double specialization. The significand is split into a leading block
// (significand / 10^8) and a trailing 8-digit block (the remainder):
//  - if there is no trailing block, or it is zero, the leading block alone goes
//    through print_9_digits;
//  - otherwise the leading block is printed in full and the trailing block is
//    printed as 8 digits with trailing zeros dropped (none are expected when the
//    leading block has 9 digits).
// The exponent is then appended as "E<n>" / "E-<n>" with one to three digits, and
// omitted entirely when it is zero. Returns one past the last character written.
template <>
char* to_chars<double, default_float_traits<double>>(std::uint64_t const significand,
int exponent, char* buffer) noexcept {
// Print significand by decomposing it into a 9-digit block and a 8-digit block.
std::uint32_t first_block, second_block;
bool no_second_block;
if (significand >= 1'0000'0000) {
first_block = std::uint32_t(significand / 1'0000'0000);
second_block = std::uint32_t(significand) - first_block * 1'0000'0000;
exponent += 8;
no_second_block = (second_block == 0);
}
else {
first_block = std::uint32_t(significand);
no_second_block = true;
}
if (no_second_block) {
print_9_digits(first_block, exponent, buffer);
}
else {
// We proceed similarly to print_9_digits(), but since we do not need to remove
// trailing zeros, the procedure is a bit simpler.
if (first_block >= 1'0000'0000) {
// The input is of 17 digits, thus there should be no trailing zero at all.
// The first block is of 9 digits.
// 1441151882 = ceil(2^57 / 1'0000'0000) + 1
auto prod = first_block * std::uint64_t(1441151882);
prod >>= 25;
std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 8, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
// The second block is of 8 digits.
// 281474978 = ceil(2^48 / 100'0000) + 1
prod = second_block * std::uint64_t(281474978);
prod >>= 16;
prod += 1;
std::memcpy(buffer + 10, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 12, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 14, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 16, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
exponent += 8;
buffer += 18;
}
else {
if (first_block >= 100'0000) {
// 7 or 8 digits.
// 281474978 = ceil(2^48 / 100'0000) + 1
auto prod = first_block * std::uint64_t(281474978);
prod >>= 16;
auto two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
buffer[2] = radix_100_table[two_digits * 2 + 1];
exponent += (6 + unsigned(two_digits >= 10));
buffer += unsigned(two_digits >= 10);
// Print remaining 6 digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 6, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
buffer += 8;
}
else if (first_block >= 1'0000) {
// 5 or 6 digits.
// 429497 = ceil(2^32 / 1'0000)
auto prod = first_block * std::uint64_t(429497);
auto two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
buffer[2] = radix_100_table[two_digits * 2 + 1];
exponent += (4 + unsigned(two_digits >= 10));
buffer += unsigned(two_digits >= 10);
// Print remaining 4 digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 4, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
buffer += 6;
}
else if (first_block >= 100) {
// 3 or 4 digits.
// 42949673 = ceil(2^32 / 100)
auto prod = first_block * std::uint64_t(42949673);
auto two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer, radix_100_head_table + two_digits * 2, 2);
buffer[2] = radix_100_table[two_digits * 2 + 1];
exponent += (2 + unsigned(two_digits >= 10));
buffer += unsigned(two_digits >= 10);
// Print remaining 2 digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
std::memcpy(buffer + 2, radix_100_table + std::uint32_t(prod >> 32) * 2, 2);
buffer += 4;
}
else {
// 1 or 2 digits.
std::memcpy(buffer, radix_100_head_table + first_block * 2, 2);
buffer[2] = radix_100_table[first_block * 2 + 1];
exponent += unsigned(first_block >= 10);
buffer += (2 + unsigned(first_block >= 10));
}
// Next, print the second block.
// The second block is of 8 digits, but we may have trailing zeros.
// 281474978 = ceil(2^48 / 100'0000) + 1
auto prod = second_block * std::uint64_t(281474978);
prod >>= 16;
prod += 1;
auto two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer, radix_100_table + two_digits * 2, 2);
// Remaining 6 digits are all zero?
if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) {
buffer += (1 + unsigned(buffer[1] > '0'));
}
else {
// Obtain the next two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 2, radix_100_table + two_digits * 2, 2);
// Remaining 4 digits are all zero?
if (std::uint32_t(prod) <=
std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) {
buffer += (3 + unsigned(buffer[3] > '0'));
}
else {
// Obtain the next two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 4, radix_100_table + two_digits * 2, 2);
// Remaining 2 digits are all zero?
if (std::uint32_t(prod) <=
std::uint32_t((std::uint64_t(1) << 32) / 100)) {
buffer += (5 + unsigned(buffer[5] > '0'));
}
else {
// Obtain the last two digits.
prod = std::uint32_t(prod) * std::uint64_t(100);
two_digits = std::uint32_t(prod >> 32);
std::memcpy(buffer + 6, radix_100_table + two_digits * 2, 2);
buffer += (7 + unsigned(buffer[7] > '0'));
}
}
}
}
}
// Print exponent and return
if (exponent < 0) {
std::memcpy(buffer, "E-", 2);
buffer += 2;
exponent = -exponent;
}
else if (exponent > 0) {
buffer[0] = 'E';
buffer += 1;
}
else {
return buffer;
}
if (exponent >= 100) {
// d1 = exponent / 10; d2 = exponent % 10;
// 6554 = ceil(2^16 / 10)
auto prod = std::uint32_t(exponent) * std::uint32_t(6554);
auto d1 = prod >> 16;
prod = std::uint16_t(prod) * std::uint32_t(5); // * 10
auto d2 = prod >> 15; // >> 16
std::memcpy(buffer, &radix_100_table[d1 * 2], 2);
buffer[2] = char('0' + d2);
buffer += 3;
}
else if (exponent >= 10) {
std::memcpy(buffer, &radix_100_table[exponent * 2], 2);
buffer += 2;
}
else {
buffer[0] = char('0' + exponent);
buffer += 1;
}
return buffer;
}
}
}

@ -0,0 +1,63 @@
#ifndef __AQ_USE_THREADEDGC__
#include <atomic>
// Deferred-deallocation collector. Pointers passed to reg() are queued together
// with their deallocator and released later by gc(), which daemon() calls from the
// thread created in start_deamon(). The protected member functions and reg() are
// defined outside this header.
class GC {
private:
    size_t max_slots,              // start_deamon() sizes each queue as max_slots << 1 entries
        interval,                  // daemon sleep between checks, in microseconds
        forced_clean,              // forceclean_timer value above which gc() is forced
        forceclean_timer = 0;      // microseconds accumulated since the last forced check
    uint64_t max_size;             // current_size budget that triggers a collection
    // Both flags are read by the daemon thread; give them a defined value up
    // front because the constructor never assigns `running`.
    bool running = false, alive = false;
    uint32_t threshould;           // reg() frees immediately when sz is below this
    // Type-erased queue storage (front/back buffers) and the daemon thread handle.
    void *q = nullptr, *q_back = nullptr;
    void* handle = nullptr;
    std::atomic<uint32_t> slot_pos{0};      // number of entries queued in `q`
    std::atomic<uint32_t> alive_cnt{0};     // reg() calls currently in flight
    std::atomic<uint64_t> current_size{0};  // sum of `sz` of the queued entries
    // NOTE(review): `lock`, `running` and `alive` are shared with the daemon thread
    // as plain/volatile bools, not atomics -- confirm this is acceptable.
    volatile bool lock = false;
    // maybe use volatile std::thread::id instead
protected:
    void acquire_lock();
    void release_lock();
    void gc();
    void daemon();
    void start_deamon();
    void terminate_daemon();
public:
    // Queues `v` for later release through `f`; `sz` is the size hint used for
    // the threshold check and the size budget.
    void reg(void* v, uint32_t sz = 1,
             void(*f)(void*) = free
    );
    // Defaults: max_size 0xfffffff (~256 MB), 4096 slots, 10000 us (10 ms)
    // interval, 1000000 us (one second) forced-clean period, threshold 64.
    // Starts the daemon and publishes this instance through gc_handle.
    GC(
        uint64_t max_size = 0xfffffff, uint32_t max_slots = 4096,
        uint32_t interval = 10000, uint32_t forced_clean = 1000000,
        uint32_t threshould = 64
    ) : // listed in declaration order, which is the order they are initialised in
        max_slots(max_slots), interval(interval), forced_clean(forced_clean),
        max_size(max_size), threshould(threshould) {
        start_deamon();
        GC::gc_handle = this;
    }
    ~GC(){
        terminate_daemon();
    }
    static GC* gc_handle;
    constexpr static void(*_free) (void*) = free;
};
#else
// Fallback collector without a background thread: reg() releases the pointer
// immediately instead of queueing it.
class GC {
public:
    // Accepts (and ignores) the same tuning arguments as the queueing collector so
    // that construction code compiles unchanged against either variant.
    // NOTE(review): this constructor does not assign gc_handle -- confirm whether
    // callers expect it to be set here.
    GC(uint64_t = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0) {}
    // Calls f(v) right away. The size hint is unused; null pointers and null
    // deallocators are ignored.
    void reg(
        void* v, uint32_t = 0,
        void(*f)(void*) = free
    ) const {
        if (v != nullptr && f != nullptr)
            f(v);
    }
    // Named gc_handle to match the out-of-class definition `GC* GC::gc_handle`.
    static GC* gc_handle;
    constexpr static void(*_free) (void*) = free;
};
#endif

@ -1,53 +0,0 @@
#pragma once
#include <vector_type>
#include <utility>
#include <thread>
#include <chrono>
// Earlier collector prototype: a queue of (pointer, deallocator) pairs plus a
// polling thread. gc() is still an empty stub, so queued pointers are never
// released by this version.
// NOTE(review): every member is private and there is no constructor, so
// max_size / interval / forced_clean are never given a value -- confirm intent.
class GC {
    template<class T>
    using vector = vector_type<T>;
    template<class ...T>
    using tuple = std::tuple<T...>;
    size_t current_size = 0, max_size, interval, forced_clean;
    // Defined before the daemon thread can read them.
    bool running = false, alive = false;
    // ptr, dealloc
    vector<tuple<void*, void (*)(void*)>> q;
    std::thread handle;
    // Placeholder: collection is not implemented.
    void gc()
    {
    }
    // Queues `v` with its deallocator `f`; runs gc() first when the accumulated
    // size exceeds max_size. `ref` is unused.
    void reg(void* v, uint32_t ref, uint32_t sz,
             void(*f)(void*) = [](void* v) {free (v); }) {
        current_size += sz;
        if (current_size > max_size)
            gc();
        q.push_back({ v, f });
    }
    // Thread body: while alive, run gc() every `interval` microseconds when
    // `running`, otherwise poll every 10 ms.
    void daemon() {
        using namespace std::chrono;
        while (alive) {
            if (running) {
                gc();
                std::this_thread::sleep_for(microseconds(interval));
            }
            else {
                std::this_thread::sleep_for(10ms);
            }
        }
    }
    void start_deamon() {
        // Raise the flag first so the new thread cannot observe alive == false
        // and exit immediately.
        alive = true;
        // A non-static member function needs the pointer-to-member form plus
        // the object it runs on.
        handle = std::thread(&GC::daemon, this);
    }
    void terminate_daemon() {
        running = false;
        alive = false;
        using namespace std::chrono;
        if (handle.joinable()) {
            std::this_thread::sleep_for(microseconds(1000 + std::max(static_cast<size_t>(10000), interval)));
            handle.join();
        }
    }
};

@ -0,0 +1,121 @@
#pragma once
// Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
// u32 / u64 are whatever types the compiler assigns to these two hexadecimal
// literals; the static_asserts below reject a platform where they are not
// unsigned or do not wrap at 32 / 64 bits.
using u32 = decltype(0xffffffff);
using u64 = decltype(0xffffffffffffffff);
static_assert(u32(-1) > 0, "u32 must be unsigned");
static_assert(u32(0xffffffff) + u32(1) == u32(0), "u32 must be 32 bits");
static_assert(u64(-1) > 0, "u64 must be unsigned");
static_assert(u64(0xffffffffffffffff) + u32(1) == u32(0), "u64 must be 64 bits");
// "00".."99" back to back: the two characters for n start at index 2 * n.
constexpr auto digits_00_99 =
"00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849"
"50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899";
// Two adjacent characters, copied as one unit by JEAIII_W.
struct pair { char t, o; };
// All macros below expect these locals at the expansion site: `b` (output
// pointer), `u` (32-bit value being printed) and `t` (64-bit scratch).
// JEAIII_W(I, U): store the two digits of U (0..99) at b[I], b[I + 1].
#define JEAIII_W(I, U) *(pair*)&b[I] = *(pair*)&digits_00_99[(U) * 2]
// JEAIII_A(I, N): load t with u scaled by a fixed-point reciprocal of 10^N and
// write (t >> 32) as a digit pair at b[I]; the low 32 bits of t carry the rest.
#define JEAIII_A(I, N) t = (u64(1) << (32 + N / 5 * N * 53 / 16)) / u32(1e##N) + 1 + N / 6 - N / 8, t *= u, t >>= N / 5 * N * 53 / 16, t += N / 6 * 4, JEAIII_W(I, t >> 32)
// JEAIII_S(I): write one more digit at b[I] from the low 32 bits of t.
#define JEAIII_S(I) b[I] = char(u64(10) * u32(t) >> 32) + '0'
// JEAIII_D(I): multiply the low 32 bits of t by 100 and write the next digit pair at b[I].
#define JEAIII_D(I) t = u64(100) * u32(t), JEAIII_W(I, t >> 32)
// JEAIII_C<N>(I): write the N + 1 digits of u starting at b[I].
#define JEAIII_C0(I) b[I] = char(u) + '0'
#define JEAIII_C1(I) JEAIII_W(I, u)
#define JEAIII_C2(I) JEAIII_A(I, 1), JEAIII_S(I + 2)
#define JEAIII_C3(I) JEAIII_A(I, 2), JEAIII_D(I + 2)
#define JEAIII_C4(I) JEAIII_A(I, 3), JEAIII_D(I + 2), JEAIII_S(I + 4)
#define JEAIII_C5(I) JEAIII_A(I, 4), JEAIII_D(I + 2), JEAIII_D(I + 4)
#define JEAIII_C6(I) JEAIII_A(I, 5), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_S(I + 6)
#define JEAIII_C7(I) JEAIII_A(I, 6), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6)
#define JEAIII_C8(I) JEAIII_A(I, 7), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_S(I + 8)
#define JEAIII_C9(I) JEAIII_A(I, 8), JEAIII_D(I + 2), JEAIII_D(I + 4), JEAIII_D(I + 6), JEAIII_D(I + 8)
// JEAIII_L(N, A, B): pick A when u < 10^N, else B.
#define JEAIII_L(N, A, B) u < u32(1e##N) ? A : B
// JEAIII_L09 / JEAIII_L03: comparison trees that expand F(k) with k = digit
// count of u minus one, for k in 0..9 and 0..3 respectively.
#define JEAIII_L09(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(6, JEAIII_L(4, JEAIII_L(3, F(2), F(3)), JEAIII_L(5, F(4), F(5))), JEAIII_L(8, JEAIII_L(7, F(6), F(7)), JEAIII_L(9, F(8), F(9)))))
#define JEAIII_L03(F) JEAIII_L(2, JEAIII_L(1, F(0), F(1)), JEAIII_L(3, F(2), F(3)))
// JEAIII_K(N): print u (N + 1 digits); value is the end pointer.
#define JEAIII_K(N) (JEAIII_C##N(0), b + N + 1)
// JEAIII_KX(N): print u (N + 1 digits) followed by x as exactly 8 digits.
#define JEAIII_KX(N) (JEAIII_C##N(0), u = x, JEAIII_C7(N + 1), b + N + 9)
// JEAIII_KYX(N): print u (N + 1 digits) followed by y and x as 8 digits each.
#define JEAIII_KYX(N) (JEAIII_C##N(0), u = y, JEAIII_C7(N + 1), u = x, JEAIII_C7(N + 9), b + N + 17)
// Compile-time type selector: cond<Pick, Then, Else> names Then when Pick is true
// and Else otherwise. The primary template handles `false`; the partial
// specialization handles `true`.
template<bool Pick, class Then, class Else> struct _cond { using type = Else; };
template<class Then, class Else> struct _cond<true, Then, Else> { using type = Then; };
template<bool Pick, class Then, class Else> using cond = typename _cond<Pick, Then, Else>::type;
// Writes the decimal text of `i` at `b` (with a leading '-' for negative values,
// no terminator) and returns the position one past the last character written.
// Values that fit in 32 bits take the single-block path; larger values are split
// into 8-digit blocks (x, and y when needed) below a leading block.
template<class T> inline char* to_text_from_integer(char* b, T i)
{
u64 t = u64(i);
if (i < T(0))
t = u64(0) - t, b[0] = '-', ++b;
u32 u = cond<T(1) != T(2), cond<sizeof(T) != 1, cond<sizeof(T) != sizeof(short), u32, unsigned short>, unsigned char>, bool>(t);
// If our input type fits in 32 bits, or its value does, treat it as 32-bit (the line above ensures the compiler can still know the range limits of the input type)
// and optimize out cases for small integer types (if only c++ had a builtin way to get the unsigned type from a signed type)
if (sizeof(i) <= sizeof(u) || u == t)
return JEAIII_L09(JEAIII_K);
u32 x = t % 100000000u;
u = u32(t /= 100000000u);
// t / 10^8 (fits in 32 bit), t % 10^8 -> ~17.5 digits
if (u == t)
return JEAIII_L09(JEAIII_KX);
// t / 10^16 (1-4 digits), t / 10^8 % 10^8, t % 10^8
u32 y = t % 100000000u;
u = u32(t / 100000000u);
return JEAIII_L03(JEAIII_KYX);
}
// Non-template entry points, one per built-in integer type. Each forwards to
// to_text_from_integer and returns its result (the end of the written text).
inline char* to_text(char text[], signed char i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], unsigned char i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], short i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], unsigned short i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], int i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], unsigned int i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], long i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], unsigned long i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], long long i) { return to_text_from_integer(text, i); }
inline char* to_text(char text[], unsigned long long i) { return to_text_from_integer(text, i); }
// Copyright (c) 2022 Bill Sun
//#if defined(SIZEOF___INT128) || (defined(SIZEOF___INT128_T) && defined(SIZEOF___UINT128_T))
constexpr static __uint128_t _10_19 = 10000000000000000000ull,
_10_37 = _10_19*_10_19 / 10;
template<class T>
char* jeaiii_i128(char* buf, T v){
if constexpr (std::is_signed_v<T>) {
if (v < 0){
*(buf++) = '0';
v = -v;
}
}
if (v > _10_37){
uint8_t vv = uint8_t(v/_10_37);
// vv <<= 1;
// if (vv < 20)
// *buf ++ = digits_00_99[vv + 1];
// else{
// memcpy(buf, digits_00_99 + vv, 2);
// buf += 2;
// }
*(buf++) = vv%10 + '0';
vv/=10;
if (vv) {
*buf = *(buf-1);
*(buf++-1) = vv + '0';
}
}
if (v > _10_19)
buf = to_text(buf, uint64_t((v/_10_19) % _10_19));
buf = to_text(buf, uint64_t(v % _10_19));
return buf;
}
// #endif

@ -1,20 +1,20 @@
#include "pch_msc.hpp"
#include "io.h"
#include "table.h"
#include <limits>
#include <chrono>
#include <ctime>
#include "utils.h"
#include "libaquery.h"
#include <random>
char* gbuf = nullptr;
void setgbuf(char* buf) {
static char* b = 0;
if (buf == 0)
static char* b = nullptr;
if (buf == nullptr)
gbuf = b;
else {
gbuf = buf;
@ -63,6 +63,7 @@ T getInt(const char*& buf){
}
return ret;
}
template<class T>
char* intToString(T val, char* buf){
@ -275,6 +276,44 @@ inline const char* str(const bool& v) {
return v ? "true" : "false";
}
// Allocates the per-session pointer -> deallocator map, then initialises the
// first session.
Context::Context() {
    using memory_map_t = std::unordered_map<void*, deallocator_t>;
    current.memory_map = new memory_map_t;
    init_session();
}
// Destroys the session memory map itself. Deallocators of entries still in the
// map are not invoked here.
Context::~Context() {
    using memory_map_t = std::unordered_map<void*, deallocator_t>;
    delete (memory_map_t*) this->current.memory_map;
}
void Context::init_session(){
if (log_level == LOG_INFO){
memset(&(this->current.stats), 0, sizeof(Session::Statistic));
}
auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
memmap->clear();
}
void Context::end_session(){
auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
for (auto& mem : *memmap) {
mem.second(mem.first);
}
memmap->clear();
}
// Looks up `fname` in the module function map and returns the stored pointer.
// Returns nullptr when the name is not registered, when `fname` is null, or when
// no function map has been installed yet.
void* Context::get_module_function(const char* fname){
    auto fmap = static_cast<std::unordered_map<std::string, void*>*>
        (this->module_function_maps);
    // Constructing a std::string key from a null pointer, or dereferencing a
    // missing map, would be undefined behaviour.
    if (fname == nullptr || fmap == nullptr)
        return nullptr;
    const auto ret = fmap->find(fname);
    return ret == fmap->end() ? nullptr : ret->second;
}
// template<typename _Ty>
// inline void vector_type<_Ty>::out(uint32_t n, const char* sep) const
// {
@ -288,3 +327,195 @@ inline const char* str(const bool& v) {
// }
// std::cout << ')';
// }
#include "gc.h"
#include <utility>
#include <thread>
#ifndef __AQ_USE_THREADEDGC__
// One queue entry of the collector: a pointer and the function that releases it.
// Entries are brace-initialised as {memory, deallocator}, so member order matters.
struct gcmemory_t{
void* memory;
void (*deallocator)(void*);
};
// The queue buffers are handled as plain pointers to arrays of entries.
using memoryqueue_t = gcmemory_t*;
// Currently a no-op: the thread-id based spin lock below is commented out.
void GC::acquire_lock() {
// auto this_tid = std::this_thread::get_id();
// while(lock != this_tid)
// {
// while(lock != this_tid && lock != std::thread::id()) {
// std::this_thread::sleep_for(std::chrono::milliseconds(0));
// }
// lock = this_tid;
// }
}
// Currently a no-op, matching acquire_lock: the release is commented out.
void GC::release_lock(){
// lock = std::thread::id();
}
// Runs one collection: swaps the front and back queue buffers, then calls the
// deallocator of every entry in the first `slot_pos` slots of the old front
// buffer and zeroes those slots.
// Returns early, leaving `running` untouched, when nothing is queued.
void GC::gc()
{
auto _q = static_cast<memoryqueue_t>(q);
auto _q_back = static_cast<memoryqueue_t>(q_back);
if (slot_pos == 0)
return;
auto t = _q;
// Raise the flag that reg() spins on, then wait until no reg() call is in flight.
lock = true;
while(alive_cnt != 0);
// Publish the back buffer as the new front and reset the counters while
// registrations are held off.
q = _q_back;
uint32_t _slot = slot_pos;
slot_pos = 0;
current_size = 0;
lock = false;
q_back = t;
// Registrations may proceed from here on; the old buffer is released meanwhile.
for(uint32_t i = 0; i < _slot; ++i){
if (_q[i].memory != nullptr && _q[i].deallocator != nullptr)
_q[i].deallocator(_q[i].memory);
}
memset(_q, 0, sizeof(gcmemory_t) * _slot);
// NOTE(review): a reg() that set running = true during the loop above is
// overridden here -- confirm this is intended.
running = false;
}
void GC::daemon() {
using namespace std::chrono;
while (alive) {
if (running) {
if (current_size - max_size > 0 ||
forceclean_timer > forced_clean)
{
gc();
forceclean_timer = 0;
}
std::this_thread::sleep_for(microseconds(interval));
forceclean_timer += interval;
}
else {
std::this_thread::sleep_for(10ms);
forceclean_timer += 10000;
}
}
}
void GC::start_deamon() {
q = new gcmemory_t[max_slots << 1];
q_back = new memoryqueue_t[max_slots << 1];
lock = false;
slot_pos = 0;
current_size = 0;
alive_cnt = 0;
alive = true;
handle = new std::thread(&GC::daemon, this);
}
// Stops the daemon thread and releases the queue buffers.
// Entries still queued are not deallocated here.
void GC::terminate_daemon() {
    running = false;
    alive = false;
    auto _handle = static_cast<std::thread*>(handle);
    // Wait for the daemon to leave its loop BEFORE touching the buffers: it may
    // be in the middle of gc(), which reads and writes both of them.
    if (_handle != nullptr) {
        if (_handle->joinable()) {
            _handle->join();
        }
        delete _handle;
        handle = nullptr;
    }
    delete[] static_cast<memoryqueue_t>(q);
    delete[] static_cast<memoryqueue_t>(q_back);
    q = nullptr;
    q_back = nullptr;
}
void GC::reg(void* v, uint32_t sz, void(*f)(void*)) { //~ 40ns expected v. free ~ 75ns
if (v == nullptr || f == nullptr)
return;
if (sz < threshould){
f(v);
return;
}
auto _q = static_cast<memoryqueue_t>(q);
while(lock);
++alive_cnt;
current_size += sz;
auto _slot = (slot_pos += 1);
_q[_slot] = {v, f};
--alive_cnt;
running = true;
}
#endif
// Storage for the static collector handle; null until something assigns it.
GC* GC::gc_handle = nullptr;
#include "dragonbox/dragonbox_to_chars.hpp"
// float: `value` points at a float; its text is produced by dragonbox's
// to_chars_n (no terminator). Returns one past the last character written.
template<>
char*
aq_to_chars<float>(void* value, char* buffer) {
    const float number = *static_cast<float*>(value);
    return jkj::dragonbox::to_chars_n(number, buffer);
}
// double: `value` points at a double; its text is produced by dragonbox's
// to_chars_n (no terminator). Returns one past the last character written.
template<>
char*
aq_to_chars<double>(void* value, char* buffer) {
    const double number = *static_cast<double*>(value);
    return jkj::dragonbox::to_chars_n(number, buffer);
}
// bool printer: copies "true" or "false" (without a terminating NUL) into
// `buffer` and returns the position just past the copied text.
template<>
inline char*
aq_to_chars<bool>(void* value, char* buffer) {
    const bool flag = *static_cast<bool*>(value);
    if (!flag) {
        memcpy(buffer, "false", 5);
        return buffer + 5;
    }
    memcpy(buffer, "true", 4);
    return buffer + 4;
}
// C-string printer: `value` points at a char*; the string it refers to is
// copied (without its NUL) into `buffer`. Returns the position after the copy.
template<>
char*
aq_to_chars<char*>(void* value, char* buffer) {
    char* const text = *static_cast<char**>(value);
    const auto n_chars = strlen(text);
    memcpy(buffer, text, n_chars);
    char* const past_end = buffer + n_chars;
    return past_end;
}
// Date printer: emits year, month and day via to_text, separated by '-'.
// Returns the position after the day field.
template<>
char*
aq_to_chars<types::date_t>(void* value, char* buffer) {
    const auto& date = *static_cast<types::date_t*>(value);
    char* cursor = buffer;
    cursor = to_text(cursor, date.year);
    *cursor = '-';
    ++cursor;
    cursor = to_text(cursor, date.month);
    *cursor = '-';
    ++cursor;
    cursor = to_text(cursor, date.day);
    return cursor;
}
// Time printer: emits hours, minutes, seconds and ms via to_text, each pair
// separated by ':'. Returns the position after the ms field.
template<>
char*
aq_to_chars<types::time_t>(void* value, char* buffer) {
    const auto& tm = *static_cast<types::time_t*>(value);
    char* cursor = buffer;
    cursor = to_text(cursor, tm.hours);
    *cursor = ':';
    ++cursor;
    cursor = to_text(cursor, tm.minutes);
    *cursor = ':';
    ++cursor;
    cursor = to_text(cursor, tm.seconds);
    *cursor = ':';
    ++cursor;
    cursor = to_text(cursor, tm.ms);
    return cursor;
}
// Timestamp printer: the date part, one space, then the time part, each
// produced by the corresponding aq_to_chars specialisation.
template<>
char*
aq_to_chars<types::timestamp_t>(void* value, char* buffer) {
    auto& stamp = *static_cast<types::timestamp_t*>(value);
    void* const date_part = static_cast<void*>(&stamp.date);
    void* const time_part = static_cast<void*>(&stamp.time);
    char* cursor = aq_to_chars<types::date_t>(date_part, buffer);
    *cursor = ' ';
    ++cursor;
    return aq_to_chars<types::time_t>(time_part, cursor);
}

@ -1,8 +1,37 @@
#ifndef _AQUERY_H
#define _AQUERY_H
#include "table.h"
#ifdef __INTELLISENSE__
#define __AQUERY_ITC_USE_SEMPH__
#define THREADING
#define __AQ_THREADED_GC__
#endif
#include <unordered_map>
#include <chrono>
// Small stopwatch used to time query phases.
//
// Uses std::chrono::steady_clock: high_resolution_clock may be an alias of the
// system clock, which can be adjusted and so yield negative or skewed
// intervals. Readings are reported in nanoseconds.
// NOTE(review): the previous version returned the native tick count of
// high_resolution_clock; that is nanoseconds on the common standard libraries,
// but confirm no caller relied on a different unit.
class aq_timer {
private:
    using clock_type = std::chrono::steady_clock;
    clock_type::time_point now;   // start of the current measurement

    // Nanoseconds between the stored start point and the present.
    long long since_start() const {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(
            clock_type::now() - now).count();
    }
public:
    aq_timer() : now(clock_type::now()) {}

    // Restarts the measurement from the current instant.
    void reset(){
        now = clock_type::now();
    }
    // Returns the time since the last reset/construction and restarts.
    long long elapsed(){
        const long long ret = since_start();
        reset();
        return ret;
    }
    // Returns the time since the last reset/construction without restarting.
    long long lap() const{
        return since_start();
    }
};
#include "table.h"
enum Log_level {
LOG_INFO,
@ -15,9 +44,16 @@ enum Backend_Type {
BACKEND_MonetDB,
BACKEND_MariaDB
};
// Per-query timing counters; both are accumulated from aq_timer::elapsed()
// readings while a query batch is processed.
struct QueryStats{
    long long monet_time,     // time spent executing SQL in MonetDB
              postproc_time;  // time spent in post-processing / result printing
};
// Configuration/control record shared with the engine loop; dll_main receives
// it through argv[0] and polls `running` / `new_query`.
// NOTE(review): the int members are declared twice below (once with
// `exec_time`, once without) — this looks like old and new revisions of the
// same declaration left side by side; confirm which one is meant to stay.
struct Config{
int running, new_query, server_mode,
backend_type, has_dll, exec_time, n_buffers;
int running, new_query, server_mode,
backend_type, has_dll,
n_buffers;
// Timing counters reset at the start of each new query batch.
QueryStats stats;
// Trailing array with no declared bound; dll_main reads it as the per-buffer sizes.
int buffer_sizes[];
};
@ -47,7 +83,10 @@ struct Context{
#ifdef THREADING
void* thread_pool;
#endif
printf_type print = printf;
#ifdef __AQ_THREADED_GC__
void* gc;
#endif
printf_type print = &printf;
Context();
virtual ~Context();
template <class ...Types>
@ -67,6 +106,8 @@ struct Context{
std::unordered_map<const char*, uColRef *> cols;
};
#ifdef _WIN32
#define __DLLEXPORT__ __declspec(dllexport) __stdcall
#else
@ -76,4 +117,40 @@ struct Context{
#define __AQEXPORT__(_Ty) extern "C" _Ty __DLLEXPORT__
typedef void (*deallocator_t) (void*);
#include <type_traits>
#include "jeaiii_to_text.h"
// Generic printer for integral types: formats the pointed-to value with
// to_text and returns whatever position to_text reports.
template<class T>
inline std::enable_if_t<std::is_integral_v<T>, char *>
aq_to_chars(void* value, char* buffer) {
    T* typed = static_cast<T*>(value);
    return to_text(buffer, *typed);
}
// Fallback for non-integral types without a dedicated specialisation:
// writes nothing and hands the buffer position back unchanged.
template<class T>
inline std::enable_if_t<!std::is_integral_v<T>, char *>
aq_to_chars(void* value, char* buffer) {
    (void)value;  // intentionally unused
    char* const unchanged = buffer;
    return unchanged;
}
#ifdef __SIZEOF_INT128__
// Signed 128-bit printer: delegates to jeaiii_i128.
template<>
inline char*
aq_to_chars<__int128_t>(void* value, char* buffer) {
    __int128_t* typed = static_cast<__int128_t*>(value);
    return jeaiii_i128<__int128_t>(buffer, *typed);
}
// Unsigned 128-bit printer: delegates to jeaiii_i128.
template<>
inline char*
aq_to_chars<__uint128_t>(void* value, char* buffer) {
    __uint128_t* typed = static_cast<__uint128_t*>(value);
    return jeaiii_i128<__uint128_t>(buffer, *typed);
}
#endif
template<> char* aq_to_chars<float>(void* , char*);
template<> char* aq_to_chars<double>(void* , char*);
template<> char* aq_to_chars<char*>(void* , char*);
template<> char* aq_to_chars<types::date_t>(void* , char*);
template<> char* aq_to_chars<types::time_t>(void* , char*);
template<> char* aq_to_chars<types::timestamp_t>(void* , char*);
#endif

@ -2,12 +2,14 @@
#include "libaquery.h"
#include <cstdio>
#include <string>
#include "monetdb_conn.h"
#include "monetdbe.h"
#include "table.h"
#undef static_assert
const char* monetdbe_type_str[] = {
constexpr const char* monetdbe_type_str[] = {
"monetdbe_bool", "monetdbe_int8_t", "monetdbe_int16_t", "monetdbe_int32_t", "monetdbe_int64_t",
#ifdef HAVE_HGE
"monetdbe_int128_t",
@ -20,7 +22,7 @@ const char* monetdbe_type_str[] = {
"monetdbe_type_unknown"
} ;
const unsigned char monetdbe_type_szs[] = {
inline constexpr static unsigned char monetdbe_type_szs[] = {
sizeof(monetdbe_column_bool::null_value), sizeof(monetdbe_column_int8_t::null_value),
sizeof(monetdbe_column_int16_t::null_value), sizeof(monetdbe_column_int32_t::null_value),
sizeof(monetdbe_column_int64_t::null_value),
@ -36,7 +38,19 @@ const unsigned char monetdbe_type_szs[] = {
1
};
namespace types{
    // AQuery type tag for each MonetDB column type id; print_results indexes
    // this table with monetdbe_column::type. The per-entry notes follow the
    // order of the parallel monetdbe_type_szs table.
    constexpr const Type_t monetdbe_type_aqtypes[] = {
        ABOOL,       // bool
        AINT8,       // int8
        AINT16,      // int16
        AINT32,      // int32
        AINT64,      // int64
#ifdef HAVE_HGE
        AINT128,     // int128 (only when MonetDB is built with huge ints)
#endif
        AUINT64,     // size_t
        AFLOAT,      // float
        ADOUBLE,     // double
        ASTR,        // str
        // blob?
        AINT64,
        ADATE,       // date
        ATIME,       // time
        ATIMESTAMP,  // timestamp
        ERROR        // sentinel for the trailing "unknown" type
    };
}
Server::Server(Context* cxt){
if (cxt){
@ -80,7 +94,7 @@ void Server::connect(Context *cxt){
else{
if(server)
free(server);
this->server = 0;
this->server = nullptr;
status = false;
puts(ret == -1 ? "Allocation Error." : "Internal Database Error.");
}
@ -103,7 +117,7 @@ void Server::exec(const char* q){
bool Server::haserror(){
if (last_error){
last_error = 0;
last_error = nullptr;
return true;
}
else{
@ -111,12 +125,53 @@ bool Server::haserror(){
}
}
// Prints the current result set to stdout using printf-style formats.
//   sep - text placed between columns (and around the '|' in the header)
//   end - text written after the header, the '=' rule and every row
// Does nothing when the server reports an error, when there is no result,
// or when the result has no columns.
void Server::print_results(const char* sep, const char* end){
    if (haserror())
        return;

    auto _res = static_cast<monetdbe_result*> (res);
    if (_res == nullptr) {
        puts("Error: No result.");
        return;
    }
    const auto ncols = _res->ncols;
    // The `ncols - 1` arithmetic and the header trimming below both assume
    // at least one column.
    if (ncols == 0)
        return;

    monetdbe_column** cols = static_cast<monetdbe_column**>(malloc(sizeof(monetdbe_column*) * ncols));
    const char** col_data = static_cast<const char**> (malloc(sizeof(char*) * ncols));
    if (cols == nullptr || col_data == nullptr) {
        puts("Allocation Error.");
        free(cols);
        free(col_data);
        return;
    }
    std::string* printf_string = new std::string[ncols];
    uint8_t* szs = static_cast<uint8_t*>(alloca(ncols));
    std::string header_string = "";
    const char* err_msg = nullptr;

    for(uint32_t i = 0; i < ncols; ++i){
        err_msg = monetdbe_result_fetch(_res, &cols[i], i);
        // On failure cols[i] is not usable; stop instead of dereferencing it.
        if (err_msg != nullptr)
            break;
        printf_string[i] =
            std::string(types::printf_str[types::monetdbe_type_aqtypes[cols[i]->type]])
            + (i < ncols - 1 ? sep : "");
        // Diagnostic output kept from the original: format string and type name.
        puts(printf_string[i].c_str());
        puts(monetdbe_type_str[cols[i]->type]);
        col_data[i] = static_cast<char *>(cols[i]->data);
        szs [i] = monetdbe_type_szs[cols[i]->type];
        header_string = header_string + cols[i]->name + sep + '|' + sep;
    }

    if (err_msg != nullptr) {
        puts(err_msg);
    }
    else {
        // Drop the tail of the last "sep|sep" group. Both sides are unsigned,
        // so test with >= rather than subtracting (the old `size - l_sep >= 0`
        // was always true and could underflow).
        const size_t l_sep = strlen(sep) + 1;
        if (header_string.size() >= l_sep)
            header_string.resize(header_string.size() - l_sep);
        header_string += end + std::string(header_string.size(), '=') + end;
        fputs(header_string.c_str(), stdout);

        for(uint64_t i = 0; i < cnt; ++i){
            for(uint32_t j = 0; j < ncols; ++j){
                // NOTE(review): every field is read as a pointer-sized value
                // regardless of the column's element size, and passed through
                // varargs as void* — confirm this matches the format strings
                // for narrow and floating-point columns.
                printf(printf_string[j].c_str(), *((void**)col_data[j]));
                col_data[j] += szs[j];
            }
            fputs(end, stdout);
        }
    }

    free(cols);
    delete[] printf_string;
    free(col_data);
}
// Closes the MonetDB handle, if one is open, and frees the storage that held it.
void Server::close(){
if(this->server){
// Local `server` shadows the member; it is the member viewed with its real type.
auto server = static_cast<monetdbe_database*>(this->server);
monetdbe_close(*(server));
free(server);
// NOTE(review): the member is cleared twice (0, then nullptr) — this looks
// like the old and new form of the same line; one of them is redundant.
this->server = 0;
this->server = nullptr;
}
}
@ -130,7 +185,7 @@ void* Server::getCol(int col_idx){
auto _ret_col = static_cast<monetdbe_column*>(this->ret_col);
cnt = _ret_col->count;
printf("Dbg: Getting col %s, type: %s\n",
_ret_col->name, monetdbe_type_str[_ret_col->type]);
_ret_col->name, monetdbe_type_str[_ret_col->type]);
return _ret_col->data;
}
else{
@ -140,7 +195,7 @@ void* Server::getCol(int col_idx){
else{
puts("Error: No result.");
}
return 0;
return nullptr;
}
Server::~Server(){
@ -149,10 +204,10 @@ Server::~Server(){
// Reports whether this build was compiled with both _MONETDBE_LIB_ and
// HAVE_HGE defined; returns HAVE_HGE in that case and false otherwise.
bool Server::havehge() {
#if defined(_MONETDBE_LIB_) and defined(HAVE_HGE)
// NOTE(review): a live puts() sits next to a commented-out copy of itself in
// both branches — presumably the debug print was meant to be disabled; confirm.
puts("true");
// puts("true");
return HAVE_HGE;
#else
puts("false");
// puts("false");
return false;
#endif
}

@ -22,6 +22,9 @@ struct Server{
void close();
bool haserror();
static bool havehge();
void test(const char*);
void print_results(const char* sep = " ", const char* end = "\n");
friend void print_monetdb_results(Server* srv, const char* sep, const char* end, int limit);
~Server();
};

@ -1,47 +1,137 @@
#include "pch_msc.hpp"
#include "../csv.h"
#include <iostream>
#include <string>
#include <chrono>
#include <thread>
#include "libaquery.h"
#include "monetdb_conn.h"
#ifdef THREADING
#include "threading.h"
#endif
#ifdef _WIN32
#include "winhelper.h"
#else
#include <dlfcn.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <atomic>
// fast numeric to string conversion
#include "jeaiii_to_text.h"
#include "dragonbox/dragonbox_to_chars.h"
// POSIX-side wrapper around a small shared mapping: opens `fname` read/write
// and maps its first 8 bytes as MAP_SHARED. pData is null when open() fails.
// NOTE(review): several lines below appear twice in an old and a new form
// (constructor header, mmap call, null assignment, FreeMemoryMap header) —
// looks like unmerged diff residue; confirm which variants should remain.
// NOTE(review): the mmap() result is not compared against MAP_FAILED and the
// descriptor in hFileMap is never closed in this struct — confirm intended.
struct SharedMemory
{
std::atomic<bool> a;
int hFileMap;
void* pData;
SharedMemory(const char* fname) {
explicit SharedMemory(const char* fname) {
hFileMap = open(fname, O_RDWR, 0);
if (hFileMap != -1)
pData = mmap(NULL, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0);
pData = mmap(nullptr, 8, PROT_READ | PROT_WRITE, MAP_SHARED, hFileMap, 0);
else
pData = 0;
pData = nullptr;
}
// Intentionally empty in this branch; the body performs no unmapping.
void FreeMemoryMap() {
void FreeMemoryMap() const {
// automatically unmapped in posix
}
};
#ifndef __USE_STD_SEMAPHORE__
#ifdef __APPLE__
#include <dispatch/dispatch.h>
class A_Semaphore {
private:
dispatch_semaphore_t native_handle;
public:
A_Semaphore(bool v = false) {
native_handle = dispatch_semaphore_create(v);
}
void acquire() {
// puts("acquire");
dispatch_semaphore_wait(native_handle, DISPATCH_TIME_FOREVER);
}
void release() {
// puts("release");
dispatch_semaphore_signal(native_handle);
}
~A_Semaphore() {
}
};
#else
#include <semaphore.h>
// Binary semaphore backed by an unnamed POSIX semaphore.
class A_Semaphore {
private:
    sem_t native_handle;
public:
    // `v` selects the initial state: true -> available (count 1),
    // false -> taken (count 0).
    // sem_init's arguments are (sem, pshared, value). The flag used to be
    // passed as `pshared` with a fixed value of 1, so every semaphore started
    // available regardless of `v`. The semaphore is only used between threads
    // of this process, hence pshared = 0.
    A_Semaphore(bool v = false) {
        sem_init(&native_handle, 0, v ? 1u : 0u);
    }
    // A sem_t must not be copied; only the original object is a valid semaphore.
    A_Semaphore(const A_Semaphore&) = delete;
    A_Semaphore& operator=(const A_Semaphore&) = delete;

    // Blocks until the semaphore is available, then takes it.
    void acquire() {
        sem_wait(&native_handle);
    }
    // Makes the semaphore available, waking one waiter if any.
    void release() {
        sem_post(&native_handle);
    }
    ~A_Semaphore() {
        sem_destroy(&native_handle);
    }
};
#endif
#endif
#endif
#ifdef __USE_STD_SEMAPHORE__
#define __AQUERY_ITC_USE_SEMPH__
#include <semaphore>
class A_Semaphore {
private:
std::binary_semaphore native_handle;
public:
A_Semaphore(bool v = false) {
native_handle = std::binary_semaphore(v);
}
void acquire() {
native_handle.acquire();
}
void release() {
native_handle.release();
}
~A_Semaphore() { }
};
#endif
#include "aggregations.h"
#ifdef __AQUERY_ITC_USE_SEMPH__
A_Semaphore prompt{ true }, engine{ false };
#define PROMPT_ACQUIRE() prompt.acquire()
#define PROMPT_RELEASE() prompt.release()
#define ENGINE_ACQUIRE() engine.acquire()
#define ENGINE_RELEASE() engine.release()
#else
#define PROMPT_ACQUIRE()
#define PROMPT_RELEASE() std::this_thread::sleep_for(std::chrono::nanoseconds(0))
#define ENGINE_ACQUIRE()
#define ENGINE_RELEASE()
#endif
typedef int (*code_snippet)(void*);
typedef void (*module_init_fn)(Context*);
int test_main();
int n_recv = 0;
char** n_recvd = nullptr;
// Exported entry point: expands PROMPT_ACQUIRE(), which acquires the `prompt`
// semaphore when __AQUERY_ITC_USE_SEMPH__ is defined and expands to nothing otherwise.
__AQEXPORT__(void) wait_engine(){
PROMPT_ACQUIRE();
}
// Exported entry point: expands ENGINE_RELEASE(), which releases the `engine`
// semaphore when __AQUERY_ITC_USE_SEMPH__ is defined and expands to nothing otherwise.
__AQEXPORT__(void) wake_engine(){
ENGINE_RELEASE();
}
extern "C" void __DLLEXPORT__ receive_args(int argc, char**argv){
n_recv = argc;
n_recvd = argv;
@ -71,42 +161,99 @@ __AQEXPORT__(bool) have_hge(){
#endif
}
// Creates the session's pointer -> deallocator map, then initialises the session.
Context::Context() {
    using memory_map_t = std::unordered_map<void*, deallocator_t>;
    this->current.memory_map = new memory_map_t;
    this->init_session();
}
using prt_fn_t = char* (*)(void*, char*);
// Destroys the session's pointer -> deallocator map created by the constructor.
Context::~Context() {
    using memory_map_t = std::unordered_map<void*, deallocator_t>;
    delete (memory_map_t*) this->current.memory_map;
}
void Context::init_session(){
if (log_level == LOG_INFO){
memset(&(this->current.stats), 0, sizeof(Session::Statistic));
}
auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
memmap->clear();
}
// Field printers, one per MonetDB column type id; print_monetdb_results
// indexes this table with monetdbe_column::type. Entries must stay in the
// same order as monetdbe_type_szs.
constexpr prt_fn_t monetdbe_prtfns[] = {
    aq_to_chars<bool>,
    aq_to_chars<int8_t>,
    aq_to_chars<int16_t>,
    aq_to_chars<int32_t>,
    aq_to_chars<int64_t>,
#if __SIZEOF_INT128__
    aq_to_chars<__int128_t>,
#endif
    aq_to_chars<size_t>,
    aq_to_chars<float>,
    aq_to_chars<double>,
    aq_to_chars<char*>,
    aq_to_chars<std::nullptr_t>,   // falls back to the no-op printer
    aq_to_chars<types::date_t>,
    aq_to_chars<types::time_t>,
    aq_to_chars<types::timestamp_t>,
    // should be last:
    aq_to_chars<std::nullptr_t>
};
#include "monetdbe.h"
// Element size in bytes for each MonetDB column type id; used as the stride
// when walking a column's data array.
inline constexpr static unsigned char monetdbe_type_szs[] = {
    sizeof(monetdbe_column_bool::null_value),
    sizeof(monetdbe_column_int8_t::null_value),
    sizeof(monetdbe_column_int16_t::null_value),
    sizeof(monetdbe_column_int32_t::null_value),
    sizeof(monetdbe_column_int64_t::null_value),
#ifdef __SIZEOF_INT128__
    sizeof(monetdbe_column_int128_t::null_value),
#endif
    sizeof(monetdbe_column_size_t::null_value),
    sizeof(monetdbe_column_float::null_value),
    sizeof(monetdbe_column_double::null_value),
    sizeof(monetdbe_column_str::null_value),
    sizeof(monetdbe_column_blob::null_value),
    sizeof(monetdbe_data_date),
    sizeof(monetdbe_data_time),
    sizeof(monetdbe_data_timestamp),
    // should be last:
    1
};
constexpr uint32_t output_buffer_size = 65536;
// Writes the current MonetDB result set of `srv` to stdout. Each field is
// formatted by the aq_to_chars printer selected through monetdbe_prtfns and
// rows are batched in a 64 KiB stack buffer that is flushed with fwrite().
//   sep - text between fields (rejected when longer than 512 bytes)
//   end - text after each row  (rejected when longer than 512 bytes)
// NOTE(review): `limit` is only tested for being non-zero; the row loop below
// is bounded by srv->cnt alone — confirm whether it should cap the output.
// NOTE(review): the friend declaration in Server spells the last parameter as
// `int`, while this definition uses uint32_t — confirm they are meant to match.
void print_monetdb_results(Server* srv, const char* sep = " ", const char* end = "\n",
uint32_t limit = std::numeric_limits<uint32_t>::max()) {
if (!srv->haserror() && srv->cnt && limit){
char buffer[output_buffer_size];
auto _res = static_cast<monetdbe_result*> (srv->res);
const auto& ncols = _res->ncols;
monetdbe_column** cols = static_cast<monetdbe_column**>(malloc(sizeof(monetdbe_column*) * ncols));
prt_fn_t *prtfns = (prt_fn_t*) alloca(sizeof(prt_fn_t) * ncols);
char** col_data = static_cast<char**> (alloca(sizeof(char*) * ncols));
uint8_t* szs = static_cast<uint8_t*>(alloca(ncols));
std::string header_string = "";
const char* err_msg = nullptr;
const size_t l_sep = strlen(sep);
const size_t l_end = strlen(end);
char* _buffer = buffer;
// Fetch every column and record its data pointer, printer and element size.
for(uint32_t i = 0; i < ncols; ++i){
err_msg = monetdbe_result_fetch(_res, &cols[i], i);
if(err_msg) { goto cleanup; }
col_data[i] = static_cast<char *>(cols[i]->data);
prtfns[i] = monetdbe_prtfns[cols[i]->type];
szs [i] = monetdbe_type_szs[cols[i]->type];
header_string = header_string + cols[i]->name + sep + '|' + sep;
}
// NOTE(review): the next four lines open Context::end_session in the middle
// of this function, and two more of its lines follow the final `}` below —
// looks like residue from a merged diff; confirm and remove.
void Context::end_session(){
auto memmap = (std::unordered_map<void*, deallocator_t>*) this->current.memory_map;
for (auto& mem : *memmap) {
mem.second(mem.first);
if(l_sep > 512 || l_end > 512) {
puts("Error: separator or end string too long");
goto cleanup;
}
// NOTE(review): both operands are unsigned, so this comparison is always
// true; a header shorter than l_sep + 1 would make the resize() argument wrap.
if (header_string.size() - l_sep - 1>= 0)
header_string.resize(header_string.size() - l_sep - 1);
header_string += end + std::string(header_string.size(), '=') + end;
fputs(header_string.c_str(), stdout);
for(uint64_t i = 0; i < srv->cnt; ++i){
for(uint32_t j = 0; j < ncols; ++j){
//copy the field to buf
_buffer = prtfns[j](col_data[j], _buffer);
if (j != ncols - 1){
memcpy(_buffer, sep, l_sep);
_buffer += l_sep;
}
col_data[j] += szs[j];
}
memcpy(_buffer, end, l_end);
_buffer += l_end;
// Flush once fewer than 1024 bytes of headroom remain.
// NOTE(review): nothing bounds the size of a single row, so one row wider
// than the remaining headroom would overrun `buffer` — confirm.
if(output_buffer_size - (_buffer - buffer) <= 1024){
fwrite(buffer, 1, _buffer - buffer, stdout);
_buffer = buffer;
}
}
// One extra `end` is appended after the last row before the final flush.
memcpy(_buffer, end, l_end);
_buffer += l_end;
if (_buffer != buffer)
fwrite(buffer, 1, _buffer - buffer, stdout);
cleanup:
free(cols);
}
memmap->clear();
}
// Looks up `fname` in the module function table and returns the stored
// pointer, or nullptr when no entry with that name exists.
void* Context::get_module_function(const char* fname){
    using function_map_t = std::unordered_map<std::string, void*>;
    auto* functions = static_cast<function_map_t*>(this->module_function_maps);
    auto entry = functions->find(fname);
    if (entry == functions->end())
        return nullptr;
    return entry->second;
}
void initialize_module(const char* module_name, void* module_handle, Context* cxt){
auto _init_module = reinterpret_cast<module_init_fn>(dlsym(module_handle, "init_session"));
@ -119,15 +266,16 @@ void initialize_module(const char* module_name, void* module_handle, Context* cx
}
int dll_main(int argc, char** argv, Context* cxt){
aq_timer timer;
Config *cfg = reinterpret_cast<Config *>(argv[0]);
std::unordered_map<std::string, void*> user_module_map;
if (cxt->module_function_maps == 0)
if (cxt->module_function_maps == nullptr)
cxt->module_function_maps = new std::unordered_map<std::string, void*>();
auto module_fn_map =
static_cast<std::unordered_map<std::string, void*>*>(cxt->module_function_maps);
auto buf_szs = cfg->buffer_sizes;
void** buffers = (void**)malloc(sizeof(void*) * cfg->n_buffers);
void** buffers = (void**) malloc (sizeof(void*) * cfg->n_buffers);
for (int i = 0; i < cfg->n_buffers; i++)
buffers[i] = static_cast<void *>(argv[i + 1]);
@ -135,19 +283,28 @@ int dll_main(int argc, char** argv, Context* cxt){
cxt->cfg = cfg;
cxt->n_buffers = cfg->n_buffers;
cxt->sz_bufs = buf_szs;
cxt->alt_server = NULL;
if (cfg->backend_type == BACKEND_MonetDB && cxt->alt_server == nullptr)
{
auto alt_server = new Server(cxt);
alt_server->exec("SELECT '**** WELCOME TO AQUERY++! ****';");
puts(*(const char**)(alt_server->getCol(0)));
cxt->alt_server = alt_server;
}
while(cfg->running){
ENGINE_ACQUIRE();
if (cfg->new_query) {
void *handle = 0;
void *user_module_handle = 0;
cfg->stats.postproc_time = 0;
cfg->stats.monet_time = 0;
void *handle = nullptr;
void *user_module_handle = nullptr;
if (cfg->backend_type == BACKEND_MonetDB){
if (cxt->alt_server == 0)
if (cxt->alt_server == nullptr)
cxt->alt_server = new Server(cxt);
Server* server = reinterpret_cast<Server*>(cxt->alt_server);
if(n_recv > 0){
if (cfg->backend_type == BACKEND_AQuery || cfg->has_dll) {
handle = dlopen("./dll.so", RTLD_LAZY);
handle = dlopen("./dll.so", RTLD_NOW);
}
for (const auto& module : user_module_map){
initialize_module(module.first.c_str(), module.second, cxt);
@ -159,14 +316,18 @@ int dll_main(int argc, char** argv, Context* cxt){
switch(n_recvd[i][0]){
case 'Q': // SQL query for monetdbe
{
timer.reset();
server->exec(n_recvd[i] + 1);
printf("Exec Q%d: %s", i, n_recvd[i]);
cfg->stats.monet_time += timer.elapsed();
// printf("Exec Q%d: %s", i, n_recvd[i]);
}
break;
case 'P': // Postprocessing procedure
if(handle && !server->haserror()) {
code_snippet c = reinterpret_cast<code_snippet>(dlsym(handle, n_recvd[i]+1));
timer.reset();
c(cxt);
cfg->stats.postproc_time += timer.elapsed();
}
break;
case 'M': // Load Module
@ -193,12 +354,21 @@ int dll_main(int argc, char** argv, Context* cxt){
//printf("F::: %p\n", module_fn_map->find("mydiv") != module_fn_map->end() ? module_fn_map->find("mydiv")->second : nullptr);
}
break;
case 'O':
{
if(!server->haserror()){
timer.reset();
print_monetdb_results(server);
cfg->stats.postproc_time += timer.elapsed();
}
}
break;
case 'U': // Unload Module
{
auto mname = n_recvd[i] + 1;
auto it = user_module_map.find(mname);
if (user_module_handle == it->second)
user_module_handle = 0;
user_module_handle = nullptr;
dlclose(it->second);
user_module_map.erase(it);
}
@ -207,8 +377,9 @@ int dll_main(int argc, char** argv, Context* cxt){
}
if(handle) {
dlclose(handle);
handle = 0;
handle = nullptr;
}
printf("%lld, %lld", cfg->stats.monet_time, cfg->stats.postproc_time);
cxt->end_session();
n_recv = 0;
}
@ -217,7 +388,7 @@ int dll_main(int argc, char** argv, Context* cxt){
}
else{
server->last_error = nullptr;
continue;
//goto finalize;
}
}
@ -230,9 +401,11 @@ int dll_main(int argc, char** argv, Context* cxt){
if (handle) dlclose(handle);
cfg->new_query = 0;
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
//puts(cfg->running? "true": "false");
//finalize:
PROMPT_RELEASE();
}
return 0;
}
@ -263,20 +436,21 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
#ifdef __AQ_BUILD_LAUNCHER__
return launcher(argc, argv);
#endif
puts("running");
// puts("running");
Context* cxt = new Context();
cxt->log("%d %s\n", argc, argv[1]);
// cxt->log("%d %s\n", argc, argv[1]);
#ifdef THREADING
auto tp = new ThreadPool();
cxt->thread_pool = tp;
#endif
#ifdef __AQ_THREADED_GC__
cxt->gc_thread = new std::thread(gc_thread, cxt);
#endif
const char* shmname;
if (argc < 0)
return dll_main(argc, argv, cxt);
else if (argc <= 1)
return test_main();
else
shmname = argv[1];
SharedMemory shm = SharedMemory(shmname);
@ -310,56 +484,3 @@ extern "C" int __DLLEXPORT__ main(int argc, char** argv) {
return 0;
}
#include "utils.h"
#include "table_ext_monetdb.hpp"
// Stand-alone smoke test: runs a fixed list of commands against a MonetDB
// server. Entries starting with 'Q' are SQL statements; entries starting with
// 'P' name a post-processing routine looked up in ./dll.so.
// Always returns 0.
int test_main()
{
    // NOTE(review): the context (and the server it owns) is never released
    // here, as before — presumably acceptable for a test entry point.
    Context* cxt = new Context();
    if (cxt->alt_server == nullptr)
        cxt->alt_server = new Server(cxt);
    Server* server = reinterpret_cast<Server*>(cxt->alt_server);

    const char* qs[]= {
        "QCREATE TABLE trade(stocksymbol INT, time INT, quantity INT, price INT);",
        "QCOPY OFFSET 2 INTO trade FROM 'w:/gg/AQuery++/data/trade_numerical.csv' ON SERVER USING DELIMITERS ',';",
        "QSELECT stocksymbol, (SUM((quantity * price)) / SUM(quantity)) AS weighted_average FROM trade GROUP BY stocksymbol ;",
        "Pdll_5lYrMY",
        "QSELECT stocksymbol, price FROM trade ORDER BY time ;",
        "Pdll_4Sg6Ri",
        "QSELECT stocksymbol, quantity, price FROM trade ORDER BY time ;",
        "Pdll_5h4kL2",
        "QSELECT stocksymbol, price FROM trade ORDER BY time ;",
        "Pdll_7tEWCO",
        "QSELECT query_c.weighted_moving_averages, query_c.stocksymbol FROM query_c;",
        "Pdll_7FCPnF"
    };
    n_recv = sizeof(qs)/(sizeof (char*));
    n_recvd = const_cast<char**>(qs);

    void* handle = dlopen("./dll.so", RTLD_LAZY);
    cxt->init_session();
    for (int i = 0; i < n_recv; ++i)
    {
        switch (n_recvd[i][0]) {
        case 'Q': // SQL query for monetdbe
            {
                server->exec(n_recvd[i] + 1);
                printf("Exec Q%d: %s\n", i, n_recvd[i]);
            }
            break;
        case 'P': // Postprocessing procedure
            if (handle && !server->haserror()) {
                code_snippet c = reinterpret_cast<code_snippet>(dlsym(handle, n_recvd[i] + 1));
                // dlsym() yields null for a missing symbol; calling it would crash.
                if (c != nullptr)
                    c(cxt);
            }
            break;
        }
    }
    n_recv = 0;
    // `qs` is a local array: do not leave the global pointing at it on return.
    n_recvd = nullptr;
    if (handle)
        dlclose(handle);
    return 0;
}

@ -9,6 +9,7 @@
#include <string>
#include <algorithm>
#include <cstdarg>
#include <vector>
#include "io.h"
#include "hasher.h"
@ -74,7 +75,16 @@ public:
this->container = (_Ty*)container;
this->name = name;
}
template<template <typename ...> class VT, typename T>
// Takes over the storage of an expiring vector-like object: copies its size,
// capacity and container pointer, then zeroes the source's capacity.
template<template <typename> class VT, typename T>
void initfrom(VT<T>&& v, const char* name = "") {
    this->name = name;
    ty = types::Types<_Ty>::getType();
    this->container = (_Ty*)(v.container);
    this->capacity = v.capacity;
    this->size = v.size;
    // Source no longer counts any capacity after the hand-over.
    v.capacity = 0;
}
template<template <typename> class VT, typename T>
void initfrom(const VT<T>& v, const char* name = "") {
ty = types::Types<_Ty>::getType();
this->size = v.size;
@ -82,6 +92,21 @@ public:
this->container = (_Ty*)(v.container);
this->name = name;
}
// Adopts the fields of a plain C storage record (container, size, capacity).
void initfrom(vectortype_cstorage v, const char* name = "") {
    this->name = name;
    ty = types::Types<_Ty>::getType();
    this->container = (_Ty*)v.container;
    this->capacity = v.capacity;
    this->size = v.size;
}
// Initialises the column from one scalar: size and capacity are reset to
// zero, then the value is appended with emplace_back.
template<typename T>
void initfrom(const T& v, const char* name = "") {
    this->name = name;
    ty = types::Types<_Ty>::getType();
    this->capacity = 0;
    this->size = 0;
    this->emplace_back(v);
}
template <class T>
ColRef<_Ty>& operator =(ColRef<T>&& vt) {
this->container = (_Ty*)vt.container;
@ -115,8 +140,16 @@ public:
// Indexing with a vector of positions: constructs a ColView from this column
// and `idxs` and returns it.
ColView<_Ty> operator [](const vector_type<uint32_t>& idxs) const {
return ColView<_Ty>(*this, idxs);
}
void out(uint32_t n = 4, const char* sep = " ") const {
// Mask selection: walks `idxs` in order and appends element i of this column
// to the result for every position i whose flag is set.
// NOTE(review): the result is constructed with `this->size` before elements
// are appended — confirm that constructor only reserves capacity.
vector_type<_Ty> operator [](const std::vector<bool>& idxs) const {
    vector_type<_Ty> selected (this->size);
    uint32_t pos = 0;
    for (auto flag = idxs.begin(); flag != idxs.end(); ++flag, ++pos) {
        if (*flag)
            selected.emplace_back(this->operator[](pos));
    }
    return selected;
}
void out(uint32_t n = 1000, const char* sep = " ") const {
const char* more = "";
if (n < this->size)
more = " ... ";
@ -180,7 +213,7 @@ template<>
class ColRef<void> : public ColRef<int> {};
template<typename _Ty>
class ColView {
class ColView : public vector_base<_Ty> {
public:
typedef ColRef<_Ty> Decayed_t;
const uint32_t size;
@ -219,7 +252,7 @@ public:
Iterator_t end() const {
return Iterator_t(idxs.end(), orig);
}
void out(uint32_t n = 4, const char* sep = " ") const {
void out(uint32_t n = 1000, const char* sep = " ") const {
n = n > size ? size : n;
std::cout << '(';
for (uint32_t i = 0; i < n; ++i)
@ -414,19 +447,27 @@ struct TableInfo {
}
template <int ...cols>
void print2(const char* __restrict sep = ",", const char* __restrict end = "\n",
const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr) const {
const vector_type<uint32_t>* __restrict view = nullptr,
FILE* __restrict fp = nullptr, uint32_t limit = std::numeric_limits<uint32_t>::max()
) const {
std::string printf_string =
generate_printf_string<typename std::tuple_element<cols, tuple_type>::type ...>(sep, end);
// puts(printf_string.c_str());
std::string header_string = std::string();
constexpr static int a_cols[] = { cols... };
for (int i = 0; i < sizeof...(cols); ++i)
header_string += std::string(this->colrefs[a_cols[i]].name) + sep;
const size_t l_sep = strlen(sep);
if (header_string.size() - l_sep >= 0)
header_string.resize(header_string.size() - l_sep);
const auto& prt_loop = [&fp, &view, &printf_string, *this](const auto& f) {
if (fp == nullptr){
header_string = get_header_string(sep, end);
header_string.resize(header_string.size() - strlen(end));
}
else {
for (int i = 0; i < sizeof...(cols); ++i)
header_string += std::string(this->colrefs[a_cols[i]].name) + sep;
const size_t l_sep = strlen(sep);
if (header_string.size() - l_sep >= 0)
header_string.resize(header_string.size() - l_sep);
}
const auto& prt_loop = [&fp, &view, &printf_string, *this, &limit](const auto& f) {
#ifdef __AQ__HAS__INT128__
constexpr auto num_hge = count_type<__int128_t, __uint128_t>((tuple_type*)(0));
#else
@ -442,16 +483,21 @@ struct TableInfo {
+ 1 // padding for msvc not allowing empty arrays
];
setgbuf(cbuf);
if (view)
for (uint32_t i = 0; i < view->size; ++i) {
if (view){
uint32_t outsz = limit > view->size ? view->size : limit;
for (uint32_t i = 0; i < outsz; ++i) {
print2_impl<cols...>(f, (*view)[i], printf_string.c_str());
setgbuf();
}
else
for (uint32_t i = 0; i < colrefs[0].size; ++i) {
}
else{
uint32_t outsz = limit > colrefs[0].size ? colrefs[0].size : limit;
for (uint32_t i = 0; i < outsz; ++i) {
print2_impl<cols...>(f, i, printf_string.c_str());
setgbuf();
}
}
};
if (fp)
@ -466,15 +512,17 @@ struct TableInfo {
}
template <int ...vals> struct applier {
inline constexpr static void apply(const TableInfo<Types...>& t, const char* __restrict sep = ",", const char* __restrict end = "\n",
const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr)
const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr, uint32_t limit = std::numeric_limits<uint32_t>::max()
)
{
t.template print2<vals ...>(sep, end, view, fp);
t.template print2<vals ...>(sep, end, view, fp, limit);
}
};
inline void printall(const char* __restrict sep = ",", const char* __restrict end = "\n",
const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr) {
applyIntegerSequence<sizeof...(Types), applier>::apply(*this, sep, end, view, fp);
const vector_type<uint32_t>* __restrict view = nullptr, FILE* __restrict fp = nullptr,
uint32_t limit = std::numeric_limits<uint32_t>::max() ) const {
applyIntegerSequence<sizeof...(Types), applier>::apply(*this, sep, end, view, fp, limit);
}
TableInfo<Types...>* rename(const char* name) {
@ -643,7 +691,9 @@ template <class ...Types>
// Overload selected for the last column (j == sizeof...(Types) - 1):
// streams element `i` of column `j` to std::cout; `sep` is not used here.
template <size_t j>
inline typename std::enable_if<j == sizeof...(Types) - 1, void>::type
TableInfo<Types ...>::print_impl(const uint32_t& i, const char* __restrict sep) const {
// NOTE(review): the element is streamed here and again two lines below —
// looks like the old and new form of the same statement; confirm which stays.
std::cout << (get<j>(*this))[i];
decltype(auto) t = (get<j>(*this))[i];
// print(t);
std::cout << t;
}
template<class ...Types>
@ -658,6 +708,7 @@ inline typename std::enable_if < j < sizeof...(Types) - 1, void>::type
template<class ...Types>
inline void TableInfo<Types...>::print(const char* __restrict sep, const char* __restrict end) const {
//printall(sep, end);
std::string header_string = get_header_string(sep, end);
std::cout << header_string.c_str();
@ -669,51 +720,56 @@ inline void TableInfo<Types...>::print(const char* __restrict sep, const char* _
std::cout << end;
}
}
// use std::is_base_of here and all vt classes should derive from vector_base
template <class T1,
template<typename> class VT,
class TRet>
using test_vt_support = typename std::enable_if_t<std::is_same_v<VT<T1>, ColRef<T1>> ||
std::is_same_v<VT<T1>, ColView<T1>> ||
std::is_same_v<VT<T1>, vector_type<T1>>, TRet>;
using test_vt_support = typename std::enable_if_t<
std::is_base_of_v<vector_base<T1>, VT<T1>>,
TRet>;
template <class T1, class T2,
template<typename> class VT>
using get_autoext_type = test_vt_support<T1, VT,
decayed_t<VT, typename types::Coercion<T1, T2>::type>>;
template <class T1, class T2,
template<typename> class VT>
using get_long_type = test_vt_support<T1, VT,
decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>>;
template <class T1, class T2, template<typename> class VT,
test_vt_support<T1, VT, void>* = nullptr>
using get_autoext_type =
decayed_t<VT, typename types::Coercion<T1, T2>::type>;
template <class T1, class T2,
template<typename> class VT>
using get_fp_type = test_vt_support<T1, VT,
decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>>;
template <class T1, class T2, template<typename> class VT,
test_vt_support<T1, VT, void>* = nullptr>
using get_long_type =
decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>;
template <class T1, class T2, template<typename> class VT,
test_vt_support<T1, VT, void>* = nullptr>
using get_fp_type =
decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>;
template <class T1,
template<typename> class VT, template<typename> class VT2,
class TRet>
using test_vt_support2 = typename std::enable_if_t<(std::is_same_v<VT<T1>, ColRef<T1>> ||
std::is_same_v<VT<T1>, ColView<T1>> ||
std::is_same_v<VT<T1>, vector_type<T1>>) &&
(std::is_same_v<VT2<T1>, ColRef<T1>> ||
std::is_same_v<VT2<T1>, ColView<T1>> ||
std::is_same_v<VT2<T1>, vector_type<T1>>), TRet >;
using test_vt_support2 = typename std::enable_if_t<
std::is_base_of_v<vector_base<T1>, VT<T1>> &&
std::is_base_of_v<vector_base<T1>, VT2<T1>>,
TRet >;
template <class T1, class T2,
template<typename> class VT, template<typename> class VT2>
using get_autoext_type2 = test_vt_support2<T1, VT, VT2,
decayed_t<VT, typename types::Coercion<T1, T2>::type>>;
template<typename> class VT, template<typename> class VT2,
test_vt_support2<T1, VT, VT2, void>* = nullptr >
using get_autoext_type2 =
decayed_t<VT, typename types::Coercion<T1, T2>::type>;
template <class T1, class T2,
template<typename> class VT, template<typename> class VT2>
using get_long_type2 = test_vt_support2<T1, VT, VT2,
decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>>;
template<typename> class VT, template<typename> class VT2,
test_vt_support2<T1, VT, VT2, void>* = nullptr >
using get_long_type2 =
decayed_t<VT, types::GetLongType<typename types::Coercion<T1, T2>::type>>;
template <class T1, class T2,
template<typename> class VT, template<typename> class VT2>
using get_fp_type2 = test_vt_support2<T1, VT, VT2,
decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>>;
template<typename> class VT, template<typename> class VT2,
test_vt_support2<T1, VT, VT2, void>* = nullptr >
using get_fp_type2 =
decayed_t<VT, types::GetFPType<typename types::Coercion<T1, T2>::type>>;
template <class T1, class T2, template<typename> class VT, template<typename> class VT2>
get_autoext_type2<T1, T2, VT, VT2>
@ -835,7 +891,6 @@ VT<bool> operator >(const T2& lhs, const VT<T1>& rhs) {
}
template <class ...Types>
void print(const TableInfo<Types...>& v, const char* delimiter = " ", const char* endline = "\n") {
v.print(delimiter, endline);

@ -45,16 +45,16 @@ void TableInfo<Ts ...>::monetdb_append_table(void* srv, const char* alt_name) {
puts("getcols...");
uint32_t cnt = 0;
const auto get_col = [&monetdbe_cols, &i, *this, &gc_vecs, &cnt](auto v) {
printf("%d %d\n", i, (ColRef<void>*)v - colrefs);
// printf("%d %d\n", i, (ColRef<void>*)v - colrefs);
monetdbe_cols[i++] = (monetdbe_column*)v->monetdb_get_col(gc_vecs, cnt);
};
(get_col((ColRef<Ts>*)(colrefs + i)), ...);
puts("getcols done");
for(int i = 0; i < sizeof...(Ts); ++i)
{
printf("no:%d name: %s count:%d data: %p type:%d \n",
i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type);
}
// for(int i = 0; i < sizeof...(Ts); ++i)
// {
// printf("no:%d name: %s count:%d data: %p type:%d \n",
// i, monetdbe_cols[i]->name, monetdbe_cols[i]->count, monetdbe_cols[i]->data, monetdbe_cols[i]->type);
// }
std::string create_table_str = "CREATE TABLE IF NOT EXISTS ";
create_table_str += alt_name;
create_table_str += " (";

@ -29,27 +29,37 @@ inline constexpr size_t aq_szof<void> = 0;
template <class T1, class T2>
struct aqis_same_impl {
constexpr static bool value =
std::conditional_t<
std::is_signed_v<T1> == std::is_signed_v<T2>,
std::is_same_v<T1, bool> || std::is_same_v<T2, bool>,
Cond(
(std::is_same_v<T1, bool> && std::is_same_v<T2, bool>),
std::true_type,
std::false_type
),
Cond(
std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
std::is_signed_v<T1> == std::is_signed_v<T2>,
Cond(
aq_szof<T1> == aq_szof<T2>, // deal with sizeof(void)
std::true_type,
std::is_floating_point_v<T1> == std::is_floating_point_v<T2>,
Cond(
aq_szof<T1> == aq_szof<T2>, // deal with sizeof(void)
std::true_type,
std::false_type
),
std::false_type
),
std::false_type
),
std::false_type
)
>::value;
};
// make sure size_t/ptr_t and the corresponding integer types are the same
// aqis_same<T1, T2, Ts...> is true only when every adjacent pair of types in
// the list satisfies aqis_same_impl. The primary template checks <T1, T2> and
// then recurses on <T2, Ts...>.
template <class T1, class T2, class ...Ts>
constexpr bool aqis_same = aqis_same_impl<T1, T2>::value &&
aqis_same<T2, Ts...>;
// Two-type partial specialization: ends the recursion with a single
// aqis_same_impl comparison.
template <class T1, class T2>
constexpr bool aqis_same<T1, T2> = aqis_same_impl<T1, T2>::value;
namespace types {
enum Type_t {
AINT32, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, AINT64, AINT128, AINT16, ADATE, ATIME, AINT8,

@ -1,14 +1,18 @@
#pragma once
#include <ctime>
#include <type_traits>
#include <string>
// cpp_17: compile-time flag, true when the translation unit is built as
// C++17 or newer. Either _MSVC_LANG (when defined) or __cplusplus reporting
// 201703L or later is sufficient.
#if (!defined(_MSVC_LANG) || _MSVC_LANG < 201703L) && __cplusplus < 201703L
constexpr static bool cpp_17 = false;
#else
constexpr static bool cpp_17 = true;
#endif
// Generic str(): ignores its argument and yields an empty, NUL-terminated
// C string literal for any type T.
// NOTE(review): presumably more specific overloads/specializations exist
// elsewhere for types that have a real textual form -- confirm.
template <class T>
inline const char* str(const T& v) {
	(void)v; // the argument is intentionally unused
	const char* const empty_text = "";
	return empty_text;
}
#include<string>
extern std::string base62uuid(int l = 6);

@ -17,13 +17,16 @@
#include "types.h"
#pragma pack(push, 1)
// Empty class template, parameterized on the element type T, that declares
// no members of its own. vector_type<_Ty> derives publicly from
// vector_base<_Ty>.
template<class T>
struct vector_base {};
// Plain aggregate carrying the three raw fields of a vector: an untyped
// pointer to the element buffer followed by the element count and the
// capacity. Member order is part of the layout and must not change.
struct vectortype_cstorage {
	void* container;
	unsigned int size;
	unsigned int capacity;
};
template <typename _Ty>
class vector_type {
class vector_type : public vector_base<_Ty>{
public:
typedef vector_type<_Ty> Decayed_t;
void inline _copy(const vector_type<_Ty>& vt) {
@ -71,9 +74,15 @@ public:
// Explicit copy constructor: initialises capacity to 0 and then delegates to
// _copy(vt). Being `explicit`, it is not used for implicit copies.
constexpr explicit vector_type(const vector_type<_Ty>& vt) noexcept : capacity(0) {
_copy(vt);
}
// Constructor taking a NON-const lvalue reference: initialises capacity to 0
// and hands `vt` to _move() as an rvalue, exactly like the rvalue overload.
// NOTE(review): this means constructing from a named, non-const vector_type
// goes through _move rather than _copy, so the source object is presumably
// left in whatever state _move leaves it -- confirm this is intended.
constexpr vector_type(vector_type<_Ty>& vt) noexcept : capacity(0) {
_move(std::move(vt));
}
// Move constructor: initialises capacity to 0 and then delegates to
// _move(std::move(vt)).
constexpr vector_type(vector_type<_Ty>&& vt) noexcept : capacity(0) {
_move(std::move(vt));
}
// Builds a vector_type directly from a vectortype_cstorage: capacity and size
// are taken verbatim and the untyped container pointer is cast to _Ty*.
// No element data is copied here.
vector_type(vectortype_cstorage vt) noexcept : capacity(vt.capacity), size(vt.size), container((_Ty*)vt.container) {
// NOTE(review): out(10) is invoked on every construction from a cstorage;
// this looks like leftover debug output -- confirm it is intended.
out(10);
};
// size >= capacity ==> readonly vector
// Wraps an existing buffer: stores `size`, sets capacity to 0 and casts
// `data` to _Ty* without copying. With capacity 0 the vector falls under the
// read-only convention stated above.
// NOTE(review): ownership of `data` is not visible from here -- presumably it
// stays with the caller; confirm.
constexpr vector_type(const uint32_t size, void* data) :
size(size), capacity(0), container(static_cast<_Ty*>(data)) {}
@ -159,6 +168,10 @@ public:
grow();
container[size++] = _val;
}
// Appends _val at the end: calls grow() first, then move-assigns _val into
// the next slot and increments size.
// NOTE(review): this overload takes a non-const lvalue and still applies
// std::move, so the caller's object is moved from -- confirm that callers
// expect this.
void emplace_back(_Ty& _val) {
grow();
container[size++] = std::move(_val);
}
void emplace_back(_Ty&& _val) {
grow();
container[size++] = std::move(_val);
@ -255,7 +268,7 @@ public:
}
size = this->size + dist;
}
inline void out(uint32_t n = 4, const char* sep = " ") const
inline void out(uint32_t n = 4000, const char* sep = " ") const
{
const char* more = "";
if (n < this->size)

@ -41,4 +41,20 @@ void SharedMemory::FreeMemoryMap()
if (this->hFileMap)
CloseHandle(this->hFileMap);
}
#ifndef __USE_STD_SEMAPHORE__
// Win32-backed implementation of A_Semaphore, compiled only when
// __USE_STD_SEMAPHORE__ is not defined.

// Creates an unnamed Win32 semaphore with initial count 0 and maximum count 1
// and stores its handle in native_handle.
// The signature matches the declared constructor `A_Semaphore();`. The
// previous definition took a `bool v = false` parameter that no declared
// constructor has, so it could not compile against the class declaration.
// An initial count of 0 is what that default (`false`) produced.
A_Semaphore::A_Semaphore() {
	native_handle = CreateSemaphore(NULL, 0, 1, NULL);
}

// Blocks, with no timeout, until the semaphore is signalled.
void A_Semaphore::acquire() {
	WaitForSingleObject(native_handle, INFINITE);
}

// Increments the semaphore count by one; the previous count is not requested.
void A_Semaphore::release() {
	ReleaseSemaphore(native_handle, 1, NULL);
}

// Closes the underlying Win32 handle.
A_Semaphore::~A_Semaphore() {
	CloseHandle(native_handle);
}
#endif
#endif

@ -14,5 +14,17 @@ struct SharedMemory
SharedMemory(const char*);
void FreeMemoryMap();
};
#ifndef __USE_STD_SEMAPHORE__
// Semaphore wrapper declared only when __USE_STD_SEMAPHORE__ is not defined.
// The handle is kept as an opaque void* so this header exposes no
// platform-specific types.
class A_Semaphore {
	// Members of a `class` are private by default.
	void* native_handle;

public:
	// Lifetime management.
	A_Semaphore();
	~A_Semaphore();

	// Semaphore operations.
	void acquire();
	void release();
};
#endif
#endif

@ -0,0 +1,3 @@
create table f (a float, b vecfloat, c int)
load complex data infile 'data/test_complex.csv' into table f fields terminated by ',' element terminated by ';'
select * from f

@ -18,7 +18,7 @@
//
///////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include "Time.H"
#include "Time.hpp"
Time::Time(char *startTime_)
{

@ -1,21 +1,21 @@
LOAD MODULE FROM "./libirf.so"
FUNCTIONS (
newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
additem(X:vecdouble, y:int64, size:int64) -> bool,
fit() -> bool,
predict() -> vecint
);
create table tb(x int);
create table tb2(x double, y double, z double);
insert into tb values (0);
insert into tb values (0);
insert into tb values (0);
select newtree(5, 3, tb.x, 0, 3, 2, 0, 100, 1) from tb;
insert into tb2 values (1, 0, 1);
insert into tb2 values (0, 1, 1);
insert into tb2 values (1, 1, 1);
select additem(tb2.x, 1, 3) from tb2;
select additem(tb2.y, 0, -1) from tb2;
select additem(tb2.z, 1, -1) from tb2;
select fit();
select predict();
FUNCTIONS (
newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
additem(X:vecdouble, y:int64, size:int64) -> bool,
fit() -> bool,
predict() -> vecint
);
create table tb(x int);
create table tb2(x double, y double, z double);
insert into tb values (0);
insert into tb values (0);
insert into tb values (0);
select newtree(5, 3, tb.x, 0, 3, 2, 0, 100, 1) from tb;
insert into tb2 values (1, 0, 1);
insert into tb2 values (0, 1, 1);
insert into tb2 values (1, 1, 1);
select additem(tb2.x, 1, 3) from tb2;
select additem(tb2.y, 0, -1) from tb2;
select additem(tb2.z, 1, -1) from tb2;
select fit();
select predict();

@ -0,0 +1,22 @@
LOAD MODULE FROM "./libirf.so"
FUNCTIONS (
newtree(height:int, f:int64, sparse:vecint, forget:double, maxf:int64, noclasses:int64, e:int, r:int64, rb:int64) -> bool,
fit(X:vecvecdouble, y:vecint64) -> bool,
predict(X:vecvecdouble) -> vecint
);
create table source(x1 double, x2 double, x3 double, x4 double, x5 int64);
load data infile "data/benchmark" into table source fields terminated by ",";
create table sparse(x int);
insert into sparse values (1);
insert into sparse values (1);
insert into sparse values (1);
insert into sparse values (1);
select newtree(6, 4, sparse.x, 0, 4, 2, 0, 400, 2147483647) from sparse
select fit(pack(x1, x2, x3, x4), x5) from source
-- select pack(x1, x2, x3, x4) from source
select predict(pack(x1, x2, x3, x4)) from source

@ -19,7 +19,7 @@ LOAD DATA INFILE "data/test.csv"
INTO TABLE test1
FIELDS TERMINATED BY ","
SELECT pairCorr(c, b) * d, sum(a), b
SELECT pairCorr(c, b) * d, a, sum(b)
FROM test1
group by c,b,d
group by a
order by b ASC

@ -0,0 +1,31 @@
CREATE TABLE t(indiv INT, grp STRING, val INT)
INSERT INTO t VALUES(1, 'A', 1)
INSERT INTO t VALUES(1, 'A', 2)
INSERT INTO t VALUES(1, 'A', 3)
INSERT INTO t VALUES(1, 'A', 4)
INSERT INTO t VALUES(2, 'A', 2)
INSERT INTO t VALUES(2, 'A', 2)
INSERT INTO t VALUES(2, 'A', 4)
INSERT INTO t VALUES(2, 'A', 8)
INSERT INTO t VALUES(3, 'B', 10)
INSERT INTO t VALUES(3, 'B', 20)
INSERT INTO t VALUES(3, 'B', 30)
INSERT INTO t VALUES(3, 'B', 40)
INSERT INTO t VALUES(4, 'B', 20)
INSERT INTO t VALUES(4, 'B', 20)
INSERT INTO t VALUES(4, 'B', 40)
INSERT INTO t VALUES(4, 'B', 80)
SELECT * FROM t
-- myCov(x, y): centers both inputs on their averages, then returns
--   sum(cx * cy) / (sqrt(sum(cx * cx)) * sqrt(sum(cy * cy)))
-- NOTE(review): because of the division by both norms this is the
-- correlation-coefficient formula rather than a raw covariance -- confirm
-- the name is intended.
-- NOTE(review): denom is 0 whenever a centered input is all zeros (e.g.
-- constant inputs); verify how the engine handles that division.
FUNCTION myCov(x, y) {
center_x := x - avg(x);
center_y := y - avg(y);
num := sum(center_x * center_y);
denom := sqrt(sum(center_x * center_x)) * sqrt(sum(center_y * center_y));
num / denom
}
select myCov(1,2);

@ -7,4 +7,4 @@ FIELDS TERMINATED BY ","
SELECT sum(c), b, d
FROM testq1
group by a,b,d
order by d DESC, b ASC
order by d DESC, b ASC;

@ -17,4 +17,23 @@ LOAD DATA INFILE "data/ticks.csv" INTO TABLE TICKS FIELDS TERMINATED BY ","
SELECT max(endofdayprice/prev(endofdayprice)) as Max_Ratio
FROM ticks
ASSUMING ASC date
WHERE ID = "3001"
WHERE ID = "3001"
CREATE TABLE ticks2(ID VARCHAR(20), max REAL, min REAL)
INSERT INTO ticks2 SELECT ID AS ID, max(ratios(endofdayprice)) AS max, min(ratios(endofdayprice)) AS min from ticks group by ID;
SELECT ID, max, min
FROM ticks2;
CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
INSERT INTO my_table VALUES(10, 20, "example")
select * from my_table;
INSERT INTO my_table SELECT * FROM my_table
select * from my_table;
SELECT c1, c2 as twice_c2 FROM my_table;
CREATE TABLE my_table_derived
AS
SELECT c1, c2 as twice_c2 FROM my_table;
SELECT * FROM my_table_derived;

@ -0,0 +1,9 @@
CREATE TABLE my_table (c1 INT, c2 INT, c3 STRING)
INSERT INTO my_table VALUES(10, 20, "example"), (20, 30, "example2")
<sql>
INSERT INTO my_table VALUES(14, 24, 'example3');
CREATE INDEX idx1 ON my_table(c1);
SELECT * FROM my_table WHERE c1 < 15;
</sql>
SELECT * FROM my_table WHERE c1 > 15
Loading…
Cancel
Save