Compare commits


51 Commits
dev ... master

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| billsun | 91a1cc80cd | update | 1 year ago |
| bill | 52afa95e94 | improved caching and hashing | 1 year ago |
| bill | 0815222e96 | hashtable optimize, aggresive SIMD via openmp | 1 year ago |
| bill | f9205dc2a6 | General Hashtable optimization | 2 years ago |
| bill | d0f0b4fc56 | update | 2 years ago |
| Bill | dfb3ec2380 | update | 2 years ago |
| Bill | 6f267f982d | added corr | 2 years ago |
| Bill | 27ae26db90 | bugfix for duckdb engine, multi-engine support, perf regression investigate | 2 years ago |
| Bill | 7c5440c4fb | make ext_engine: duckdb to work | 2 years ago |
| bill | 200dc71aad | initial support for duckdb | 2 years ago |
| bill | d98b4817b3 | update structure, vis application | 2 years ago |
| Bill | 726ef535ea | Added documentation for trigger demo | 2 years ago |
| Bill | c944b5dfcf | finialize demo | 2 years ago |
| Bill | 05cca378e0 | trigger demo | 2 years ago |
| Bill | 4333af07f2 | trigger type 2 | 2 years ago |
| Bill | 84105347fc | trigger type 2 | 2 years ago |
| Bill | 64d4e3dd9a | triggers | 2 years ago |
| Bill | d71fc77006 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| Bill | acc610280e | triggers | 2 years ago |
| Bill | 541c702d78 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| Bill | 1d1b392435 | fix windows/msvc build | 2 years ago |
| Bill | c5bf4c46e4 | add paper | 2 years ago |
| Bill | b60bfc478d | fix docker | 2 years ago |
| Bill | 906daf577b | Interval based triggers | 2 years ago |
| bill | 64b2ec4d8f | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| Bill | cf8185c5f0 | bug fix | 2 years ago |
| Bill | 778703946d | fixed select into/create table as | 2 years ago |
| bill | da5b5065e6 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| bill | 96bd11462d | commit local | 2 years ago |
| Bill | a91ab1841d | scratch space | 2 years ago |
| Bill | e588e4b0dc | improved scratch space | 2 years ago |
| Bill | 540672cdc4 | updated code generation for compound-columns, initial support for scratchspace | 2 years ago |
| Bill | aaef489029 | Merge branch 'master' of https://github.com/sunyinqi0508/AQuery | 2 years ago |
| Bill | aee803adce | group by optimization | 2 years ago |
| Bill | 181abacc55 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| Bill | eebf507c6a | WIP: group by optimizations | 2 years ago |
| bill | 529c5cb6a8 | draft: new group by, avoid small allocation | 2 years ago |
| Bill | 4942dc1f50 | fixes on stored proc | 2 years ago |
| sunyinqi0508 | a59717ab65 | [skip CI] Merge pull request #1 from sunyinqi0508/benchmark | 2 years ago |
| taozizhuo | 6adb7900cb | add benchmark queries | 2 years ago |
| Bill | da901ee7fa | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| bill | 853b129343 | Fix bugs on stored procedures and threaded GC | 2 years ago |
| bill | 957e53eb70 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| bill | 80bc0a1e22 | modified: server/vector_type.hpp | 2 years ago |
| Bill | 5ba333ca20 | test gc | 2 years ago |
| Bill | 9c2bac3ec1 | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| Bill | d6e3e4878e | Optimized hashtable performance; Stored procedures | 2 years ago |
| root | f32cf317ad | Merge branch 'master' of https://git.billsun.dev/bill/AQuery | 2 years ago |
| root | 31dfaf30be | fixed msvc buildwq | 2 years ago |
| Bill | eb0fe8857a | fixed create as union/except, distinct | 2 years ago |
| Bill | 9caa1fa82a | fix 'ascii' codec can't encode characters | 2 years ago |

.gitignore (vendored): 13 changed lines

@ -1,3 +1,4 @@
+duckdb.dll
*.swp
tests/datagen_jose/histgen
tests/datagen_jose/tickgen
@ -59,6 +60,10 @@ data/benchmark
!nyctx100.csv
!network.csv
!test_complex.csv
+data/electricity*
+data/covtype*
+data/phishing*
+data/power*
*.out
*.asm
!mmw.so
@ -81,5 +86,9 @@ saves
out*.cpp
udf*.hpp
*.ipynb
saved_procedures/**
procedures/**
.mypy_cache
__pycache__
deps/**
*.bsc

.gitmodules (vendored): 3 changed lines

@ -0,0 +1,3 @@
[submodule "docs/paper"]
path = docs/paper
url = https://github.com/sunyinqi0508/AQueryPaper

@ -10,7 +10,7 @@ RUN export OS_VER=`cat /etc/os-release | grep VERSION_CODENAME` &&\
RUN wget --output-document=/etc/apt/trusted.gpg.d/monetdb.gpg https://dev.monetdb.org/downloads/MonetDB-GPG-KEY.gpg
-RUN apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev git
+RUN apt update && apt install -y python3 python3-pip clang-14 libmonetdbe-dev libmonetdb-client-dev monetdb5-sql-dev git
RUN git clone https://github.com/sunyinqi0508/AQuery2

@ -2,26 +2,28 @@ OS_SUPPORT =
MonetDB_LIB =
MonetDB_INC =
Defines =
-CXXFLAGS = --std=c++1z
-ifeq ($(AQ_DEBUG), 1)
-OPTFLAGS = -g3 -fsanitize=address -fsanitize=leak
-LINKFLAGS =
-else
-OPTFLAGS = -O3 -DNDEBUG -fno-stack-protector
-LINKFLAGS = -flto -s
+CC = $(CXX) -xc
+CXXFLAGS = --std=c++2a
+ifdef AQ_LINKER
+CXX += -fuse-ld=$(AQ_LINKER)
endif
SHAREDFLAGS = -shared
FPIC = -fPIC
_COMPILER = $(shell $(CXX) --version | grep -q clang && echo clang|| echo gcc)
COMPILER = $(strip $(_COMPILER))
LIBTOOL = ar rcs
USELIB_FLAG = -Wl,--whole-archive,libaquery.a -Wl,-no-whole-archive
-LIBAQ_SRC = server/monetdb_conn.cpp server/libaquery.cpp
-LIBAQ_OBJ = monetdb_conn.o libaquery.o
+LIBAQ_SRC = server/monetdb_conn.cpp server/duckdb_conn.cpp server/libaquery.cpp
+LIBAQ_OBJ = monetdb_conn.o duckdb_conn.o libaquery.o monetdb_ext.o
SEMANTIC_INTERPOSITION = -fno-semantic-interposition
RANLIB = ranlib
_LINKER_BINARY = $(shell `$(CXX) -print-prog-name=ld` -v 2>&1 | grep -q LLVM && echo lld || echo ld)
LINKER_BINARY = $(strip $(_LINKER_BINARY))
+DuckDB_LIB = -Ldeps -lduckdb
+DuckDB_INC = -Ideps
ifeq ($(LINKER_BINARY), ld)
LINKER_FLAGS = -Wl,--allow-multiple-definition
else
@ -43,7 +45,7 @@ else
LIBTOOL = gcc-ar rcs
endif
endif
-OPTFLAGS += $(SEMANTIC_INTERPOSITION)
+LINKFLAGS = $(SEMANTIC_INTERPOSITION)
ifeq ($(PCH), 1)
PCHFLAGS = -include server/pch.hpp
@ -57,6 +59,7 @@ ifeq ($(OS),Windows_NT)
LIBAQ_OBJ += winhelper.o
MonetDB_LIB += msc-plugin/monetdbe.dll
MonetDB_INC += -Imonetdb/msvc
+LIBTOOL = gcc-ar rcs
ifeq ($(COMPILER), clang)
FPIC =
@ -74,15 +77,24 @@ else
LIBTOOL = libtool -static -o
endif
ifneq ($(UNAME_M),arm64)
-OPTFLAGS += -march=native
+OPTFLAGS = -march=native
endif
else
-OPTFLAGS += -march=native
+OPTFLAGS = -march=native
MonetDB_LIB += $(AQ_MONETDB_LIB)
MonetDB_INC += $(AQ_MONETDB_INC)
MonetDB_INC += -I/usr/local/include/monetdb -I/usr/include/monetdb
endif
-MonetDB_LIB += -lmonetdbe
+MonetDB_LIB += -lmonetdbe -lmonetdbsql -lbat
+endif
+ifeq ($(AQ_DEBUG), 1)
+OPTFLAGS = -g3 #-static-libsan -fsanitize=address
+# LINKFLAGS =
+else
+OPTFLAGS += -Ofast -DNDEBUG -fno-stack-protector -fopenmp
+LINKFLAGS += -flto -s
endif
ifeq ($(THREADING),1)
@ -95,8 +107,8 @@ ifeq ($(AQUERY_ITC_USE_SEMPH), 1)
Defines += -D__AQUERY_ITC_USE_SEMPH__
endif
-CXXFLAGS += $(OPTFLAGS) $(Defines) $(MonetDB_INC)
-BINARYFLAGS = $(CXXFLAGS) $(LINKFLAGS) $(MonetDB_LIB)
+CXXFLAGS += $(OPTFLAGS) $(Defines) $(MonetDB_INC) $(DuckDB_INC)
+BINARYFLAGS = $(CXXFLAGS) $(LINKFLAGS) $(MonetDB_LIB) $(DuckDB_LIB)
SHAREDFLAGS += $(FPIC) $(BINARYFLAGS)
info:
@ -128,6 +140,7 @@ pch:
$(CXX) -x c++-header server/pch.hpp $(FPIC) $(CXXFLAGS)
libaquery:
$(CXX) -c $(FPIC) $(PCHFLAGS) $(LIBAQ_SRC) $(OS_SUPPORT) $(CXXFLAGS) &&\
+$(CC) -c $(FPIC) server/monetdb_ext.c $(OPTFLAGS) $(MonetDB_INC) &&\
$(LIBTOOL) libaquery.a $(LIBAQ_OBJ) &&\
$(RANLIB) libaquery.a
@ -152,6 +165,7 @@ docker:
docker build -t aquery .
clean:
-rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true
+rm .cached *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true; \
+rm -rf *.dSYM || true
.PHONY: clean

@ -1,31 +1,23 @@
# AQuery++ Database
## News:
-### Please try the latest code in dev branch if you encounter any problem. Use `git checkout dev` to switch branches.
+**Demo workflow for Triggers now available.** See the [**DEMO**](/demo/README.md).
## Introduction
AQuery++ Database is a cross-platform, In-Memory Column-Store Database that incorporates compiled query execution. (**Note**: If you encounter any problems, feel free to contact me via ys3540@nyu.edu)
+# Architecture
+![Architecture](./docs/arch-hybrid.svg)
-## Docker (Recommended):
-- See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
-- In AQuery root directory, type `make docker` to build the docker image from scratch.
-- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
-- Finally run the image in **interactive** mode (`docker run --name aquery -it aquery`)
-- When you need to access the container again run `docker start -ai aquery`
-- If there is a need to access the system shell within AQuery, type `dbg` to activate python interpreter and type `os.system('sh')` to launch a shell.
-- Docker image is available on [Docker Hub](https://hub.docker.com/repository/docker/sunyinqi0508/aquery) but building image yourself is highly recommended (see [#2](../../issues/2))
+## AQuery Compiler
+- The query is first processed by the AQuery Compiler, which is composed of a frontend that parses the query into an AST and a backend that generates the target code that delivers the query.
+- The frontend of the AQuery++ Compiler is built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing), with modifications to handle the AQuery dialect and extensions.
+- The backend of the AQuery++ Compiler generates target code that depends on the Execution Engine: C++ code for the AQuery Execution Engine, SQL plus a C++ post-processor for the Hybrid Engine, or k9 code for the k9 Engine.
+## Execution Engines
+- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
+- Hybrid Execution Engine: decouples the query into two parts. The SQL-compliant part is executed by an embedded version of MonetDB; everything else is executed by a post-processing module that the AQuery++ Compiler generates in C++, which is then compiled and executed (see the sketch after this list).
+- AQuery Library: consists of a pre-compiled static library and a set of headers with templated methods that provide column arithmetic, operations, and relational algebra inspired by array-programming languages such as kdb. This library is used by the generated C++ post-processor code, which significantly reduces the complexity of the generated code and the compile time while maintaining the best performance. The same libraries can also be used by UDFs and user modules, making it easier for users to write simple, efficient, yet powerful extensions.
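A rough sketch of that parse-then-generate flow, using only the modules visible in this diff (the `aquery_parser` frontend and the `common` code-generation package); the actual entry point is `prompt.py`, which adds configuration, build management, and the server process on top, so treat this as illustrative rather than the project's real driver:

```python
# Illustrative only: module names are taken from this diff; prompt.py is the
# real driver and performs additional setup (config, build, server startup).
import aquery_parser            # frontend built on mo-sql-parsing
import common as codegen        # backend: walks the AST and emits C++ (common/__init__.py)

sql = "SELECT id1, sum(v1) AS v1 FROM source GROUP BY id1;"
ast = aquery_parser.parse(sql)  # plain-dict parse tree, e.g. {'stmts': [...]}
cxt = codegen.exec(ast)         # generates post-processor C++ and writes out.cpp
print(cxt.finalize())           # the assembled C++ translation unit
```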
-## CIMS Computer Lab (Only for NYU affiliates who have access)
-1. Clone this git repo in CIMS.
-2. Download the [patch](https://drive.google.com/file/d/1YkykhM6u0acZ-btQb4EUn4jAEXPT81cN/view?usp=sharing)
-3. Decompress the patch to any directory and execute script inside by typing (`source ./cims.sh`). Please use the source command or `. ./cims.sh` (dot space) to execute the script because it contains configurations for environment variables. Also note that this script can only work with bash and compatible shells (e.g. dash, zsh. but not csh)
-4. Execute `python3 ./prompt.py`
-## Singularity Container
-1. build container `singularity build aquery.sif aquery.def`
-2. execute container `singularity exec aquery.sif sh`
-3. run AQuery `python3 ./prompt.py`
-# Native Installation:
+# Installation:
## Requirements
1. A recent version of Linux, Windows or MacOS, with a recent C++ compiler that has C++17 (1z) support (C++20 is recommended, if available, for heterogeneous lookup on unordered containers).
- GCC: 9.0 or above (g++ 7.x and 8.x fail to handle fold expressions due to a compiler bug)
@ -38,10 +30,6 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco
- On MacOS, MonetDB can easily be installed with Homebrew: `brew install monetdb`.
3. Python 3.6 or above; install the required packages from requirements.txt with `python3 -m pip install -r requirements.txt`
-## Installation
-AQuery is tested on mainstream operating systems such as Windows, macOS and Linux
### Windows
There are multiple options to run AQuery on Windows, but for better consistency I recommend a simulated Linux environment such as **Windows Subsystem for Linux** (1 or 2), **Docker**, or a **Linux virtual machine**. You can also use the native toolchain from Microsoft Visual Studio or gcc from Winlibs/Cygwin/MinGW.
@ -97,7 +85,24 @@ There're multiple options to run AQuery on Windows. But for better consistency I
In this case, upgrade Anaconda or your compiler, or use the Python from your OS or package manager instead. Or (**NOT recommended**) copy/link the library from your system (e.g. /usr/lib/x86_64-linux-gnu/libstdc++.so.6) into Anaconda's library directory (e.g. ~/Anaconda3/lib/).
## Docker (Recommended):
- See installation instructions from [docker.com](https://www.docker.com). Run **docker desktop** to start docker engine.
- In AQuery root directory, type `make docker` to build the docker image from scratch.
- For Arm-based Mac users, you would have to build and run the **x86_64** docker image because MonetDB doesn't offer official binaries for arm64 Linux. (Run `docker buildx build --platform=linux/amd64 -t aquery .` instead of `make docker`)
- Finally run the image in **interactive** mode (`docker run --name aquery -it aquery`)
- When you need to access the container again run `docker start -ai aquery`
- If there is a need to access the system shell within AQuery, type `dbg` to activate python interpreter and type `os.system('sh')` to launch a shell.
- Docker image is available on [Docker Hub](https://hub.docker.com/repository/docker/sunyinqi0508/aquery) but building image yourself is highly recommended (see [#2](../../issues/2))
## CIMS Computer Lab (Only for NYU affiliates who have access)
1. Clone this git repo in CIMS.
2. Download the [patch](https://drive.google.com/file/d/1YkykhM6u0acZ-btQb4EUn4jAEXPT81cN/view?usp=sharing)
3. Decompress the patch to any directory and execute script inside by typing (`source ./cims.sh`). Please use the source command or `. ./cims.sh` (dot space) to execute the script because it contains configurations for environment variables. Also note that this script can only work with bash and compatible shells (e.g. dash, zsh. but not csh)
4. Execute `python3 ./prompt.py`
## Singularity Container
1. build container `singularity build aquery.sif aquery.def`
2. execute container `singularity exec aquery.sif sh`
3. run AQuery `python3 ./prompt.py`
# Usage
`python3 prompt.py` will launch the interactive command prompt. The server binary will be automatically rebuilt and started.
### Commands:
@ -169,7 +174,11 @@ save: query INTO OUTFILE string FIELDS TERMINATED BY string
udf: FUNCTION ID '(' arg-list ')' '{' fun-body '}'
arg_list: ID (, ID)*
fun_body: [stmts] expr
+/********* See more udf grammar later. **********/
+/********* Triggers **********/
+create: CREATE TRIGGER ID [ ACTION ID INTERVAL num | ON ID ACTION ID WHEN ID ]
+drop: DROP TRIGGER ID
stmts: stmt+
stmt: assignment; | if-stmt | for-stmt | ;
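For illustration, the two trigger forms above can be fed through the parser frontend; the trigger, table, and procedure names below are hypothetical:

```python
# Hypothetical statements exercising the trigger grammar above; aquery_parser
# returns the parse tree as a plain dict.
import aquery_parser

stmts = [
    "CREATE TRIGGER t_interval ACTION update_stats INTERVAL 5000;",        # interval-based
    "CREATE TRIGGER t_cond ON trades ACTION update_stats WHEN new_rows;",  # conditional
    "DROP TRIGGER t_interval;",
]
for s in stmts:
    print(aquery_parser.parse(s))
```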
@ -268,17 +277,6 @@ SELECT * FROM my_table WHERE c1 > 10
- `sqrt(x), trunc(x), and other builtin math functions`: value-wise math operations. `sqrt(x)[i] = sqrt(x[i])`
- `pack(cols, ...)`: pack multiple columns with exact same type into a single column.
-# Architecture
-![Architecture](./docs/arch-hybrid.svg)
-## AQuery Compiler
-- The query is first processed by the AQuery Compiler which is composed of a frontend that parses the query into AST and a backend that generates target code that delivers the query.
-- Front end of AQuery++ Compiler is built on top of [mo-sql-parsing](https://github.com/klahnakoski/mo-sql-parsing) with modifications to handle AQuery dialect and extension.
-- Backend of AQuery++ Compiler generates target code dependent on the Execution Engine. It can either be the C++ code for AQuery Execution Engine or sql and C++ post-processor for Hybrid Engine or k9 for the k9 Engine.
-## Execution Engines
-- AQuery++ supports different execution engines thanks to the decoupled compiler structure.
-- Hybrid Execution Engine: decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed.
-- AQuery Library: A set of header based libraries that provide column arithmetic and operations inspired by array programming languages like kdb. This library is used by C++ post-processor code which can significantly reduce the complexity of generated code, reducing compile time while maintaining the best performance. The set of libraries can also be used by UDFs as well as User modules which makes it easier for users to write simple but powerful extensions.
# Roadmap
- [x] SQL Parser -> AQuery Parser (Front End)
@ -304,15 +302,16 @@ SELECT * FROM my_table WHERE c1 > 10
- [x] Query Optimization
- [x] Selection/Order by push-down
- [x] Join Optimization (Only in Hybrid Engine)
-- [ ] Threaded GC
+- [x] Threaded GC
- [ ] Extensibility
- [x] UDFs (Hybrid Engine only)
- [x] SDK and User Module
-- [ ] Triggers
+- [x] Stored Procedures
+- [x] Triggers
# Known Issues:
-- [ ] Interval based triggers
+- [x] Interval based triggers
- [ ] Hot reloading server binary
- [x] Bug fixes: type deduction misaligned in Hybrid Engine
- [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on)
@ -343,3 +342,7 @@ SELECT * FROM my_table WHERE c1 > 10
- [MonetDB](https://www.monetdb.org) <br>
License (Mozilla Public License): https://github.com/MonetDB/MonetDB/blob/master/license.txt
+- [ankerl::unordered_dense](https://github.com/martinus/unordered_dense)<br>
+Author: Martin Ankerl <br>
+License (MIT): http://opensource.org/licenses/MIT <br>

@ -2,16 +2,18 @@
## GLOBAL CONFIGURATION FLAGS
-version_string = '0.5.3a'
+version_string = '0.7.7a'
add_path_to_ldpath = True
rebuild_backend = False
run_backend = True
have_hge = False
-cygroot = 'c:/msys64/usr/bin'
+cygroot = 'c:/mingw64/usr/bin'
msbuildroot = ''
os_platform = 'unknown'
build_driver = 'Auto'
compilation_output = True
+compile_use_gc = True
+compile_use_threading = True
## END GLOBAL CONFIGURATION FLAGS
@ -22,11 +24,12 @@ def init_config():
#os_platform = 'unkown'
#msbuildroot = 'd:/gg/vs22/MSBuild/Current/Bin'
import os
-from engine.utils import add_dll_dir
+from common.utils import add_dll_dir
# os.environ['CXX'] = 'C:/Program Files/LLVM/bin/clang.exe'
os.environ['THREADING'] = '1'
os.environ['AQUERY_ITC_USE_SEMPH'] = '1'
+if 'AQ_DEBUG' not in os.environ:
+os.environ['AQ_DEBUG'] = '0'
if ('__config_initialized__' not in globals() or
not __config_initialized__):
import sys
@ -44,10 +47,12 @@ def init_config():
os_platform = 'bsd'
elif sys.platform == 'cygwin' or sys.platform == 'msys':
os_platform = 'cygwin'
# deal with msys dependencies:
if os_platform == 'win':
-add_dll_dir(cygroot)
add_dll_dir(os.path.abspath('./msc-plugin'))
+add_dll_dir(os.path.abspath('./deps'))
+add_dll_dir(cygroot)
if build_driver == 'Auto':
try:
import vswhere
@ -62,10 +67,16 @@ def init_config():
build_driver = 'Makefile'
# print("adding path")
else:
-import readline
+try:
+import readline
+except ImportError:
+print("Warning: Readline module not present")
if build_driver == 'Auto':
build_driver = 'Makefile'
+if os_platform == 'linux':
+os.environ['PATH'] += os.pathsep + '/usr/lib'
if os_platform == 'cygwin':
add_dll_dir('./lib')
+os.environ['LD_LIBRARY_PATH'] += os.pathsep + os.getcwd()+ os.sep + 'deps'
__config_initialized__ = True

@ -5,20 +5,18 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
+# Bill Sun 2022 - 2023
from __future__ import absolute_import, division, unicode_literals
import json
from threading import Lock
-from aquery_parser.sql_parser import scrub
-from aquery_parser.utils import ansi_string, simple_op, normal_op
+from aquery_parser.parser import scrub
+from aquery_parser.utils import simple_op, normal_op
+import aquery_parser.parser
parse_locker = Lock() # ENSURE ONLY ONE PARSING AT A TIME
common_parser = None
-mysql_parser = None
-sqlserver_parser = None
SQL_NULL = {"null": {}}
@ -33,44 +31,10 @@ def parse(sql, null=SQL_NULL, calls=simple_op):
with parse_locker:
if not common_parser:
-common_parser = sql_parser.common_parser()
+common_parser = aquery_parser.parser.common_parser()
result = _parse(common_parser, sql, null, calls)
return result
-def parse_mysql(sql, null=SQL_NULL, calls=simple_op):
-"""
-PARSE MySQL ASSUME DOUBLE QUOTED STRINGS ARE LITERALS
-:param sql: String of SQL
-:param null: What value to use as NULL (default is the null function `{"null":{}}`)
-:return: parse tree
-"""
-global mysql_parser
-with parse_locker:
-if not mysql_parser:
-mysql_parser = sql_parser.mysql_parser()
-return _parse(mysql_parser, sql, null, calls)
-def parse_sqlserver(sql, null=SQL_NULL, calls=simple_op):
-"""
-PARSE MySQL ASSUME DOUBLE QUOTED STRINGS ARE LITERALS
-:param sql: String of SQL
-:param null: What value to use as NULL (default is the null function `{"null":{}}`)
-:return: parse tree
-"""
-global sqlserver_parser
-with parse_locker:
-if not sqlserver_parser:
-sqlserver_parser = sql_parser.sqlserver_parser()
-return _parse(sqlserver_parser, sql, null, calls)
-parse_bigquery = parse_mysql
def _parse(parser, sql, null, calls):
utils.null_locations = []
utils.scrub_op = calls
@ -85,4 +49,4 @@ def _parse(parser, sql, null, calls):
_ = json.dumps
-__all__ = ["parse", "format", "parse_mysql", "parse_bigquery", "normal_op", "simple_op"]
+__all__ = ["parse", "format", "normal_op", "simple_op"]

@ -5,7 +5,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
+# Bill Sun 2022 - 2023
# SQL CONSTANTS
from mo_parsing import *

@ -5,9 +5,8 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
+# Bill Sun 2022 - 2023
-from sre_parse import WHITESPACE
from mo_parsing.helpers import restOfLine
from mo_parsing.infix import delimited_list
@ -28,37 +27,13 @@ simple_ident = Regex(simple_ident.__regex__()[1])
def common_parser():
combined_ident = Combine(delimited_list(
-ansi_ident | mysql_backtick_ident | simple_ident, separator=".", combine=True,
+ansi_ident | aquery_backtick_ident | simple_ident, separator=".", combine=True,
)).set_parser_name("identifier")
-return parser(ansi_string | mysql_doublequote_string, combined_ident)
+return parser(ansi_string | aquery_doublequote_string, combined_ident)
-def mysql_parser():
-mysql_string = ansi_string | mysql_doublequote_string
-mysql_ident = Combine(delimited_list(
-mysql_backtick_ident | sqlserver_ident | simple_ident,
-separator=".",
-combine=True,
-)).set_parser_name("mysql identifier")
-return parser(mysql_string, mysql_ident)
-def sqlserver_parser():
-combined_ident = Combine(delimited_list(
-ansi_ident
-| mysql_backtick_ident
-| sqlserver_ident
-| Word(FIRST_IDENT_CHAR, IDENT_CHAR),
-separator=".",
-combine=True,
-)).set_parser_name("identifier")
-return parser(ansi_string, combined_ident, sqlserver=True)
-def parser(literal_string, ident, sqlserver=False):
+def parser(literal_string, ident):
with Whitespace() as engine:
engine.add_ignore(Literal("--") + restOfLine)
engine.add_ignore(Literal("#") + restOfLine)
@ -184,12 +159,10 @@ def parser(literal_string, ident, sqlserver=False):
)
)
-if not sqlserver:
-# SQL SERVER DOES NOT SUPPORT [] FOR ARRAY CONSTRUCTION (USED FOR IDENTIFIERS)
create_array = (
Literal("[") + delimited_list(Group(expr))("args") + Literal("]")
| create_array
)
create_array = create_array / to_array
@ -596,26 +569,34 @@
+ index_type
+ index_column_names
+ index_options
-)("create index")
+)("create_index")
-cache_options = Optional((
-keyword("options").suppress()
-+ LB
-+ Dict(delimited_list(Group(
-literal_string / (lambda tokens: tokens[0]["literal"])
-+ Optional(EQ)
-+ var_name
-)))
-+ RB
-)("options"))
+create_trigger = (
+keyword("create trigger")
++ var_name("name")
++ ((
+ON
++ var_name("table")
++ keyword("action")
++ var_name("action")
++ WHEN
++ var_name("query") )
+| (
+keyword("action")
++ var_name("action")
++ INTERVAL
++ int_num("interval")
+))
+)("create_trigger")
+drop_trigger = (keyword("drop trigger") + var_name("name")) ("drop_trigger")
create_cache = (
keyword("cache").suppress()
+ Optional(flag("lazy"))
+ TABLE
-+ var_name("name")
-+ cache_options
++ FROM
++ var_name("source") # AQuery, MonetDB, DuckDB ...
++ Optional(AS + query("query"))
)("cache")
drop_table = (
@ -720,8 +701,8 @@
sql_stmts = delimited_list( (
query
| (insert | update | delete | load)
-| (create_table | create_view | create_cache | create_index)
-| (drop_table | drop_view | drop_index)
+| (create_table | create_view | create_cache | create_index | create_trigger)
+| (drop_table | drop_view | drop_index | drop_trigger)
)("stmts"), ";")
other_stmt = (
@ -734,6 +715,5 @@
|other_stmt
| keyword(";").suppress() # empty stmt
)
return stmts.finalize()

@ -5,7 +5,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
+# Bill Sun 2022 - 2023
# KNOWN TYPES

@ -5,7 +5,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
+# Bill Sun 2022 - 2023
import ast
@ -610,9 +610,8 @@ hex_num = (
# STRINGS
ansi_string = Regex(r"\'(\'\'|[^'])*\'") / to_string
-mysql_doublequote_string = Regex(r'\"(\"\"|[^"])*\"') / to_string
+aquery_doublequote_string = Regex(r'\"(\"\"|[^"])*\"') / to_string
# BASIC IDENTIFIERS
ansi_ident = Regex(r'\"(\"\"|[^"])*\"') / unquote
-mysql_backtick_ident = Regex(r"\`(\`\`|[^`])*\`") / unquote
+aquery_backtick_ident = Regex(r"\`(\`\`|[^`])*\`") / unquote
-sqlserver_ident = Regex(r"\[(\]\]|[^\]])*\]") / unquote

@ -0,0 +1,23 @@
SELECT id1, sum(v1) AS v1 FROM source GROUP BY id1; --0.036 | 0.017 | .274
SELECT id1, id2, sum(v1) AS v1 FROM source GROUP BY id1, id2; -- 0.063 | 0.013
SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM source GROUP BY id3; -- 2.322 | 0.406 | 2.27
SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM source GROUP BY id4; -- 0.159 | 0.022
SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM source GROUP BY id6; -- 1.778 | 0.699 | 2.283
--faster median
--SELECT id4, id5, median(v3) AS median_v3, stddev(v3) AS sd_v3 FROM source GROUP BY id4, id5; -- x4
SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM source GROUP BY id3; -- 0.857 | 0.467 | 2.236
-- select top 2 from each grp
SELECT id6, subvec(v3,0,2) AS v3 FROM source GROUP BY id6 order by v3;
-- implement corr
SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM source GROUP BY id2, id4;
-- NA | 0.240 | 0.6
SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS cnt FROM source GROUP BY id1, id2, id3, id4, id5, id6; -- 2.669 | 1.232 | 2.221(1.8)

@ -0,0 +1,6 @@
create table source(id1 int,id2 int,id3 int,id4 int,id5 int,id6 int,v1 int,v2 int,v3 float)
LOAD DATA INFILE "data/h2o/G1_1e7_1e1_0_0_n.csv"
INTO TABLE source
FIELDS TERMINATED BY ","

@ -0,0 +1,6 @@
CREATE TABLE trade01m(stocksymbol STRING, time INT, quantity INT, price INT)
load data infile "../tables/trade01m.csv" into table trade01m fields terminated by ','
CREATE TABLE trade1m(stocksymbol STRING, time INT, quantity INT, price INT)
load data infile "../tables/trade1m.csv" into table trade1m fields terminated by ','
CREATE TABLE trade10m(stocksymbol STRING, time INT, quantity INT, price INT)
load data infile "../tables/trade10m.csv" into table trade10m fields terminated by ','

@ -0,0 +1,5 @@
-- select rows
<sql>
CREATE TABLE res0 AS
SELECT * FROM trade10m
</sql>

@ -0,0 +1,7 @@
-- groupby_multi_different_functions
<sql>
CREATE TABLE res1 AS
SELECT avg(quantity) AS avg_quan, min(price) AS min_p
FROM trade1m
GROUP BY stocksymbol, time
</sql>

@ -0,0 +1,4 @@
SELECT stocksymbol, MAX(stddevs(3, price))
FROM trade1m
ASSUMING ASC time
GROUP BY stocksymbol

@ -0,0 +1,4 @@
-- count values
<sql>
SELECT COUNT(*) FROM trade10m
</sql>

@ -0,0 +1,7 @@
-- group by multiple keys
<sql>
create table res3 AS
SELECT sum(quantity) as sum_quantity
FROM trade01m
GROUP BY stocksymbol, price
</sql>

@ -0,0 +1,5 @@
-- append tables
<sql>
CREATE TABLE res4 AS
SELECT * FROM trade10m UNION ALL SELECT * FROM trade10m
</sql>

@ -0,0 +1,5 @@
CREATE table res7 AS
SELECT stocksymbol, avgs(5, price)
FROM trade10m
ASSUMING ASC time
GROUP BY stocksymbol

@ -0,0 +1,6 @@
<sql>
CREATE TABLE res8 AS
SELECT stocksymbol, quantity, price
FROM trade10m
WHERE time >= 5288 and time <= 7000
</sql>

@ -0,0 +1,6 @@
<sql>
CREATE TABLE res9 AS
SELECT stocksymbol, MAX(price) - MIN(price)
FROM trade10m
GROUP BY stocksymbol
</sql>

@ -0,0 +1,3 @@
-- q0 select rows
CREATE TABLE res0 (a String, b Int32, c Int32, d Int32) ENGINE = MergeTree() ORDER BY b AS
SELECT * FROM benchmark.trade10m

@ -0,0 +1,4 @@
-- groupby_multi_different_functions
SELECT avg(quantity), min(price)
FROM benchmark.trade10m
GROUP BY stocksymbol, time

@ -0,0 +1,8 @@
-- max rolling std
select
stocksymbol,
max(stddevPop(price)) over
(partition by stocksymbol rows between 2 preceding AND CURRENT row) as maxRollingStd
from
(SELECT * FROM benchmark.trade01m ORDER BY time)
GROUP BY stocksymbol

@ -0,0 +1,2 @@
-- count values
SELECT COUNT(*) FROM benchmark.trade10m

@ -0,0 +1,4 @@
-- group by multiple keys
SELECT sum(quantity)
FROM benchmark.trade10m
GROUP BY stocksymbol, price

@ -0,0 +1,2 @@
-- append two tables
SELECT * FROM benchmark.trade10m UNION ALL SELECT * FROM benchmark.trade10m

@ -0,0 +1,5 @@
-- moving_avg
SELECT stocksymbol, groupArrayMovingAvg(5)(price) AS moving_avg_price
FROM
(SELECT * FROM benchmark.trade01m ORDER BY time)
GROUP BY stocksymbol

@ -0,0 +1,3 @@
SELECT stocksymbol, quantity, price
FROM benchmark.trade10m
WHERE time >= 5288 and time <= 7000

@ -0,0 +1,3 @@
SELECT stocksymbol, MAX(price) - MIN(price)
FROM benchmark.trade1m
GROUP BY stocksymbol

@ -0,0 +1,3 @@
-- select rows
CREATE TABLE res0 AS
SELECT * FROM trade10m;

@ -0,0 +1,4 @@
-- groupby_multi_different_functions
SELECT avg(quantity), min(price)
FROM trade10m
GROUP BY stocksymbol, time;

@ -0,0 +1,7 @@
select
stocksymbol,
max(stddev(price)) over
(partition by stocksymbol rows between 2 preceding AND CURRENT row) as maxRollingStd
from
(SELECT * FROM trade01m ORDER BY time) as t
GROUP BY stocksymbol;

@ -0,0 +1,2 @@
-- count values
SELECT COUNT(*) FROM trade10m;

@ -0,0 +1,4 @@
-- group by multiple keys
SELECT sum(quantity)
FROM trade10m
GROUP BY stocksymbol, price;

@ -0,0 +1,2 @@
-- append tables
SELECT * FROM trade10m UNION ALL SELECT * FROM trade10m;

@ -0,0 +1,5 @@
select
stocksymbol,
coalesce(avg(price) over
(partition by stocksymbol order by time rows between 4 preceding AND CURRENT row), price) as rollingAvg
from trade10m;

@ -0,0 +1,3 @@
SELECT stocksymbol, quantity, price
FROM trade01m
WHERE time >= 5288 and time <= 7000

@ -0,0 +1,3 @@
SELECT stocksymbol, MAX(price) - MIN(price)
FROM trade01m
GROUP BY stocksymbol;

@ -6,7 +6,7 @@ import os
import subprocess
import hashlib
import pickle
-from engine.utils import nullstream
+from common.utils import nullstream
from typing import Dict, Optional, Set, Union
@dataclass
@ -27,6 +27,7 @@ class checksums:
aquery_config.build_driver +
compiler_name +
aquery_config.version_string
++ str(os.environ['AQ_DEBUG'] == '1')
)
for key in self.__dict__.keys():
try:
@ -74,14 +75,15 @@ class build_manager:
sourcefiles = [
'build.py', 'Makefile',
'server/server.cpp', 'server/libaquery.cpp',
-'server/monetdb_conn.cpp', 'server/threading.cpp',
-'server/winhelper.cpp'
+'server/monetdb_conn.cpp', 'server/duckdb_conn.cpp',
+'server/threading.cpp', 'server/winhelper.cpp',
+'server/monetdb_ext.c'
]
headerfiles = ['server/aggregations.h', 'server/hasher.h', 'server/io.h',
-'server/libaquery.h', 'server/monetdb_conn.h', 'server/pch.hpp',
-'server/table.h', 'server/threading.h', 'server/types.h', 'server/utils.h',
-'server/winhelper.h', 'server/gc.h', 'server/vector_type.hpp',
-'server/table_ext_monetdb.hpp'
+'server/libaquery.h', 'server/monetdb_conn.h', 'server/duckdb_conn.h',
+'server/pch.hpp', 'server/table.h', 'server/threading.h',
+'server/types.h', 'server/utils.h', 'server/winhelper.h',
+'server/gc.h', 'server/vector_type.hpp', 'server/table_ext_monetdb.hpp'
]
class DriverBase:
@ -117,26 +119,26 @@ class build_manager:
else:
mgr.cxx = os.environ['CXX']
if 'AQ_DEBUG' not in os.environ:
-os.environ['AQ_DEBUG'] = '0' if mgr.OptimizationLv else '1'
+os.environ['AQ_DEBUG'] = ('0' if mgr.OptimizationLv != '0' else '1')
def libaquery_a(self):
-self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery']]
+self.build_cmd = [['rm', 'libaquery.a'],['make', 'libaquery', '-j']]
return self.build()
def pch(self):
-self.build_cmd = [['rm', 'server/pch.hpp.gch'], ['make', 'pch']]
+self.build_cmd = [['rm', 'server/pch.hpp.gch'], ['make', 'pch', '-j']]
return self.build()
def server(self):
if self.mgr.StaticLib:
-self.build_cmd = [['rm', '*.o'],['rm', 'server.so'], ['make', 'server_uselib']]
+self.build_cmd = [['rm', '*.o'],['rm', 'server.so'], ['make', 'server_uselib', '-j']]
else:
-self.build_cmd = [['rm', 'server.so'], ['make', 'server.so']]
+self.build_cmd = [['rm', 'server.so'], ['make', 'server.so', '-j']]
return self.build()
def snippet(self):
if self.mgr.StaticLib:
-self.build_cmd = [['make', 'snippet_uselib']]
+self.build_cmd = [['make', 'snippet_uselib', '-j']]
else:
-self.build_cmd = [['rm', 'dll.so'], ['make', 'snippet']]
+self.build_cmd = [['rm', 'dll.so'], ['make', 'snippet', '-j']]
return self.build()
class MSBuildDriver(DriverBase):
@ -162,6 +164,7 @@ class build_manager:
return True
def server(self):
+print(self.opt)
loc = os.path.abspath('./msc-plugin/server.vcxproj')
self.get_flags()
self.build_cmd = [['del', 'server.so'], [aquery_config.msbuildroot, loc, self.opt, self.platform]]

@ -1,26 +1,24 @@
-from reconstruct.ast import Context, ast_node
+from common.ast import Context, ast_node
saved_cxt = None
def initialize(cxt = None, keep = False):
global saved_cxt
if cxt is None or not keep or type(cxt) is not Context:
if saved_cxt is None or not keep:
cxt = Context()
saved_cxt = cxt
else:
cxt = saved_cxt
cxt.new()
return cxt
def generate(ast, cxt):
for k in ast.keys():
if k in ast_node.types.keys():
-ast_node.types[k](None, ast, cxt)
+root = ast_node.types[k](None, ast, cxt)
-def exec(stmts, cxt = None, keep = False):
+def exec(stmts, cxt = None, keep = None):
+if 'stmts' not in stmts:
+return
cxt = initialize(cxt, keep)
stmts_stmts = stmts['stmts']
if type(stmts_stmts) is list:
@ -28,8 +26,12 @@ def exec(stmts, cxt = None, keep = False):
generate(s, cxt)
else:
generate(stmts_stmts, cxt)
-for q in cxt.queries:
-cxt.print(q.strip())
+cxt.Info(cxt.ccode)
+with open('out.cpp', 'wb') as outfile:
+outfile.write((cxt.finalize()).encode('utf-8'))
return cxt
__all__ = ["initialize", "generate", "exec", "saved_cxt"]

@ -0,0 +1,377 @@
from common.utils import base62uuid
from copy import copy
from typing import *
# replace column info with this later.
class ColRef:
def __init__(self, cname, _ty, cobj, cnt, table:'TableInfo', name, id, compound = False):
self.cname = cname # column object location
self.cxt_name = None # column object in context
self.type = _ty
self.cobj = cobj
self.cnt = cnt
self.table = table
self.name = name
self.id = id # position in table
self.order_pending = None # order_pending
self.compound = compound # compound field (list as a field)
self.views = []
self.aux_columns = [] # columns for temporary calculations
# e.g. order by, group by, filter by expressions
self.__arr__ = (cname, _ty, cobj, cnt, table, name, id)
def reference(self):
cxt = self.table.cxt
self.table.reference()
if self not in cxt.columns_in_context:
counter = 0
base_name = self.table.table_name + '_' + self.name
if base_name in cxt.columns_in_context.values():
while (f'{base_name}_{counter}') in cxt.columns_in_context.values():
counter += 1
base_name = f'{base_name}_{counter}'
self.cxt_name = base_name
cxt.columns_in_context[self] = base_name
# TODO: change this to cname;
cxt.emit(f'auto& {base_name} = *(ColRef<{self.type}> *)(&{self.table.cxt_name}->colrefs[{self.id}]);')
elif self.cxt_name is None:
self.cxt_name = cxt.columns_in_context[self]
return self.cxt_name
def __getitem__(self, key):
if type(key) is str:
return getattr(self, key)
else:
return self.__arr__[key]
def __setitem__(self, key, value):
self.__arr__[key] = value
def __str__(self):
return self.reference()
def __repr__(self):
return self.reference()
class TableInfo:
def __init__(self, table_name, cols, cxt:'Context'):
# statics
self.table_name = table_name
self.alias = set([table_name])
self.columns_byname = dict() # column_name, type
self.columns = []
self.cxt = cxt
self.cxt_name = None
self.views = set()
#keep track of temp vars
self.local_vars = dict()
self.rec = None # a hook on get_col_d to record tables being referenced in the process
self.groupinfo = None
self.add_cols(cols)
# runtime
self.n_rows = 0 # number of rows
self.order = [] # assumptions
cxt.tables_byname[self.table_name] = self # construct reverse map
def reference(self):
if self not in self.cxt.tables_in_context:
counter = 0
base_name = self.table_name
if base_name in self.cxt.tables_in_context.values():
while (f'{base_name}_{counter}') in self.cxt.tables_in_context.values():
counter += 1
base_name = f'{base_name}_{counter}'
self.cxt_name = base_name
self.cxt.tables_in_context[self] = base_name
type_tags = '<'
for c in self.columns:
type_tags += c.type + ','
if type_tags.endswith(','):
type_tags = type_tags[:-1]
type_tags += '>'
self.cxt.emit(f'auto& {base_name} = *(TableInfo{type_tags} *)(cxt->tables["{self.table_name}"]);')
return self.cxt_name
def refer_all(self):
self.reference()
for c in self.columns:
c.reference()
def add_cols(self, cols, new = True):
for i, c in enumerate(cols):
self.add_col(c, new, i)
def add_col(self, c, new = True, i = 0):
_ty = c['type']
if new:
cname =f'get<{i}>({self.table_name})'
_ty = _ty if type(c) is ColRef else list(_ty.keys())[0]
col_object = ColRef(cname, _ty, c, 1, self,c['name'], len(self.columns))
else:
col_object = c
cname = c.cname
c.table = self
self.cxt.ccols_byname[cname] = col_object
self.columns_byname[c['name']] = col_object
self.columns.append(col_object)
def get_size(self):
size_tmp = 'tmp_sz_'+base62uuid(6)
self.cxt.emit(f'const auto& {size_tmp} = {self.columns[0].reference()}.size;')
return size_tmp
@property
def n_cols(self):
return len(self.columns)
def materialize_orderbys(self):
view_stack = ''
stack_name = ''
for o in self.order:
o.materialize()
if len(view_stack) == 0:
view_stack = o.view.name
stack_name = view_stack
else:
view_stack = view_stack+'['+ o.view.name +']'
# TODO: Optimize by doing everything in a stmt
if len(view_stack) > 0:
if len(self.order) > 1:
self.cxt.emit(f'{stack_name}:{view_stack}')
for c in self.columns:
c.order_pending = stack_name
self.order[0].node.view = stack_name
self.order.clear()
def get_col_d(self, col_name):
col = self.columns_byname[col_name]
if type(self.rec) is set:
self.rec.add(col)
return col
def get_ccolname_d(self, col_name):
return self.get_col_d(col_name).cname
def get_col(self, col_name):
self.materialize_orderbys()
col = self.get_col_d(col_name)
if type(col.order_pending) is str:
self.cxt.emit_no_flush(f'{col.cname}:{col.cname}[{col.order_pending}]')
col.order_pending = None
return col
def get_ccolname(self, col_name):
return self.get_col(col_name).cname
def add_alias(self, alias):
# TODO: Scoping of alias should be constrained in the query.
if alias in self.cxt.tables_byname.keys():
print("Error: table alias already exists")
return
self.cxt.tables_byname[alias] = self
self.alias.add(alias)
def parse_col_names(self, colExpr, materialize = True, raw = False):
# get_col = self.get_col if materialize else self.get_col_d
parsedColExpr = colExpr.split('.')
ret = None
if len(parsedColExpr) <= 1:
ret = self.get_col_d(colExpr)
else:
datasource = self.cxt.tables_byname[parsedColExpr[0]]
if datasource is None:
raise ValueError(f'Table name/alias not defined: {parsedColExpr[0]}')
else:
ret = datasource.parse_col_names(parsedColExpr[1], raw)
from common.expr import index_expr
string = ret.reference() + index_expr
if self.groupinfo is not None and ret and ret in self.groupinfo.raw_groups:
string = f'get<{self.groupinfo.raw_groups.index(ret)}>({{y}})'
return string, ret if raw else string
class View:
def __init__(self, context, table = None, tmp = True):
self.table: TableInfo = table
self.name = 'v'+base62uuid(7)
if type(table) is TableInfo:
table.views.add(self)
self.context = context
def construct(self):
self.context.emit(f'{self.name}:()')
class Context:
function_head = '''
extern "C" int __DLLEXPORT__ dllmain(Context* cxt) {
using namespace std;
using namespace types;
'''
LOG_INFO = 'INFO'
LOG_ERROR = 'ERROR'
LOG_SILENT = 'SILENT'
from common.types import Types
type_table : Dict[str, Types] = dict()
def new(self):
self.tmp_names = set()
self.udf_map = dict()
self.headers = set(['\"./server/libaquery.h\"'])
self.finalized = False
# read header
self.ccode = str()
self.ccodelet = str()
with open('header.cxx', 'r') as outfile:
self.ccode = outfile.read()
# datasource will be available after `from' clause is parsed
# and will be deactivated when the `from' is out of scope
self.datasource = None
self.ds_stack = []
self.scans = []
self.removing_scan = False
def __init__(self):
from prompt import PromptState
self.tables:list[TableInfo] = []
self.tables_byname = dict()
self.ccols_byname = dict()
self.gc_name = 'gc_' + base62uuid(4)
self.tmp_names = set()
self.udf_map = dict()
self.headers = set(['\"./server/libaquery.h\"'])
self.finalized = False
self.log_level = Context.LOG_SILENT
self.print = print
# read header
self.ccode = str()
self.ccodelet = str()
self.columns_in_context = dict()
self.tables_in_context = dict()
with open('header.cxx', 'r') as outfile:
self.ccode = outfile.read()
# datasource will be available after `from' clause is parsed
# and will be deactivated when the `from' is out of scope
self.datasource = None
self.ds_stack = []
self.scans = []
self.removing_scan = False
self.force_compiled = True
self.system_state: Optional[PromptState] = None
def add_table(self, table_name, cols):
tbl = TableInfo(table_name, cols, self)
self.tables.append(tbl)
return tbl
def gen_tmptable(self):
from common.utils import base62uuid
return f't{base62uuid(7)}'
def reg_tmp(self, name, f):
self.tmp_names.add(name)
self.emit(f"{self.gc_name}.reg({{{name}, 0,0{'' if f is None else ',{f}'}}});")
def define_tmp(self, typename, isPtr = True, f = None):
name = 'tmp_' + base62uuid()
if isPtr:
self.emit(f'auto* {name} = new {typename};')
self.reg_tmp(name, f)
else:
self.emit(f'auto {name} = {typename};')
return name
def emit(self, codelet):
self.ccode += self.ccodelet + codelet + '\n'
self.ccodelet = ''
def emit_no_flush(self, codelet):
self.ccode += codelet + '\n'
def emit_flush(self):
self.ccode += self.ccodelet + '\n'
self.ccodelet = ''
def emit_nonewline(self, codelet):
self.ccodelet += codelet
def datsource_top(self):
if len(self.ds_stack) > 0:
return self.ds_stack[-1]
else:
return None
def datasource_pop(self):
if len(self.ds_stack) > 0:
self.ds_stack.pop()
return self.ds_stack[-1]
else:
return None
def datasource_push(self, ds):
if type(ds) is TableInfo:
self.ds_stack.append(ds)
return ds
else:
return None
def remove_scan(self, scan, str_scan):
self.emit(str_scan)
self.scans.remove(scan)
def Info(self, msg):
if self.log_level.upper() == Context.LOG_INFO:
self.print(msg)
def Error(self, msg):
if self.log_level.upper() == Context.LOG_ERROR:
self.print(msg)
else:
self.Info(msg)
def finalize(self):
if not self.finalized:
headers = ''
for h in self.headers:
if h[0] != '"':
headers += '#include <' + h + '>\n'
else:
headers += '#include ' + h + '\n'
self.ccode = headers + self.function_head + self.ccode + 'return 0;\n}'
self.headers = set()
return self.ccode
def __str__(self):
self.finalize()
return self.ccode
def __repr__(self) -> str:
return self.__str__()
class ast_node:
types = dict()
header = []
def __init__(self, parent:"ast_node", node, context:Context = None):
self.context = parent.context if context is None else context
self.parent = parent
self.datasource = None
self.init(node)
self.produce(node)
self.spawn(node)
self.consume(node)
def emit(self, code):
self.context.emit(code)
def emit_no_ln(self, code):
self.context.emit_nonewline(code)
name = 'null'
# each ast node has 3 stages.
# `produce' generates info for child nodes
# `spawn' populates child nodes
# `consume' consumes info from child nodes and finalizes codegen
# For simple operators, there may not be need for some of these stages
def init(self, _):
pass
def produce(self, _):
pass
def spawn(self, _):
pass
def consume(self, _):
pass
# include classes in module as first order operators
def include(objs):
import inspect
for _, cls in inspect.getmembers(objs):
if inspect.isclass(cls) and issubclass(cls, ast_node) and not cls.name.startswith('_'):
ast_node.types[cls.name] = cls
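Since `include()` registers every `ast_node` subclass under its `name` attribute, new code-generation operators plug in by defining a class and registering its module. A minimal, hypothetical sketch (the `echo` node is illustrative and not part of AQuery):

```python
# Hypothetical extension: a new operator registered through common.ast.include().
import sys
from common.ast import ast_node, include

class echo(ast_node):
    name = 'echo'                      # key that generate() looks up in ast_node.types

    def produce(self, node):
        # 'produce' runs first; here we simply emit a C++ comment into the output
        self.emit(f'// echo: {node}')

include(sys.modules[__name__])         # adds `echo` (by its name) to ast_node.types
```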

@ -1,9 +1,9 @@
# code-gen for data decl languages
-from engine.orderby import orderby
-from engine.ast import ColRef, TableInfo, ast_node, Context, include
-from engine.scan import scan
-from engine.utils import base62uuid
+from common.orderby import orderby
+from common.ast import ColRef, TableInfo, ast_node, Context, include
+from common.scan import scan
+from common.utils import base62uuid
class create_table(ast_node):
name = 'create_table'
@ -110,7 +110,7 @@ class outfile(ast_node):
filename = node['loc']['literal'] if 'loc' in node else node['literal']
sep = ',' if 'term' not in node else node['term']['literal']
file_pointer = 'fp_' + base62uuid(6)
-self.emit(f'FILE* {file_pointer} = fopen("{filename}", "w");')
+self.emit(f'FILE* {file_pointer} = fopen("{filename}", "wb");')
self.emit(f'{out_table.cxt_name}->printall("{sep}", "\\n", nullptr, {file_pointer});')
self.emit(f'fclose({file_pointer});')
# self.context.headers.add('fstream')

@ -0,0 +1,135 @@
from common.ast import ast_node, ColRef
start_expr = 'f"'
index_expr = '{\'\' if x is None and y is None else f\'[{x}]\'}'
end_expr = '"'
class expr(ast_node):
name='expr'
builtin_func_maps = {
'max': 'max',
'min': 'min',
'avg': 'avg',
'sum': 'sum',
'count' : 'count',
'mins': ['mins', 'minw'],
'maxs': ['maxs', 'maxw'],
'avgs': ['avgs', 'avgw'],
'sums': ['sums', 'sumw'],
}
binary_ops = {
'sub':'-',
'add':'+',
'mul':'*',
'div':'/',
'mod':'%',
'and':'&&',
'or':'||',
'xor' : '^',
'gt':'>',
'lt':'<',
'lte':'<=',
'gte':'>=',
'neq':'!=',
'eq':'=='
}
compound_ops = {
'missing' : ['missing', lambda x: f'{x[0]} == nullval<decays<decltype({x[0]})>>'],
}
unary_ops = {
'neg' : '-',
'not' : '!'
}
coumpound_generating_ops = ['avgs', 'mins', 'maxs', 'sums'] + \
list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() )
def __init__(self, parent, node, materialize_cols = True, abs_col = False):
self.materialize_cols = materialize_cols
self.raw_col = None
self.__abs = abs_col
self.inside_agg = False
if(type(parent) is expr):
self.inside_agg = parent.inside_agg
self.__abs = parent.__abs
ast_node.__init__(self, parent, node, None)
def init(self, _):
from common.projection import projection
parent = self.parent
self.isvector = parent.isvector if type(parent) is expr else False
self.is_compound = parent.is_compound if type(parent) is expr else False
if type(parent) in [projection, expr]:
self.datasource = parent.datasource
else:
self.datasource = self.context.datasource
self.udf_map = parent.context.udf_map
self._expr = ''
self.cexpr = None
self.func_maps = {**self.udf_map, **self.builtin_func_maps}
def produce(self, node):
if type(node) is dict:
for key, val in node.items():
if key in self.func_maps:
# TODO: distinguish between UDF agg functions and other UDF functions.
self.inside_agg = True
self.context.headers.add('"./server/aggregations.h"')
if type(val) is list and len(val) > 1:
cfunc = self.func_maps[key]
cfunc = cfunc[len(val) - 1] if type(cfunc) is list else cfunc
self._expr += f"{cfunc}("
for i, p in enumerate(val):
self._expr += expr(self, p)._expr + (','if i<len(val)-1 else '')
else:
funcname = self.func_maps[key]
funcname = funcname[0] if type(funcname) is list else funcname
self._expr += f"{funcname}("
self._expr += expr(self, val)._expr
self._expr += ')'
self.inside_agg = False
elif key in self.binary_ops:
l = expr(self, val[0])._expr
r = expr(self, val[1])._expr
self._expr += f'({l}{self.binary_ops[key]}{r})'
elif key in self.compound_ops:
x = []
if type(val) is list:
for v in val:
x.append(expr(self, v)._expr)
self._expr = self.compound_ops[key][1](x)
elif key in self.unary_ops:
self._expr += f'{self.unary_ops[key]}({expr(self, val)._expr})'
else:
self.context.Error(f'Undefined expr: {key}{val}')
if key in self.coumpound_generating_ops and not self.is_compound:
self.is_compound = True
p = self.parent
while type(p) is expr and not p.is_compound:
p.is_compound = True
p = p.parent
elif type(node) is str:
p = self.parent
while type(p) is expr and not p.isvector:
p.isvector = True
p = p.parent
self._expr, self.raw_col = self.datasource.parse_col_names(node, self.materialize_cols, True)
self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
if self.__abs and self.raw_col:
self._expr = self.raw_col.reference() + ("" if self.inside_agg else index_expr)
elif type(node) is bool:
self._expr = '1' if node else '0'
else:
self._expr = f'{node}'
def toCExpr(_expr):
return lambda x = None, y = None : eval(start_expr + _expr + end_expr)
def consume(self, _):
self.cexpr = expr.toCExpr(self._expr)
def __str__(self):
return self.cexpr

@ -1,8 +1,8 @@
from engine.ast import ColRef, TableInfo, ast_node from common.ast import ColRef, TableInfo, ast_node
from engine.orderby import assumption from common.orderby import assumption
from engine.scan import scan from common.scan import scan
from engine.utils import base62uuid from common.utils import base62uuid
from engine.expr import expr from common.expr import expr
class groupby(ast_node): class groupby(ast_node):
name = '_groupby' name = '_groupby'

@ -0,0 +1,6 @@
from common.ast import ast_node
class join(ast_node):
name='join'

@ -1,6 +1,6 @@
from engine.ast import ColRef, TableInfo, View, ast_node, Context from common.ast import ColRef, TableInfo, View, ast_node, Context
from engine.utils import base62uuid, seps from common.utils import base62uuid, seps
from engine.expr import expr from common.expr import expr
class order_item: class order_item:
def __init__(self, name, node, order = True): def __init__(self, name, node, order = True):
@ -43,7 +43,7 @@ class orderby(ast_node):
def merge(self, node): def merge(self, node):
self.produce(node) self.produce(node)
def finialize(self, references): def finalize(self, references):
self.order = [ o for o in self.order if o.name in references ] self.order = [ o for o in self.order if o.name in references ]
def result(self, sep:str = ','): def result(self, sep:str = ','):

@ -1,11 +1,11 @@
from engine.ast import ColRef, TableInfo, ast_node, Context, include from common.ast import ColRef, TableInfo, ast_node, Context, include
from engine.groupby import groupby from common.groupby import groupby
from engine.join import join from common.join import join
from engine.expr import expr from common.expr import expr
from engine.orderby import assumption, orderby from common.orderby import assumption, orderby
from engine.scan import filter from common.scan import filter
from engine.utils import base62uuid, enlist, base62alp, has_other from common.utils import base62uuid, enlist, base62alp, has_other
from engine.ddl import create_table, outfile from common.ddl import create_table, outfile
import copy import copy
class projection(ast_node): class projection(ast_node):

@ -1,7 +1,7 @@
from xmlrpc.client import Boolean from xmlrpc.client import Boolean
from engine.ast import ColRef, TableInfo, View, ast_node, Context from common.ast import ColRef, TableInfo, View, ast_node, Context
from engine.utils import base62uuid from common.utils import base62uuid
from engine.expr import expr from common.expr import expr
class scan(ast_node): class scan(ast_node):
name = 'scan' name = 'scan'

@ -2,16 +2,24 @@ from copy import deepcopy
from typing import Dict, List from typing import Dict, List
from aquery_config import have_hge from aquery_config import have_hge
from engine.utils import base62uuid, defval from common.utils import base62uuid, defval
aquery_types: Dict[str, int] = {}
type_table: Dict[str, "Types"] = {} type_table: Dict[str, "Types"] = {}
with open('server/aquery_types.h', 'r') as f:
f.readline()
types = f.read()
import re
types = re.compile(r'(\s|\\)+').sub('', types).split(',')
aquery_types = { t : i for i, t in enumerate(types)}
class Types: class Types:
def init_any(self): def init_any(self):
self.name : str = 'Any' self.name : str = 'Any'
self.sqlname : str = 'Int' self.sqlname : str = 'Int'
self.cname : str = 'void*' self.cname : str = 'void*'
self.ctype_name : str = "types::NONE" self.ctype_name : str = "None"
self.null_value = 0 self.null_value = 0
self.priority : int= 0 self.priority : int= 0
self.cast_to_dict = dict() self.cast_to_dict = dict()
@ -31,7 +39,7 @@ class Types:
self.name = name self.name = name
self.cname = defval(cname, name.lower() + '_t') self.cname = defval(cname, name.lower() + '_t')
self.sqlname = defval(sqlname, name.upper()) self.sqlname = defval(sqlname, name.upper())
self.ctype_name = defval(ctype_name, f'types::{name.upper()}') self.ctype_name = defval(ctype_name, f'A{name.upper()}')
self.null_value = defval(null_value, 0) self.null_value = defval(null_value, 0)
self.cast_to_dict = defval(cast_to, dict()) self.cast_to_dict = defval(cast_to, dict())
self.cast_from_dict = defval(cast_from, dict()) self.cast_from_dict = defval(cast_from, dict())
@ -89,8 +97,8 @@ class TypeCollection:
type_table = dict() type_table = dict()
AnyT = Types(-1) AnyT = Types(-1)
LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '') LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '')
DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'types::ADATE') DateT = Types(200, name = 'DATE', cname = 'types::date_t', sqlname = 'DATE', ctype_name = 'ADATE')
TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'types::ATIME') TimeT = Types(201, name = 'TIME', cname = 'types::time_t', sqlname = 'TIME', ctype_name = 'ATIME')
TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP') TimeStampT = Types(202, name = 'TIMESTAMP', cname = 'types::timestamp_t', sqlname = 'TIMESTAMP', ctype_name = 'ATIMESTAMP')
DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True) DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True)
LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True) LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True)
@ -102,15 +110,15 @@ LongT = Types(4, name = 'int64', sqlname = 'BIGINT', fp_type = DoubleT)
BoolT = Types(0, name = 'bool', cname='bool', sqlname = 'BOOL', long_type=LongT, fp_type=FloatT) BoolT = Types(0, name = 'bool', cname='bool', sqlname = 'BOOL', long_type=LongT, fp_type=FloatT)
ByteT = Types(1, name = 'int8', sqlname = 'TINYINT', long_type=LongT, fp_type=FloatT) ByteT = Types(1, name = 'int8', sqlname = 'TINYINT', long_type=LongT, fp_type=FloatT)
ShortT = Types(2, name = 'int16', sqlname='SMALLINT', long_type=LongT, fp_type=FloatT) ShortT = Types(2, name = 'int16', sqlname='SMALLINT', long_type=LongT, fp_type=FloatT)
IntT = Types(3, name = 'int', cname = 'int', long_type=LongT, fp_type=FloatT) IntT = Types(3, name = 'int', cname = 'int', long_type=LongT, ctype_name = 'AINT32', fp_type=FloatT)
ULongT = Types(8, name = 'uint64', sqlname = 'UINT64', fp_type=DoubleT) ULongT = Types(8, name = 'uint64', sqlname = 'UINT64', fp_type=DoubleT)
UIntT = Types(7, name = 'uint32', sqlname = 'UINT32', long_type=ULongT, fp_type=FloatT) UIntT = Types(7, name = 'uint32', sqlname = 'UINT32', long_type=ULongT, fp_type=FloatT)
UShortT = Types(6, name = 'uint16', sqlname = 'UINT16', long_type=ULongT, fp_type=FloatT) UShortT = Types(6, name = 'uint16', sqlname = 'UINT16', long_type=ULongT, fp_type=FloatT)
UByteT = Types(5, name = 'uint8', sqlname = 'UINT8', long_type=ULongT, fp_type=FloatT) UByteT = Types(5, name = 'uint8', sqlname = 'UINT8', long_type=ULongT, fp_type=FloatT)
StrT = Types(200, name = 'str', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') StrT = Types(200, name = 'str', cname = 'string_view', sqlname='TEXT', ctype_name = 'ASTR')
TextT = Types(200, name = 'text', cname = 'const char*', sqlname='TEXT', ctype_name = 'types::ASTR') TextT = Types(200, name = 'text', cname = 'string_view', sqlname='TEXT', ctype_name = 'ASTR')
VarcharT = Types(200, name = 'varchar', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::ASTR') VarcharT = Types(200, name = 'varchar', cname = 'string_view', sqlname='VARCHAR', ctype_name = 'ASTR')
VoidT = Types(200, name = 'void', cname = 'void', sqlname='Null', ctype_name = 'types::None') VoidT = Types(200, name = 'void', cname = 'void', sqlname='Null', ctype_name = 'None')
class VectorT(Types): class VectorT(Types):
def __init__(self, inner_type : Types, vector_type:str = 'vector_type'): def __init__(self, inner_type : Types, vector_type:str = 'vector_type'):
@ -290,6 +298,16 @@ def pack_behavior(op: OperatorBase, c_code, *x):
else: else:
return f'decltype({x[0]})::pack({len(x)}, {", ".join([f"{xx}.s()" for xx in x])})' return f'decltype({x[0]})::pack({len(x)}, {", ".join([f"{xx}.s()" for xx in x])})'
def subvec_behavior(op: OperatorBase, c_code, *x):
if len(x) < 1 :
raise ValueError('At least 1 parameter is required for subvec')
if len(x) == 1:
return f'{x[0]}'
if not c_code:
return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})'
else:
return f'{x[0]}.subvec({x[1]}{f", {x[2]}" if len(x) == 3 else ""})'
# arithmetic # arithmetic
opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior) opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior)
# monetdb wont extend int division to fp type # monetdb wont extend int division to fp type
@ -305,7 +323,7 @@ opor = OperatorBase('or', 2, logical, cname = '||', sqlname = ' OR ', call = bin
opxor = OperatorBase('xor', 2, logical, cname = '^', sqlname = ' XOR ', call = binary_op_behavior) opxor = OperatorBase('xor', 2, logical, cname = '^', sqlname = ' XOR ', call = binary_op_behavior)
opgt = OperatorBase('gt', 2, logical, cname = '>', sqlname = '>', call = binary_op_behavior) opgt = OperatorBase('gt', 2, logical, cname = '>', sqlname = '>', call = binary_op_behavior)
oplt = OperatorBase('lt', 2, logical, cname = '<', sqlname = '<', call = binary_op_behavior) oplt = OperatorBase('lt', 2, logical, cname = '<', sqlname = '<', call = binary_op_behavior)
opge = OperatorBase('gte', 2, logical, cname = '>=', sqlname = '>=', call = binary_op_behavior) opgte = OperatorBase('gte', 2, logical, cname = '>=', sqlname = '>=', call = binary_op_behavior)
oplte = OperatorBase('lte', 2, logical, cname = '<=', sqlname = '<=', call = binary_op_behavior) oplte = OperatorBase('lte', 2, logical, cname = '<=', sqlname = '<=', call = binary_op_behavior)
opneq = OperatorBase('neq', 2, logical, cname = '!=', sqlname = '!=', call = binary_op_behavior) opneq = OperatorBase('neq', 2, logical, cname = '!=', sqlname = '!=', call = binary_op_behavior)
opeq = OperatorBase('eq', 2, logical, cname = '==', sqlname = '=', call = binary_op_behavior) opeq = OperatorBase('eq', 2, logical, cname = '==', sqlname = '=', call = binary_op_behavior)
@ -322,10 +340,12 @@ fnlast = OperatorBase('last', 1, as_is, cname = 'last', sqlname = 'LAST', call =
fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', call = fn_behavior) fnfirst = OperatorBase('first', 1, as_is, cname = 'frist', sqlname = 'FRIST', call = fn_behavior)
#fnsum = OperatorBase('sum', 1, ext(auto_extension), cname = 'sum', sqlname = 'SUM', call = fn_behavior) #fnsum = OperatorBase('sum', 1, ext(auto_extension), cname = 'sum', sqlname = 'SUM', call = fn_behavior)
#fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior) #fnavg = OperatorBase('avg', 1, fp(ext(auto_extension)), cname = 'avg', sqlname = 'AVG', call = fn_behavior)
fnmedian = OperatorBase('median', 1, as_is, cname = 'median', sqlname = 'MEDIAN', call = fn_behavior)
fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior) fnsum = OperatorBase('sum', 1, long_return, cname = 'sum', sqlname = 'SUM', call = fn_behavior)
fnavg = OperatorBase('avg', 1, lfp_return, cname = 'avg', sqlname = 'AVG', call = fn_behavior) fnavg = OperatorBase('avg', 1, lfp_return, cname = 'avg', sqlname = 'AVG', call = fn_behavior)
fnvar = OperatorBase('var', 1, lfp_return, cname = 'var', sqlname = 'VAR_POP', call = fn_behavior) fnvar = OperatorBase('var', 1, lfp_return, cname = 'var', sqlname = 'VAR', call = fn_behavior)
fnstd = OperatorBase('stddev', 1, lfp_return, cname = 'stddev', sqlname = 'STDDEV_POP', call = fn_behavior) fnstd = OperatorBase('stddev', 1, lfp_return, cname = 'stddev', sqlname = 'STDDEV', call = fn_behavior)
fncorr = OperatorBase('corr', 1, lfp_return, cname = 'corr', sqlname = 'CORR', call = fn_behavior)
fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlname = 'MAXS', call = windowed_fn_behavor) fnmaxs = OperatorBase('maxs', [1, 2], ty_clamp(as_is, -1), cname = 'maxs', sqlname = 'MAXS', call = windowed_fn_behavor)
fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor) fnmins = OperatorBase('mins', [1, 2], ty_clamp(as_is, -1), cname = 'mins', sqlname = 'MINS', call = windowed_fn_behavor)
fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor) fnsums = OperatorBase('sums', [1, 2], ext(ty_clamp(auto_extension, -1)), cname = 'sums', sqlname = 'SUMS', call = windowed_fn_behavor)
@ -334,6 +354,7 @@ fnvars = OperatorBase('vars', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cna
fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor) fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor)
fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior) fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior)
fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior) fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior)
fnsubvec = OperatorBase('subvec', [1, 2, 3], ty_clamp(as_is, 0, 1), cname = 'subvec', sqlname = 'SUBVEC', call = subvec_behavior)
# special # special
def is_null_call_behavior(op:OperatorBase, c_code : bool, x : str): def is_null_call_behavior(op:OperatorBase, c_code : bool, x : str):
if c_code : if c_code :
@ -355,19 +376,27 @@ fnpow = OperatorBase('pow', 2, lambda *_ : DoubleT, cname = 'pow', sqlname = 'PO
# type collections # type collections
def _op_make_dict(*items : OperatorBase): def _op_make_dict(*items : OperatorBase):
return { i.name: i for i in items} return { i.name: i for i in items}
#binary op
builtin_binary_arith = _op_make_dict(opadd, opdiv, opmul, opsub, opmod) builtin_binary_arith = _op_make_dict(opadd, opdiv, opmul, opsub, opmod)
builtin_binary_logical = _op_make_dict(opand, opor, opxor, opgt, oplt, builtin_binary_logical = _op_make_dict(opand, opor, opxor, opgt, oplt,
opge, oplte, opneq, opeq) opgte, oplte, opneq, opeq)
builtin_binary_ops = {**builtin_binary_arith, **builtin_binary_logical}
#unary op
builtin_unary_logical = _op_make_dict(opnot) builtin_unary_logical = _op_make_dict(opnot)
builtin_unary_arith = _op_make_dict(opneg) builtin_unary_arith = _op_make_dict(opneg)
builtin_unary_special = _op_make_dict(spnull, opdistinct) builtin_unary_special = _op_make_dict(spnull, opdistinct)
# functions
builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow) builtin_cstdlib = _op_make_dict(fnsqrt, fnlog, fnsin, fncos, fntan, fnpow)
builtin_func = _op_make_dict(fnmax, fnmin, fnsum, fnavg, fnmaxs, builtin_aggfunc = _op_make_dict(fnmedian, fnsubvec, fnmax, fnmin, fnsum, fnavg,
fnmins, fndeltas, fnratios, fnlast, fnlast, fnfirst, fncnt, fnvar, fnstd, fncorr)
fnfirst, fnsums, fnavgs, fncnt, builtin_vecfunc = _op_make_dict(fnmaxs,
fnpack, fntrunc, fnprev, fnnext, fnmins, fndeltas, fnratios, fnsums, fnavgs,
fnvar, fnvars, fnstd, fnstds) fnpack, fntrunc, fnprev, fnnext, fnvars, fnstds)
builtin_vecfunc = {**builtin_vecfunc, **builtin_cstdlib}
builtin_func = {**builtin_vecfunc, **builtin_aggfunc}
user_module_func = {} user_module_func = {}
builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical, builtin_operators : Dict[str, OperatorBase] = {**builtin_binary_arith, **builtin_binary_logical,
**builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib, **builtin_unary_arith, **builtin_unary_logical, **builtin_unary_special, **builtin_func, **builtin_cstdlib,
**user_module_func} **user_module_func}

@ -8,7 +8,7 @@ nums = '0123456789'
base62alp = nums + lower_alp + upper_alp base62alp = nums + lower_alp + upper_alp
reserved_monet = ['month'] reserved_monet = ['month']
session_context = None
class CaseInsensitiveDict(MutableMapping): class CaseInsensitiveDict(MutableMapping):
def __init__(self, data=None, **kwargs): def __init__(self, data=None, **kwargs):
@ -130,16 +130,21 @@ class _Counter:
import re import re
ws = re.compile(r'\s+') ws = re.compile(r'\s+')
import os def encode_integral(val : int):
return val.to_bytes(4, 'little').decode('latin-1')
import os
def add_dll_dir(dll: str): def add_dll_dir(dll: str):
import sys import sys
if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt': try:
os.add_dll_directory(dll) if sys.version_info.major >= 3 and sys.version_info.minor >7 and os.name == 'nt':
else: os.add_dll_directory(dll)
os.environ['PATH'] = os.path.abspath(dll) + os.pathsep + os.environ['PATH'] else:
os.environ['PATH'] = os.path.abspath(dll) + os.pathsep + os.environ['PATH']
except FileNotFoundError:
print(f"Error: path not found")
nullstream = open(os.devnull, 'w') nullstream = open(os.devnull, 'w')
@ -157,4 +162,52 @@ def get_innermost(sl):
elif sl and type(sl) is list: elif sl and type(sl) is list:
return get_innermost(sl[0]) return get_innermost(sl[0])
else: else:
return sl return sl
def send_to_server(payload : str):
from prompt import PromptState
cxt : PromptState = session_context
if cxt is None:
raise RuntimeError("Error! no session specified.")
else:
from ctypes import c_char_p
cxt.payload = (c_char_p*1)(c_char_p(bytes(payload, 'utf-8')))
cxt.cfg.has_dll = 0
cxt.send(1, cxt.payload)
cxt.set_ready()
def get_storedproc(name : str):
from prompt import PromptState, StoredProcedure
cxt : PromptState = session_context
if cxt is None:
raise RuntimeError("Error! no session specified.")
else:
ret : StoredProcedure = cxt.get_storedproc(bytes(name, 'utf-8'))
if (
ret.name and
ret.name.decode('utf-8') != name
):
print(f'Procedure {name} mismatch in server {ret.name.value}')
return None
else:
return ret
def execute_procedure(proc):
pass
import enum
class Backend_Type(enum.Enum):
BACKEND_AQuery = 0
BACKEND_MonetDB = 1
BACKEND_MariaDB = 2
BACKEND_DuckDB = 3
BACKEND_SQLite = 4
BACKEND_TOTAL = 5
backend_strings = {
'aquery': Backend_Type.BACKEND_AQuery,
'monetdb': Backend_Type.BACKEND_MonetDB,
'mariadb': Backend_Type.BACKEND_MariaDB,
'duckdb': Backend_Type.BACKEND_DuckDB,
'sqlite': Backend_Type.BACKEND_SQLite,
}

@ -0,0 +1,53 @@
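# Walks the repository tree and prepends the copyright banner in `payload` to every
# source file, using '#' comments for Python/shell/Makefiles and a '/* ... */' block for C/C++.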
import os
payload = ('''\
(C) Bill Sun 2022 - 2023
All rights reserved. (or some other license stuff)
''' ).strip().split('\n')
comment_factory = lambda mark, enclosure = '': (f'''\
{enclosure}{mark}
{mark} {f'{chr(10)}{mark} '.join(payload)}
{mark}{enclosure}\n
''' ).encode()
py_payload = comment_factory('#')
c_payload = comment_factory('*', '/')
curr = ['.']
while curr:
next = []
for dir in curr:
items = os.listdir(dir)
for file in items:
fullpath = f'{dir}{os.sep}{file}'
if os.path.isdir(fullpath):
next.append(fullpath)
else:
def write_to_file(payload: str):
with open(fullpath, 'rb+') as f:
content = f.read()
if not content.startswith(payload):
f.seek(0)
f.write(payload + content)
print('processed', fullpath)
else:
print('not processed', fullpath)
if (
file.lower() == 'makefile' or
file.lower() == 'dockerfile' or
'.' in file and
file[file.rfind('.') + 1:].lower()
in
['py', 'sh']
):
write_to_file(py_payload)
elif (
'.' in file and
file[file.rfind('.') + 1:].lower()
in
['cc', 'c', 'cpp', 'cxx', 'hpp', 'h']
):
write_to_file(c_payload)
curr = next

@ -52,7 +52,7 @@ int gen_trade_data(int argc, char* argv[])
printf("No parameter supplied. Use default frac=%f, N=%d? [Y/n]\n", frac, N); printf("No parameter supplied. Use default frac=%f, N=%d? [Y/n]\n", frac, N);
char buf[4096]; fgets(buf, 4095, stdin); char buf[4096]; fgets(buf, 4095, stdin);
if((buf[0] != 'y' && buf[0] !='Y') && buf[0] != '\n') { if((buf[0] != 'y' && buf[0] !='Y') && buf[0] != '\n') {
const auto &getParams = [&](){ puts("Type: frac N [ENTER]"); const auto &getParams = [&](){ puts("Type: frac N n_rows [ENTER]");
for(int i = 0; i < 4096; ++i) if(buf[i] == '\n') {buf[i] = 0; break;} for(int i = 0; i < 4096; ++i) if(buf[i] == '\n') {buf[i] = 0; break;}
char* _buf = buf; frac = getFloat(_buf); N = getInt(_buf); n_rows=getInt(_buf); char* _buf = buf; frac = getFloat(_buf); N = getInt(_buf); n_rows=getInt(_buf);
}; getParams(); }; getParams();
@ -80,7 +80,7 @@ int gen_trade_data(int argc, char* argv[])
memmove(p + lens[i], p + lens[0], (lens[i - 1] - lens[i]) * sizeof(int)); memmove(p + lens[i], p + lens[0], (lens[i - 1] - lens[i]) * sizeof(int));
permutation(p, lens[0] + N); permutation(p, lens[0] + N);
// for (int i = 0; i < lens[0] + N; ++i) printf("%d ", p[i]); // for (int i = 0; i < lens[0] + N; ++i) printf("%d ", p[i]);
FILE* fp = fopen("trade.csv", "w"); FILE* fp = fopen("trade.csv", "wb");
int* last_price = new int[N]; int* last_price = new int[N];
memset(last_price, -1, sizeof(int) * N); memset(last_price, -1, sizeof(int) * N);
fprintf(fp, "stocksymbol, time, quantity, price\n"); fprintf(fp, "stocksymbol, time, quantity, price\n");
@ -102,7 +102,7 @@ int gen_trade_data(int argc, char* argv[])
current_price = new_price; current_price = new_price;
} }
d[i]= {ss, i+1, (int)g_quantity(engine), current_price}; d[i]= {ss, i+1, (int)g_quantity(engine), current_price};
fprintf(fp, "s%d, %d, %d, %d\n", d[i].ss, d[i].t, d[i].q, d[i].p); fprintf(fp, "%d, %d, %d, %d\n", d[i].ss, d[i].t, d[i].q, d[i].p);
last_price[ss - 1] = current_price; last_price[ss - 1] = current_price;
} }
fclose(fp); fclose(fp);
@ -131,7 +131,7 @@ int gen_stock_data(int argc, char* argv[]){
} }
IDs[n_stocks] = "S"; IDs[n_stocks] = "S";
names[n_stocks] = "x"; names[n_stocks] = "x";
FILE* fp = fopen("./data/stock.csv", "w"); FILE* fp = fopen("./data/stock.csv", "wb");
fprintf(fp, "ID, timestamp, tradeDate, price\n"); fprintf(fp, "ID, timestamp, tradeDate, price\n");
char date_str_buf [types::date_t::string_length()]; char date_str_buf [types::date_t::string_length()];
int* timestamps = new int[n_data]; int* timestamps = new int[n_data];
@ -142,14 +142,15 @@ int gen_stock_data(int argc, char* argv[]){
fprintf(fp, "%s,%d,%s,%d\n", IDs[ui(engine)%(n_stocks + 1)].c_str(), timestamps[i], date, ui(engine) % 1000); fprintf(fp, "%s,%d,%s,%d\n", IDs[ui(engine)%(n_stocks + 1)].c_str(), timestamps[i], date, ui(engine) % 1000);
} }
fclose(fp); fclose(fp);
fp = fopen("./data/base.csv", "w"); fp = fopen("./data/base.csv", "wb");
fprintf(fp, "ID, name\n"); fprintf(fp, "ID, name\n");
for(int i = 0; i < n_stocks + 1; ++ i){ for(int i = 0; i < n_stocks + 1; ++ i){
fprintf(fp, "%s,%s\n", IDs[i].c_str(), names[i].c_str()); fprintf(fp, "%s,%s\n", IDs[i].c_str(), names[i].c_str());
} }
fclose(fp); fclose(fp);
return 0;
} }
int main(int argc, char* argv[]){ int main(int argc, char* argv[]){
return gen_stock_data(argc, argv); return gen_trade_data(argc, argv);
} }

@ -0,0 +1,9 @@
all:
$(CXX) -include ../server/pch.hpp putdata.cpp ../libaquery.a -shared -fPIC --std=c++2a -Ofast -DNDEBUG -fno-stack-protector -march=native -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -flto -s -fno-semantic-interposition -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/demoi0.so
$(CXX) -include ../server/pch.hpp action.cpp ../libaquery.a -shared -fPIC --std=c++2a -Ofast -DNDEBUG -fno-stack-protector -march=native -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -flto -s -fno-semantic-interposition -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/democa0.so
$(CXX) -include ../server/pch.hpp query.cpp ../libaquery.a -shared -fPIC --std=c++2a -Ofast -DNDEBUG -fno-stack-protector -march=native -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -flto -s -fno-semantic-interposition -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/democq0.so
dbg:
$(CXX) -include ../server/pch.hpp putdata.cpp -g3 -march=native ../libaquery.a -shared -fPIC --std=c++2a -D_DEBUG -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/demoi0.so
$(CXX) -include ../server/pch.hpp action.cpp -g3 -march=native ../libaquery.a -shared -fPIC --std=c++2a -D_DEBUG -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/democa0.so
$(CXX) -include ../server/pch.hpp query.cpp -g3 -march=native ../libaquery.a -shared -fPIC --std=c++2a -D_DEBUG -DTHREADING -D__AQUERY_ITC_USE_SEMPH__ -I/usr/local/opt/monetdb/include/monetdb -L/usr/local/opt/monetdb/lib -lmonetdbe -lmonetdbsql -lbat -o ../procedures/democq0.so

@ -0,0 +1,15 @@
ifeq ($(PYTHON_EXEC),)
PYTHON_EXEC=python3
endif
PYTHON_CONFIG=$(PYTHON_EXEC)-config
PYTHON_LDFLAGS=$(shell echo `$(PYTHON_CONFIG) --ldflags` | sed 's/^-L\([^[:space:]]*\).*$$/\1/')/lib$(PYTHON_EXEC)*.a
all:
rm ../../libdraw.so; \
$(CXX) --shared app2.cpp --std=c++2a -I../.. -I/usr/local/opt/matplotplusplus/include -L../.. -L/usr/local/opt/matplotplusplus/lib/ -lmatplot -laquery -o ../../libdraw.so
py:
rm ../../libdraw.so; \
$(CXX) --shared app.cpp --std=c++2a -I../.. `$(PYTHON_CONFIG) --cflags` -O0 -g3 $(PYTHON_LDFLAGS) ../../libaquery.a -o ../../libdraw.so
exe:
$(CXX) app.cpp --std=c++2a -I../.. `$(PYTHON_CONFIG) --cflags` -O0 -g3 $(PYTHON_LDFLAGS) ../../libaquery.a -o a.out
info:
$(info $(PYTHON_LDFLAGS))

@ -0,0 +1,47 @@
#include <Python.h>
#include <random>
#include <ctime>
#include <cstdio>
#include "server/vector_type.hpp"
#include "sdk/aquery.h"
#include "unistd.h"
__AQEXPORT__(bool)
draw(vector_type<int> x, vector_type<int> y) {
puts("sibal!");
auto pid = fork();
int status = 0;
if (pid == 0) {
//PyOS_AfterFork();
Py_Initialize();
PyRun_SimpleString("print(globals())");
PyRun_SimpleString("import os");
PyRun_SimpleString("sys.path.append(os.getcwd()+'/demo/Python Integration')");
//PyErr_Print();
PyRun_SimpleString("print('fuck')");
auto py_strapp = PyUnicode_DecodeFSDefault("app");
auto py_module = PyImport_Import(py_strapp);
// Py_DECREF(py_strapp);
auto py_entrypt = PyObject_GetAttrString(py_module, "draw");
auto mvx = PyMemoryView_FromMemory((char*)x.container, x.size * sizeof(int), PyBUF_WRITE),
mvy = PyMemoryView_FromMemory((char*)y.container, y.size * sizeof(int), PyBUF_WRITE);
PyObject_CallObject(py_entrypt, PyTuple_Pack(2, mvx, mvy));
// Py_DECREF(mvx);
// Py_DECREF(mvy);
// Py_DECREF(py_entrypt);
// Py_DECREF(py_module);
return 0;
}
else {
while(wait(&status) > 0);
//getchar();
}
return true;
//return Py_FinalizeEx() >= 0;
}
int main(){
draw({1,2,3}, {4,5,6});
}

@ -0,0 +1,9 @@
import matplotlib.pyplot as plt
def draw(x, y):
plt.figure()
plt.plot(x.cast('i', shape=(len(x)//4, )),
y.cast('i', shape=(len(y)//4, )))
plt.show()

@ -0,0 +1,12 @@
#include "server/vector_type.hpp"
#include "sdk/aquery.h"
#include "matplot/matplot.h"
__AQEXPORT__(bool)
draw(vector_type<int> x, vector_type<int> y) {
using namespace matplot;
auto plt = gca();
plt->plot(vector_type_std{x}, vector_type_std{y});
show();
return true;
}

@ -0,0 +1,28 @@
# Triggers Demo
This folder contains a demo workflow for the two types of triggers.
- An interval-based trigger is set up to execute a stored procedure `demoi`, defined in [demo/putdata.cpp](/demo/putdata.cpp), that inserts a .csv file from `data/electricity` into the table `source` every 5 seconds.
- A conditional trigger fires on the condition `democq`, defined in [demo/query.cpp](/demo/query.cpp), which returns true once more than 200 rows have been inserted into the table `source`. When it fires, it executes a stored procedure `democa`, defined in [demo/action.cpp](/demo/action.cpp), that trains the incremental random forest on the new data.
- See [demo/prep.a](/demo/prep.a) for the parameters of the random forest.
## Run the demo
### Preparation
- Preprocess the data
  - Put the `electricity` dataset into `/data/electricity_orig`
  - Run `python3 rfdata_preproc.py` to generate .csv files in `data/electricity/`
- Use [demo/setup.sh](/demo/setup.sh) to
  - set up the stored procedures for this demo
  - compile the random forest user module used in this demo
  - compile the queries used in this demo
### Running the demo
- Run the AQuery prompt: `python3 prompt.py`
- Use the automated AQuery script [demo/demo.aquery](/demo/demo.aquery) to execute the workflow. It does the following, in order:
  - Registers the user module and creates a new random forest by running [`f demo/prep.a`](/demo/prep.a)
  - Registers the stored procedures.
  - Creates an interval-based trigger that executes the payload `demoi` every 5 seconds
  - Creates a conditional trigger that executes the payload `democa` whenever the condition `democq` returns true; the condition is evaluated every time new data is inserted into the table `source`.
  - Loads test data by running [demo/test.a](/demo/test.a)
- Use the query `select predict(x) from test` to get predictions for the test data from the current random forest.
  - In the AQuery prompt, an extra `exec` command after the query is needed to execute it.
- The query `select test(x, y) from test` will also compute the L2 error.

@ -0,0 +1,30 @@
#include "../server/libaquery.h"
#ifndef __AQ_USE_THREADEDGC__
#include "../server/gc.h"
__AQEXPORT__(void) __AQ_Init_GC__(Context* cxt) {
GC::gc_handle = static_cast<GC*>(cxt->gc);
GC::scratch_space = nullptr;
}
#else // __AQ_USE_THREADEDGC__
#define __AQ_Init_GC__(x)
#endif // __AQ_USE_THREADEDGC__
bool (*fit_inc)(vector_type<vector_type<double>> X, vector_type<int64_t> y) = nullptr;
#include "../server/monetdb_conn.h"
__AQEXPORT__(int) action(Context* cxt) {
using namespace std;
using namespace types;
if (fit_inc == nullptr)
fit_inc = (decltype(fit_inc))(cxt->get_module_function("fit_inc"));
auto server = reinterpret_cast<DataSource*>(cxt->alt_server);
auto len = uint32_t(monetdbe_get_size(*((void**)server->server), "source"));
auto x_1bN = ColRef<vector_type<double>>(len, monetdbe_get_col(*((void**)(server->server)), "source", 0));
auto y_6uX = ColRef<int64_t>(len, monetdbe_get_col(*((void**)(server->server)), "source", 1));
fit_inc(x_1bN, y_6uX);
puts("action done.");
return 0;
}

@ -0,0 +1,17 @@
#!aquery
f demo/prep.a
exec
procedure demoi load
procedure democq load
procedure democa load
create trigger t action demoi interval 5000
exec
create trigger c on source action democa when democq
exec
f demo/test.a
exec

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,14 @@
create table source(x vecdouble, y int64);
LOAD MODULE FROM "./libirf.so" FUNCTIONS (
newtree(height:int, f:int64, sparse:vecint, forget:double, noclasses:int64, e:int) -> bool,
fit_inc(X:vecvecdouble, y:vecint64) -> bool,
predict(X:vecvecdouble) -> vecint ,
test(X:vecvecdouble, y:vecint64) -> double
);
create table elec_sparse(v int);
insert into elec_sparse values (0), (1), (1), (1), (1), (1), (1);
select newtree(30, 7, elec_sparse.v, 0.3, 2, 1) from elec_sparse

@ -0,0 +1,47 @@
#include "../server/libaquery.h"
#ifndef __AQ_USE_THREADEDGC__
#include "../server/gc.h"
__AQEXPORT__(void) __AQ_Init_GC__(Context* cxt) {
GC::gc_handle = static_cast<GC*>(cxt->gc);
GC::scratch_space = nullptr;
}
#else // __AQ_USE_THREADEDGC__
#define __AQ_Init_GC__(x)
#endif // __AQ_USE_THREADEDGC__
#include "../server/monetdb_conn.h"
#include "../csv.h"
__AQEXPORT__(int) ld(Context* cxt) {
using namespace std;
using namespace types;
static int cnt = 0;
if (cnt > 700)
return 1;
else
++cnt;
char data_name[] = "data/electricity/electricity ";
auto server = static_cast<DataSource*>(cxt->alt_server);
const char* names_fZrv[] = {"x", "y"};
auto tbl_6erF = new TableInfo<vector_type<double>,int64_t>("source", names_fZrv);
decltype(auto) c_31ju0e = tbl_6erF->get_col<0>();
decltype(auto) c_4VlzrR = tbl_6erF->get_col<1>();
c_31ju0e.init("x");
c_4VlzrR.init("y");
auto nxt = to_text(data_name + 28, cnt);
memcpy(nxt, ".csv", 5);
puts(data_name);
AQCSVReader<2, ',', ';'> csv_reader_7g0GY7(data_name);
csv_reader_7g0GY7.next_line();
vector_type<double> tmp_5XMNcBz5;
int64_t tmp_5dAHIJ1d;
while(csv_reader_7g0GY7.read_row(tmp_5XMNcBz5,tmp_5dAHIJ1d)) {
c_31ju0e.emplace_back(tmp_5XMNcBz5);
c_4VlzrR.emplace_back(tmp_5dAHIJ1d);
}
tbl_6erF->monetdb_append_table(cxt->alt_server, "source");
return 0;
}

@ -0,0 +1,30 @@
#include "../server/libaquery.h"
#ifndef __AQ_USE_THREADEDGC__
#include "../server/gc.h"
__AQEXPORT__(void) __AQ_Init_GC__(Context* cxt) {
GC::gc_handle = static_cast<GC*>(cxt->gc);
GC::scratch_space = nullptr;
}
#else // __AQ_USE_THREADEDGC__
#define __AQ_Init_GC__(x)
#endif // __AQ_USE_THREADEDGC__
#include "../server/monetdb_conn.h"
__AQEXPORT__(int) query(Context* cxt) {
using namespace std;
using namespace types;
auto server = static_cast<DataSource*>(cxt->alt_server);
static uint32_t old_sz = 0;
constexpr static uint32_t min_delta = 200;
auto newsz = monetdbe_get_size(*(void**) server->server, "source");
if (newsz > old_sz + min_delta) {
puts("query true.");
old_sz = uint32_t(newsz);
return 1;
}
puts("query false.");
return 0;
}

@ -0,0 +1,6 @@
#!/bin/sh
make -C ../sdk irf
mkdir ../procedures
cp demo*.aqp ../procedures
make

@ -0,0 +1,7 @@
-- select predict(x) from test;
-- select test(x, y) from test;
create table test(x vecdouble, y int64);
load complex data infile "data/electricity/electricity872.csv" into table test fields terminated by ',' element terminated by ';';

File diff suppressed because one or more lines are too long

@ -0,0 +1 @@
Subproject commit 6d4c91d015dfc6ffef48cf5a1f1e92fb192cc234

@ -0,0 +1,21 @@
import urllib.request
import zipfile
from aquery_config import os_platform
from os import remove
version = '0.8.1'
duckdb_os = 'windows' if os_platform == 'windows' else 'osx' if os_platform == 'darwin' else 'linux'
duckdb_plat = 'i386'
if duckdb_os == 'darwin':
duckdb_plat = 'universal'
else:
duckdb_plat = 'amd64'
duckdb_pkg = f'libduckdb-{duckdb_os}-{duckdb_plat}.zip'
# urllib.request.urlretrieve(f"https://github.com/duckdb/duckdb/releases/latest/download/{duckdb_pkg}", duckdb_pkg)
urllib.request.urlretrieve(f"https://github.com/duckdb/duckdb/releases/download/v{version}/{duckdb_pkg}", duckdb_pkg)
with zipfile.ZipFile(duckdb_pkg, 'r') as duck:
duck.extractall('deps')
remove(duckdb_pkg)

@ -8,4 +8,8 @@
## 2. ColRef supports multiple objects ## 2. ColRef supports multiple objects
- A.a = B.b then in projection A.a B.b will refer to same projection - A.a = B.b then in projection A.a B.b will refer to same projection
- Colref::ProjEq(ColRef v) => this == v or v in this.proj_eqs - Colref::ProjEq(ColRef v) => this == v or v in this.proj_eqs
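A minimal Python sketch of the ProjEq idea above, assuming an illustrative `proj_eqs` set and `proj_eq`/`add_proj_eq` helpers (the names are hypothetical, not the actual AQuery `ColRef` API): once a join condition such as `A.a = B.b` is seen, the two column references are recorded as projection-equal, so either one can stand in for the other in the output projection.

```python
# Illustrative sketch only -- not the real AQuery ColRef implementation.
class ColRef:
    def __init__(self, table: str, name: str):
        self.table = table
        self.name = name
        self.proj_eqs = set()   # columns known to be projection-equal to this one

    def add_proj_eq(self, other: "ColRef") -> None:
        # record the equality symmetrically so either column can represent the other
        self.proj_eqs.add(other)
        other.proj_eqs.add(self)

    def proj_eq(self, v: "ColRef") -> bool:
        # ColRef::ProjEq(v) => this == v or v in this.proj_eqs
        return self is v or v in self.proj_eqs

# after parsing the join condition A.a = B.b:
a_a, b_b = ColRef('A', 'a'), ColRef('B', 'b')
a_a.add_proj_eq(b_b)
assert a_a.proj_eq(b_b) and b_b.proj_eq(a_a)   # both refer to the same projected column
```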
## 3. External Optimizing Tools
- Using mold/sold instead of the built-in linker will speed up linking
- BOLT, a post-link binary optimizer

@ -1,37 +1,37 @@
from engine.ast import Context, ast_node from engine.ast import Context, ast_node
saved_cxt = None saved_cxt = None
def initialize(cxt = None, keep = False): def initialize(cxt = None, keep = False):
global saved_cxt global saved_cxt
if cxt is None or not keep or type(cxt) is not Context: if cxt is None or not keep or type(cxt) is not Context:
if saved_cxt is None or not keep: if saved_cxt is None or not keep:
cxt = Context() cxt = Context()
saved_cxt = cxt saved_cxt = cxt
else: else:
cxt = saved_cxt cxt = saved_cxt
cxt.new() cxt.new()
return cxt return cxt
def generate(ast, cxt): def generate(ast, cxt):
for k in ast.keys(): for k in ast.keys():
if k in ast_node.types.keys(): if k in ast_node.types.keys():
root = ast_node.types[k](None, ast, cxt) ast_node.types[k](None, ast, cxt)
def exec(stmts, cxt = None, keep = None): def exec(stmts, cxt = None, keep = False, parser = None):
if 'stmts' not in stmts:
return
cxt = initialize(cxt, keep) cxt = initialize(cxt, keep)
cxt.parser = parser
stmts_stmts = stmts['stmts'] stmts_stmts = stmts['stmts']
if type(stmts_stmts) is list: if type(stmts_stmts) is list:
for s in stmts_stmts: for s in stmts_stmts:
generate(s, cxt) generate(s, cxt)
else: else:
generate(stmts_stmts, cxt) generate(stmts_stmts, cxt)
for q in cxt.queries:
cxt.Info(cxt.ccode) if not q.startswith('O'):
with open('out.cpp', 'wb') as outfile: cxt.print(q.strip())
outfile.write((cxt.finalize()).encode('utf-8'))
return cxt return cxt
__all__ = ["initialize", "generate", "exec", "saved_cxt"] __all__ = ["initialize", "generate", "exec", "saved_cxt"]

File diff suppressed because it is too large

@ -1,135 +1,487 @@
from engine.ast import ast_node, ColRef from typing import Optional, Set
start_expr = 'f"'
index_expr = '{\'\' if x is None and y is None else f\'[{x}]\'}' from common.types import *
end_expr = '"' from engine.ast import ast_node
from engine.storage import ColRef, Context
from common.utils import Backend_Type
# TODO: Decouple expr and upgrade architecture
# C_CODE : get ccode/sql code?
# projections : C/SQL/decltype string
# orderby/joins/where : SQL only
# assumption/groupby : C/sql
# is_udfexpr: C only
class expr(ast_node): class expr(ast_node):
name='expr' name='expr'
builtin_func_maps = { valid_joincond = {
'max': 'max', 0 : ('and', 'eq', 'not'),
'min': 'min', 1 : ('or', 'neq', 'not'),
'avg': 'avg', 2 : ('', '', '')
'sum': 'sum',
'count' : 'count',
'mins': ['mins', 'minw'],
'maxs': ['maxs', 'maxw'],
'avgs': ['avgs', 'avgw'],
'sums': ['sums', 'sumw'],
} }
@property
def udf_decltypecall(self):
return self._udf_decltypecall if self._udf_decltypecall else self.sql
binary_ops = { @udf_decltypecall.setter
'sub':'-', def udf_decltypecall(self, val):
'add':'+', self._udf_decltypecall = val
'mul':'*',
'div':'/',
'mod':'%',
'and':'&&',
'or':'||',
'xor' : '^',
'gt':'>',
'lt':'<',
'lte':'<=',
'gte':'>=',
'neq':'!=',
'eq':'=='
}
compound_ops = {
'missing' : ['missing', lambda x: f'{x[0]} == nullval<decays<decltype({x[0]})>>'],
}
unary_ops = {
'neg' : '-',
'not' : '!'
}
coumpound_generating_ops = ['avgs', 'mins', 'maxs', 'sums'] + \ @property
list( binary_ops.keys()) + list(compound_ops.keys()) + list(unary_ops.keys() ) def need_decltypestr(self):
return self._udf_decltypecall is not None
def __init__(self, parent, node, *, c_code = None, supress_undefined = False):
from engine.ast import projection, udf
def __init__(self, parent, node, materialize_cols = True, abs_col = False): # gen2 expr have multi-passes
self.materialize_cols = materialize_cols # first pass parse json into expr tree
# generate target code in later passes upon need
self.children = []
self.opname = ''
self.curr_code = ''
self.counts = {}
self.type = None
self.raw_col = None self.raw_col = None
self.__abs = abs_col self.udf : Optional[udf] = None
self.inside_agg = False self.inside_agg = False
self.is_special = False
self.is_ColExpr = False
self.is_recursive_call_inudf = False
self.codlets : list = []
self.codebuf : Optional[str] = None
self._udf_decltypecall = None
self.node = node
self.supress_undefined = supress_undefined
if(type(parent) is expr): if(type(parent) is expr):
self.next_valid = parent.next_valid
self.inside_agg = parent.inside_agg self.inside_agg = parent.inside_agg
self.__abs = parent.__abs self.is_udfexpr = parent.is_udfexpr
self.is_agg_func = parent.is_agg_func
self.root : expr = parent.root
self.c_code = parent.c_code
self.builtin_vars = parent.builtin_vars
else:
self.join_conditions = []
self.next_valid = 0
self.is_agg_func = False
self.is_udfexpr = type(parent) is udf
self.root : expr = self
self.c_code = self.is_udfexpr or type(parent) is projection
if self.is_udfexpr:
self.udf : udf = parent
self.builtin_vars = self.udf.builtin.keys()
else:
self.builtin_vars = []
if type(c_code) is bool:
self.c_code = c_code
self.udf_called = None
self.cols_mentioned : Optional[set[ColRef]] = None
ast_node.__init__(self, parent, node, None) ast_node.__init__(self, parent, node, None)
def init(self, _): def init(self, _):
from engine.projection import projection from engine.ast import _tmp_join_union, projection
parent = self.parent parent = self.parent
self.isvector = parent.isvector if type(parent) is expr else False
self.is_compound = parent.is_compound if type(parent) is expr else False self.is_compound = parent.is_compound if type(parent) is expr else False
if type(parent) in [projection, expr]: if type(parent) in [projection, expr, _tmp_join_union]:
self.datasource = parent.datasource self.datasource = parent.datasource
else: else:
self.datasource = self.context.datasource self.datasource = self.context.datasource
self.udf_map = parent.context.udf_map self.udf_map = parent.context.udf_map
self._expr = '' self.func_maps = {**builtin_func, **self.udf_map, **user_module_func}
self.cexpr = None self.operators = {**builtin_operators, **self.udf_map, **user_module_func}
self.func_maps = {**self.udf_map, **self.builtin_func_maps} self.ext_aggfuncs = ['sum', 'avg', 'count', 'min', 'max',
'last', 'first', 'prev', 'next', 'var',
'stddev']
def produce(self, node): def produce(self, node):
from common.utils import enlist
from engine.ast import udf, projection
if type(node) is dict: if type(node) is dict:
for key, val in node.items(): if 'literal' in node:
if key in self.func_maps: node = node['literal']
# TODO: distinguish between UDF agg functions and other UDF functions. else:
self.inside_agg = True if len(node) > 1:
self.context.headers.add('"./server/aggregations.h"') print(f'Parser Error: {node} has more than 1 dict entry.')
if type(val) is list and len(val) > 1:
cfunc = self.func_maps[key] is_joincond = False
cfunc = cfunc[len(val) - 1] if type(cfunc) is list else cfunc for key, val in node.items():
self._expr += f"{cfunc}(" key = key.lower()
for i, p in enumerate(val): if key not in self.valid_joincond[self.next_valid]:
self._expr += expr(self, p)._expr + (','if i<len(val)-1 else '') self.next_valid = 2
else: else:
funcname = self.func_maps[key] if key == self.valid_joincond[self.next_valid][2]:
funcname = funcname[0] if type(funcname) is list else funcname self.next_valid = not self.next_valid
self._expr += f"{funcname}(" elif key == self.valid_joincond[self.next_valid][1]:
self._expr += expr(self, val)._expr self.next_valid = 2
self._expr += ')' is_joincond = True
self.inside_agg = False if key in self.operators:
elif key in self.binary_ops: if key in builtin_func:
l = expr(self, val[0])._expr if self.is_agg_func:
r = expr(self, val[1])._expr self.root.is_special = True # Nested Aggregation
self._expr += f'({l}{self.binary_ops[key]}{r})' else:
elif key in self.compound_ops: self.is_agg_func = True
x = []
if type(val) is list: op = self.operators[key]
count_distinct = False
if key == 'count' and type(val) is dict and 'distinct' in val:
count_distinct = True
val = val['distinct']
val = enlist(val)
exp_vals = []
for v in val: for v in val:
x.append(expr(self, v)._expr) if (
self._expr = self.compound_ops[key][1](x) type(v) is str and
elif key in self.unary_ops: '*' in v and
self._expr += f'{self.unary_ops[key]}({expr(self, val)._expr})' key != 'count'
else: ):
self.context.Error(f'Undefined expr: {key}{val}') cols = self.datasource.get_cols(v)
if cols:
for c in cols:
exp_vals.append(expr(self, c.name, c_code=self.c_code))
else:
exp_vals.append(expr(self, v, c_code=self.c_code))
self.children = exp_vals
self.opname = key
str_vals = [e.sql for e in exp_vals]
type_vals = [e.type for e in exp_vals]
is_compound = max([e.is_compound for e in exp_vals])
if key in self.ext_aggfuncs:
self.is_compound = max(0, is_compound - 1)
else:
self.is_compound = is_compound
try:
self.type = op.return_type(*type_vals)
except AttributeError as e:
if type(self.root.parent) is not udf:
# TODO: do something when this is not an error
print(f'alert: {e}')
pass
self.type = AnyT
if count_distinct: # inject distinct col later
self.sql = f'{{{op(self.c_code, *str_vals, True)}}}'
else:
self.sql = op(self.c_code, *str_vals)
special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(),
"maxs", "mins", "avgs", "sums", "deltas", "last", "first",
"stddevs", "vars", "ratios", "pack", "truncate", "subvec"]
if (
self.context.special_gb
or
(
type(self.root.parent) is projection
and
self.root.parent.force_use_spgb
)
or
self.context.system_state.cfg.backend_type == Backend_Type.BACKEND_AQuery.value
):
special_func = [*special_func, *self.ext_aggfuncs]
if key in special_func and not self.is_special:
self.is_special = True
if key in self.context.udf_map:
self.root.udf_called = self.context.udf_map[key]
if self.is_udfexpr and key == self.root.udf.name:
self.root.is_recursive_call_inudf = True
elif key in user_module_func.keys():
udf.try_init_udf(self.context)
# TODO: make udf_called a set!
p = self.parent
while type(p) is expr and not p.udf_called:
p.udf_called = self.udf_called
p = p.parent
p = self.parent
while type(p) is expr and not p.is_special:
p.is_special = True
p = p.parent
if key in self.coumpound_generating_ops and not self.is_compound: need_decltypestr = any([e.need_decltypestr for e in exp_vals])
self.is_compound = True if need_decltypestr or (self.udf_called and type(op) is udf):
p = self.parent decltypestr_vals = [e.udf_decltypecall for e in exp_vals]
while type(p) is expr and not p.is_compound: self.udf_decltypecall = op(self.c_code, *decltypestr_vals)
p.is_compound = True
p = p.parent
elif type(node) is str: if self.udf_called and type(op) is udf:
p = self.parent self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals)
while type(p) is expr and not p.isvector:
p.isvector = True elif self.is_udfexpr:
p = p.parent var_table = self.root.udf.var_table
vec = key.split('.')
_vars = [*var_table, *self.builtin_vars]
def get_vname (node):
if node in self.builtin_vars:
self.root.udf.builtin[node].enabled = True
self.builtin_var = node
return node
else:
return var_table[node]
if vec[0] not in _vars:
# print(f'Use of undefined variable {vec[0]}')
# TODO: do something when this is not an error
pass
else:
vname = get_vname(vec[0])
val = enlist(val)
if(len(val) > 2):
print('Warning: more than 2 indexes found for subvec operator.')
ex = [expr(self, v, c_code = self.c_code) for v in val]
idxs = ', '.join([e.sql for e in ex])
self.sql = f'{vname}.subvec({idxs})'
if any([e.need_decltypestr for e in ex]):
self.udf_decltypecall = f'{vname}.subvec({[", ".join([e.udf_decltypecall for e in ex])]})'
if key == 'get' and len(val) > 1:
ex_vname = expr(self, val[0], c_code=self.c_code)
self.sql = f'{ex_vname.sql}[{expr(self, val[1], c_code=self.c_code).sql}]'
if hasattr(ex_vname, 'builtin_var'):
if not hasattr(self, 'builtin_var'):
self.builtin_var = []
self.builtin_var = [*self.builtin_var, *ex_vname.builtin_var]
self.udf_decltypecall = ex_vname.sql
else:
print(f'Undefined expr: {key}{val}')
if (is_joincond and len(self.children) == 2
and all([c.is_ColExpr for c in self.children])) :
self.root.join_conditions.append(
(self.children[0].raw_col, self.children[1].raw_col)
)
if type(node) is str:
if self.is_udfexpr:
curr_udf : udf = self.root.udf
var_table = curr_udf.var_table
split = node.split('.')
if split[0] in var_table:
varname = var_table[split[0]]
if curr_udf.agg and varname in curr_udf.vecs:
if len(split) > 1:
if split[1] == 'vec':
self.sql += varname
elif split[1] == 'len':
self.sql += f'{varname}.size'
else:
print(f'no member {split[1]} in object {varname}')
else:
self.sql += f'{varname}[{curr_udf.idx_var}]'
else:
self.sql += varname
elif self.supress_undefined or split[0] in self.builtin_vars:
self.sql += node
if split[0] in self.builtin_vars:
curr_udf.builtin[split[0]].enabled = True
self.builtin_var = split[0]
else:
print(f'Undefined varname: {split[0]}')
self._expr, self.raw_col = self.datasource.parse_col_names(node, self.materialize_cols, True)
self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None # get the column from the datasource in SQL context
if self.__abs and self.raw_col: else:
self._expr = self.raw_col.reference() + ("" if self.inside_agg else index_expr) if self.datasource is not None:
if (node == '*' and
not (type(self.parent) is expr
and 'count' in self.parent.node)):
self.datasource.all_cols(ordered = True)
else:
self.raw_col = self.datasource.parse_col_names(node)
self.raw_col = self.raw_col if type(self.raw_col) is ColRef else None
if self.raw_col is not None:
self.is_ColExpr = True
table_name = ''
if '.' in node:
table_name = self.raw_col.table.table_name
if self.raw_col.table.alias:
alias = iter(self.raw_col.table.alias)
try:
a = next(alias)
while(not a or a == table_name):
a = next(alias)
if (a and a != table_name):
table_name = a
except StopIteration:
pass
if table_name:
table_name = table_name + '.'
self.sql = table_name + self.raw_col.name
self.type = self.raw_col.type
self.is_compound = True
self.is_compound += self.raw_col.compound
self.opname = self.raw_col
else:
self.sql = '\'' + node + '\'' if node != '*' else '*'
self.type = StrT
self.opname = self.sql
if self.c_code and self.datasource is not None:
if (type(self.parent) is expr and
'distinct' in self.parent.node and
not self.is_special):
# this node is executed by monetdb
# gb condition, not special
self.sql = f'distinct({self.sql})'
self.sql = f'{{y(\"{self.sql}\")}}'
elif type(node) is bool: elif type(node) is bool:
self._expr = '1' if node else '0' self.type = BoolT
self.opname = node
if self.c_code:
self.sql = '1' if node else '0'
else:
self.sql = 'TRUE' if node else 'FALSE'
elif type(node) is not dict:
self.sql = f'{node}'
self.opname = node
if type(node) is int:
if (node >= 2**63 - 1 or node <= -2**63):
self.type = HgeT
elif (node >= 2**31 - 1 or node <= -2**31):
self.type = LongT
elif node >= 2**15 - 1 or node <= -2**15:
self.type = IntT
elif node >= 2**7 - 1 or node <= -2**7:
self.type = ShortT
else:
self.type = ByteT
elif type(node) is float:
self.type = DoubleT
self.sql = f'{{"CAST({node} AS DOUBLE)" if not c_code else "{node}f"}}'
def finalize(self, override = False):
from engine.ast import udf
if self.codebuf is None or override:
self.codebuf = ''
for c in self.codlets:
if type(c) is str:
self.codebuf += c
elif type(c) is udf:
self.codebuf += c()
elif type(c) is expr:
self.codebuf += c.finalize(override=override)
return self.codebuf
def codegen(self, delegate):
self.curr_code = ''
for c in self.children:
self.curr_code += c.codegen(delegate)
return self.curr_code
def remake_binary(self, ret_expr):
if self.root:
self.oldsql = self.sql
if (self.opname in builtin_binary_ops):
patched_opname = 'aqop_' + self.opname
self.sql = (f'{patched_opname}({self.children[0].sql}, '
f'{self.children[1].sql}, {ret_expr})')
return True
elif self.opname in builtin_vecfunc:
self.sql = self.sql[:self.sql.rindex(')')]
self.sql += ', ' + ret_expr + ')'
return True
return False
def __str__(self):
return self.sql
def __repr__(self):
return self.__str__()
# builtins is readonly, so it's okay to set default value as an object
# eval is only called at root expr.
def eval(self, c_code = None, y = lambda t: t,
materialize_builtin = False, _decltypestr = False,
count = lambda : 'count', var_inject = None,
*,
gettype = False):
assert(self.is_root)
def call(decltypestr = False) -> str:
nonlocal c_code, y, materialize_builtin, count, var_inject
if var_inject:
for k, v in var_inject.items():
locals()[k] = v
if self.udf_called is not None:
loc = locals()
builtin_vars = self.udf_called.builtin_used
for b in self.udf_called.builtin_var.all:
exec(f'loc["{b}"] = lambda: "{{{b}()}}"')
if builtin_vars:
if type(materialize_builtin) is dict:
for b in builtin_vars:
exec(f'loc["{b}"] = lambda: "{materialize_builtin[b]}"')
elif self.is_recursive_call_inudf:
for b in builtin_vars:
exec(f'loc["{b}"] = lambda : "{b}"')
x = self.c_code if c_code is None else c_code
from common.utils import escape_qoutes
if decltypestr:
return eval('f\'' + escape_qoutes(self.udf_decltypecall) + '\'')
self.sql.replace("'", "\\'")
return eval('f\'' + escape_qoutes(self.sql) + '\'')
if self.is_recursive_call_inudf or (self.need_decltypestr and self.is_udfexpr) or gettype:
return call
else: else:
self._expr = f'{node}' return call(_decltypestr)
def toCExpr(_expr):
return lambda x = None, y = None : eval(start_expr + _expr + end_expr) @property
def is_root(self):
return self.root == self
# For UDFs: first check if agg variable is used as vector
# if not, then check if its length is used
class fastscan(expr):
name = 'fastscan'
def init(self, _):
self.vec_vars = set()
self.requested_lens = set()
super().init(self, _)
def process(self, key : str):
segs = key.split('.')
var_table = self.root.udf.var_table
if segs[0] in var_table and len(segs) > 1:
if segs[1] == 'vec':
self.vec_vars.add(segs[0])
elif segs[1] == 'len':
self.requested_lens.add(segs[0])
def produce(self, node):
from common.utils import enlist
if type(node) is dict:
for key, val in node.items():
if key in self.operators:
val = enlist(val)
elif self.is_udfexpr:
self.process(key)
[fastscan(self, v, c_code = self.c_code) for v in val]
elif type(node) is str:
self.process(node)
class getrefs(expr):
name = 'getrefs'
def init(self, _):
self.datasource.rec = set()
self.rec = None
def produce(self, node):
from common.utils import enlist
if type(node) is dict:
for key, val in node.items():
if key in self.operators:
val = enlist(val)
[getrefs(self, v, c_code = self.c_code) for v in val]
elif type(node) is str:
self.datasource.parse_col_names(node)
def consume(self, _): def consume(self, _):
self.cexpr = expr.toCExpr(self._expr) if self.root == self:
def __str__(self): self.rec = self.datasource.rec
return self.cexpr self.datasource.rec = None

@ -1,6 +0,0 @@
from engine.ast import ast_node
class join(ast_node):
name='join'

@ -1,9 +1,9 @@
import abc import abc
from reconstruct.ast import ast_node from engine.ast import ast_node
from typing import Optional from typing import Optional
from reconstruct.storage import Context, ColRef from engine.storage import Context, ColRef
from engine.utils import enlist from common.utils import enlist
from engine.types import builtin_func, user_module_func, builtin_operators from common.types import builtin_func, user_module_func, builtin_operators
class expr_base(ast_node, metaclass = abc.ABCMeta): class expr_base(ast_node, metaclass = abc.ABCMeta):
@ -47,7 +47,7 @@ class expr_base(ast_node, metaclass = abc.ABCMeta):
pass pass
def produce(self, node): def produce(self, node):
from reconstruct.ast import udf from engine.ast import udf
if node and type(node) is dict: if node and type(node) is dict:
if 'litral' in node: if 'litral' in node:
self.get_literal(node['literal']) self.get_literal(node['literal'])

@ -1,7 +1,7 @@
from typing import Dict, List, Set from typing import Dict, List, Optional, Set
from engine.types import * from common.types import *
from engine.utils import CaseInsensitiveDict, base62uuid, enlist from common.utils import CaseInsensitiveDict, base62uuid, enlist
class ColRef: class ColRef:
@ -64,13 +64,16 @@ class ColRef:
 class TableInfo:
 def __init__(self, table_name, cols, cxt:'Context'):
+from engine.ast import create_trigger
 # statics
 self.table_name : str = table_name
 self.contextname_cpp : str = ''
 self.alias : Set[str] = set([table_name])
 self.columns_byname : CaseInsensitiveDict[str, ColRef] = CaseInsensitiveDict() # column_name, type
 self.columns : List[ColRef] = []
+self.triggers : Set[create_trigger] = set()
 self.cxt = cxt
+self.cached = False
 # keep track of temp vars
 self.rec = None
 self.add_cols(cols)
@ -83,7 +86,7 @@ class TableInfo:
def add_cols(self, cols, new = True):
for c in enlist(cols):
self.add_col(c, new)
def add_col(self, c, new = True):
_ty = c['type']
_ty_args = None
@ -91,7 +94,7 @@ class TableInfo:
 _ty_val = list(_ty.keys())[0]
 _ty_args = _ty[_ty_val]
 _ty = _ty_val
-if new:
+if new or type(c) is not ColRef:
 col_object = ColRef(_ty, c, self, c['name'], len(self.columns), _ty_args = _ty_args)
 else:
 col_object = c
@ -144,27 +147,31 @@ class TableInfo:
 class Context:
 def new(self):
-self.headers = set(['\"./server/libaquery.h\"',
-'\"./server/monetdb_conn.h\"'])
+self.headers = set(['\"./server/monetdb_conn.h\"'])
 self.ccode = ''
 self.sql = ''
 self.finalized = False
 self.udf = None
 self.module_stubs = ''
 self.scans = []
 self.procs = []
 self.queries = []
 self.module_init_loc = 0
 self.special_gb = False
 self.has_dll = False
+self.triggers_active.clear()
+self.has_payload = True
-def __init__(self):
-self.tables_byname = dict()
+def __init__(self, state = None):
+from prompt import PromptState
+from .ast import create_trigger
+from aquery_config import compile_use_gc
+self.tables_byname : Dict[str, TableInfo] = dict()
 self.col_byname = dict()
 self.tables : Set[TableInfo] = set()
 self.cols = []
 self.datasource = None
 self.module_stubs = ''
 self.module_map = {}
 self.udf_map = dict()
 self.udf_agg_map = dict()
@ -175,7 +182,16 @@ class Context:
self.have_hge = False
self.Error = lambda *args: print(*args)
self.Info = lambda *_: None
self.triggers : Dict[str, create_trigger] = dict()
self.triggers_active = set()
self.stored_proceudres = dict()
self.force_compiled = False
self.use_gc = compile_use_gc
self.system_state: Optional[PromptState] = state
self.use_cached_tables = True
self.use_omp_simd = True
# self.new() called everytime new query batch is started
def get_scan_var(self):
it_var = 'i' + base62uuid(2)
scan_vars = set(s.it_var for s in self.scans)
@ -199,7 +215,10 @@ class Context:
 function_head = ('(Context* cxt) {\n' +
 '\tusing namespace std;\n' +
 '\tusing namespace types;\n' +
-'\tauto server = static_cast<Server*>(cxt->alt_server);\n')
+'\tauto server = static_cast<DataSource*>(cxt->curr_server);\n'
+'\tauto timer = chrono::high_resolution_clock::now();\n'
+)
udf_head = ('#pragma once\n' udf_head = ('#pragma once\n'
'#include \"./server/libaquery.h\"\n' '#include \"./server/libaquery.h\"\n'
@ -251,19 +270,38 @@ class Context:
 self.finalize_query()
 def direct_output(self, limit = -1, sep = ' ', end = '\n'):
+from common.utils import encode_integral
 if type(limit) is not int or limit > 2**32 - 1 or limit < 0:
 limit = 2**32 - 1
-limit = limit.to_bytes(4, 'little').decode('latin-1')
+limit = encode_integral(limit)
 self.queries.append(
 'O' + limit + sep + end)
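direct_output now delegates the little-endian length encoding to common.utils.encode_integral instead of spelling it out inline. A hedged sketch of what that helper is assumed to do, inferred from the expression it replaces (the real helper may differ in signature):

# Assumed behaviour of encode_integral: serialize an unsigned int as little-endian
# bytes and map each byte to a character for the query-string protocol.
def encode_integral(value: int, nbytes: int = 4) -> str:
    return value.to_bytes(nbytes, 'little').decode('latin-1')

assert encode_integral(7) == '\x07\x00\x00\x00'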
def remove_trigger(self, name : str):
from engine.ast import create_trigger
val = self.triggers.pop(name, None)
if val.type == create_trigger.Type.Callback:
val.table.triggers.remove(val)
val.remove()
def post_exec_triggers(self):
for t in self.triggers_active:
t.execute()
self.triggers_active.clear()
 def abandon_postproc(self):
 self.ccode = ''
 self.finalize_query()
 def finalize_udf(self):
-if self.udf is not None:
-return (Context.udf_head
+if self.udf:
+self.udf += '\n'.join([
+u.ccode for u in self.udf_map.values()
+])
+self.module_stubs = '\n'.join(
+[m for m in self.module_map.values()
+])
+return (Context.udf_head
 + self.module_stubs
 + self.get_init_func()
 + self.udf
@ -277,7 +315,8 @@ class Context:
 headers = ''
 # if build_driver == 'MSBuild':
 # headers ='#include \"./server/pch.hpp\"\n'
+with open('header.cxx', 'r') as header:
+headers += header.read()
 for h in self.headers:
 if h[0] != '"':
 headers += '#include <' + h + '>\n'
@ -287,6 +326,13 @@ class Context:
 headers += '#undef max\n'
 headers += '#undef min\n'
-self.ccode = headers + '\n'.join(self.procs)
+self.ccode += headers + '\n'.join(self.procs)
 self.headers = set()
 return self.ccode
@property
def omp_simd(self):
if self.use_omp_simd:
return '#pragma omp simd\n'
else:
return ''

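The new omp_simd property lets the code generator prepend an OpenMP SIMD pragma to generated loops whenever use_omp_simd is enabled. A small illustration of the intended use (Ctx is a stand-in for Context, not the project's class):

# Stand-in showing how the omp_simd property is meant to be consumed by codegen.
class Ctx:
    use_omp_simd = True

    @property
    def omp_simd(self):
        return '#pragma omp simd\n' if self.use_omp_simd else ''

print(Ctx().omp_simd + 'for (uint32_t i = 0; i < n; ++i) c[i] = a[i] + b[i];')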
@ -0,0 +1,13 @@
#include "./server/libaquery.h"
#ifndef __AQ_USE_THREADEDGC__
#include "./server/gc.h"
__AQEXPORT__(void) __AQ_Init_GC__(Context* cxt) {
GC::gc_handle = static_cast<GC*>(cxt->gc);
GC::scratch_space = nullptr;
}
#else // __AQ_USE_THREADEDGC__
#define __AQ_Init_GC__(x)
#endif // __AQ_USE_THREADEDGC__

@ -0,0 +1,2 @@
make snippet_uselib
cp ./dll.so procedures/q70.so

@ -0,0 +1,72 @@
#include "./server/libaquery.h"
#ifndef __AQ_USE_THREADEDGC__
#include "./server/gc.h"
__AQEXPORT__(void) __AQ_Init_GC__(Context* cxt) {
GC::gc_handle = static_cast<GC*>(cxt->gc);
}
#else // __AQ_USE_THREADEDGC__
#define __AQ_Init_GC__(x)
#endif // __AQ_USE_THREADEDGC__
#include "./server/hasher.h"
#include "./server/monetdb_conn.h"
#include "./server/aggregations.h"
__AQEXPORT__(int) dll_2Cxoox(Context* cxt) {
using namespace std;
using namespace types;
auto server = static_cast<DataSource*>(cxt->alt_server);
auto len_4ycjiV = server->cnt;
auto mont_8AE = ColRef<const char*>(len_4ycjiV, server->getCol(0));
auto sales_2RB = ColRef<int>(len_4ycjiV, server->getCol(1));
const char* names_6pIt[] = {"mont", "minw2ysales"};
auto out_2LuaMH = new TableInfo<const char*,vector_type<double>>("out_2LuaMH", names_6pIt);
decltype(auto) col_EeW23s = out_2LuaMH->get_col<0>();
decltype(auto) col_5gY1Dm = out_2LuaMH->get_col<1>();
typedef record<decays<decltype(mont_8AE)::value_t>> record_typegj3e8Xf;
ankerl::unordered_dense::map<record_typegj3e8Xf, uint32_t, transTypes<record_typegj3e8Xf, hasher>> gMzMTEvd;
gMzMTEvd.reserve(mont_8AE.size);
uint32_t* reversemap = new uint32_t[mont_8AE.size<<1],
*mapbase = reversemap + mont_8AE.size;
for (uint32_t i2E = 0; i2E < mont_8AE.size; ++i2E){
reversemap[i2E] = gMzMTEvd.hashtable_push(forward_as_tuple(mont_8AE[i2E]));
}
auto arr_values = gMzMTEvd.values().data();
auto arr_len = gMzMTEvd.size();
uint32_t* seconds = new uint32_t[gMzMTEvd.size()];
auto vecs = static_cast<vector_type<uint32_t>*>(malloc(sizeof(vector_type<uint32_t>) * arr_len));
vecs[0].init_from(arr_values[0].second, mapbase);
for (uint32_t i = 1; i < arr_len; ++i) {
vecs[i].init_from(arr_values[i].second, mapbase + arr_values[i - 1].second);
arr_values[i].second += arr_values[i - 1].second;
}
for (uint32_t i = 0; i < mont_8AE.size; ++i) {
auto id = reversemap[i];
mapbase[--arr_values[id].second] = i;
}
col_EeW23s.reserve(gMzMTEvd.size());
col_5gY1Dm.reserve(gMzMTEvd.size());
auto buf_col_5gY1Dm = new double[mont_8AE.size];
for (uint32_t i = 0; i < arr_len; ++i) {
col_5gY1Dm[i].init_from(vecs[i].size, buf_col_5gY1Dm + arr_values[i].second);
}
for (uint32_t i = 0; i < arr_len; ++i) {
auto &key_3iNX3qG = arr_values[i].first;
auto &val_7jjv8Mo = arr_values[i].second;
col_EeW23s.emplace_back(get<0>(key_3iNX3qG));
avgw(10, sales_2RB[vecs[i]], col_5gY1Dm[i]);
}
//print(*out_2LuaMH);
//FILE* fp_5LQeym = fopen("flatten.csv", "wb");
out_2LuaMH->printall(",", "\n", nullptr, nullptr, 10);
//fclose(fp_5LQeym);
puts("done.");
return 0;
}

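The generated procedure above groups rows through a hash map plus a reverse map: one pass assigns each row a group id, prefix sums of the per-group counts give group offsets, and a scatter pass packs the row indices of each group contiguously. A hedged Python sketch of that partitioning idea (group_rows is illustrative, not generated code):

# Illustrative re-statement of the hashtable_push / reversemap / mapbase scheme.
def group_rows(keys):
    ids = {}                                               # key -> group id
    row_gid = [ids.setdefault(k, len(ids)) for k in keys]  # like reversemap
    counts = [0] * len(ids)
    for g in row_gid:
        counts[g] += 1
    offsets, acc = [], 0
    for c in counts:                                       # prefix sums per group
        offsets.append(acc)
        acc += c
    mapbase = [0] * len(keys)                              # packed row indices
    cursor = offsets[:]
    for row, g in enumerate(row_gid):
        mapbase[cursor[g]] = row
        cursor[g] += 1
    return ids, mapbase, offsets

print(group_rows(['jan', 'feb', 'jan', 'mar', 'feb']))
# ({'jan': 0, 'feb': 1, 'mar': 2}, [0, 2, 1, 4, 3], [0, 2, 4])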
Binary file not shown.

@ -0,0 +1,28 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef EXCEPTION_BUFFER_H
#define EXCEPTION_BUFFER_H
#include "monetdb_config.h"
#include <setjmp.h>
typedef struct exception_buffer {
jmp_buf state;
int code;
char *msg;
int enabled;
} exception_buffer;
extern exception_buffer *eb_init( exception_buffer *eb );
/* != 0 on when we return to the savepoint */
#define eb_savepoint(eb) ((eb)->enabled=1,setjmp((eb)->state))
extern _Noreturn void eb_error( exception_buffer *eb, char *msg, int val );
#endif /* EXCEPTION_BUFFER_H */

File diff suppressed because it is too large

@ -0,0 +1,463 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_ATOMS_H_
#define _GDK_ATOMS_H_
/* atomFromStr returns the number of bytes of the input string that
* were processed. atomToStr returns the length of the string
* produced. Both functions return -1 on (any kind of) failure. If
* *dst is not NULL, *len specifies the available space. If there is
* not enough space, or if *dst is NULL, *dst will be freed (if not
* NULL) and a new buffer will be allocated and returned in *dst.
* *len will be set to reflect the actual size allocated. If
* allocation fails, *dst will be NULL on return and *len is
* undefined. In any case, if the function returns, *buf is either
* NULL or a valid pointer and then *len is the size of the area *buf
* points to.
*
* atomCmp returns a value less than zero/equal to zero/greater than
* zer if the first argument points to a values which is deemed
* smaller/equal to/larger than the value pointed to by the second
* argument.
*
* atomHash calculates a hash function for the value pointed to by the
* argument.
*/
#define IDLENGTH 64 /* maximum BAT id length */
typedef struct {
/* simple attributes */
char name[IDLENGTH];
uint8_t storage; /* stored as another type? */
bool linear; /* atom can be ordered linearly */
uint16_t size; /* fixed size of atom */
/* automatically generated fields */
const void *atomNull; /* global nil value */
/* generic (fixed + varsized atom) ADT functions */
ssize_t (*atomFromStr) (const char *src, size_t *len, void **dst, bool external);
ssize_t (*atomToStr) (char **dst, size_t *len, const void *src, bool external);
void *(*atomRead) (void *dst, size_t *dstlen, stream *s, size_t cnt);
gdk_return (*atomWrite) (const void *src, stream *s, size_t cnt);
int (*atomCmp) (const void *v1, const void *v2);
BUN (*atomHash) (const void *v);
/* optional functions */
gdk_return (*atomFix) (const void *atom);
gdk_return (*atomUnfix) (const void *atom);
/* varsized atom-only ADT functions */
var_t (*atomPut) (BAT *, var_t *off, const void *src);
void (*atomDel) (Heap *, var_t *atom);
size_t (*atomLen) (const void *atom);
gdk_return (*atomHeap) (Heap *, size_t);
} atomDesc;
#define MAXATOMS 128
gdk_export atomDesc BATatoms[MAXATOMS];
gdk_export int GDKatomcnt;
gdk_export int ATOMallocate(const char *nme);
gdk_export int ATOMindex(const char *nme);
gdk_export str ATOMname(int id);
gdk_export size_t ATOMlen(int id, const void *v);
gdk_export void *ATOMnil(int id)
__attribute__((__malloc__));
gdk_export int ATOMprint(int id, const void *val, stream *fd);
gdk_export char *ATOMformat(int id, const void *val);
gdk_export void *ATOMdup(int id, const void *val);
/*
* @- maximum atomic string lengths
*/
#define bitStrlen 8
#define bteStrlen 8
#define shtStrlen 12
#define intStrlen 24
#if SIZEOF_OID == SIZEOF_INT
#define oidStrlen 24
#else
#define oidStrlen 48
#endif
#if SIZEOF_PTR == SIZEOF_INT
#define ptrStrlen 24
#else
#define ptrStrlen 48
#endif
#define lngStrlen 48
#ifdef HAVE_HGE
#define hgeStrlen 96
#endif
#define fltStrlen 48
#define dblStrlen 96
/*
* The system comes with the traditional atomic types: int (4 bytes),
* bool(1 byte) and str (variable). In addition, we support the notion
* of an OID type, which ensures uniqueness of its members. This
* leads to the following type descriptor table.
*/
#ifdef HAVE_HGE
gdk_export ssize_t hgeFromStr(const char *src, size_t *len, hge **dst, bool external);
gdk_export ssize_t hgeToStr(str *dst, size_t *len, const hge *src, bool external);
#endif
gdk_export ssize_t lngFromStr(const char *src, size_t *len, lng **dst, bool external);
gdk_export ssize_t lngToStr(str *dst, size_t *len, const lng *src, bool external);
gdk_export ssize_t intFromStr(const char *src, size_t *len, int **dst, bool external);
gdk_export ssize_t intToStr(str *dst, size_t *len, const int *src, bool external);
gdk_export ssize_t batFromStr(const char *src, size_t *len, bat **dst, bool external);
gdk_export ssize_t batToStr(str *dst, size_t *len, const bat *src, bool external);
gdk_export ssize_t ptrFromStr(const char *src, size_t *len, ptr **dst, bool external);
gdk_export ssize_t ptrToStr(str *dst, size_t *len, const ptr *src, bool external);
gdk_export ssize_t bitFromStr(const char *src, size_t *len, bit **dst, bool external);
gdk_export ssize_t bitToStr(str *dst, size_t *len, const bit *src, bool external);
gdk_export ssize_t OIDfromStr(const char *src, size_t *len, oid **dst, bool external);
gdk_export ssize_t OIDtoStr(str *dst, size_t *len, const oid *src, bool external);
gdk_export ssize_t shtFromStr(const char *src, size_t *len, sht **dst, bool external);
gdk_export ssize_t shtToStr(str *dst, size_t *len, const sht *src, bool external);
gdk_export ssize_t bteFromStr(const char *src, size_t *len, bte **dst, bool external);
gdk_export ssize_t bteToStr(str *dst, size_t *len, const bte *src, bool external);
gdk_export ssize_t fltFromStr(const char *src, size_t *len, flt **dst, bool external);
gdk_export ssize_t fltToStr(str *dst, size_t *len, const flt *src, bool external);
gdk_export ssize_t dblFromStr(const char *src, size_t *len, dbl **dst, bool external);
gdk_export ssize_t dblToStr(str *dst, size_t *len, const dbl *src, bool external);
gdk_export ssize_t GDKstrFromStr(unsigned char *restrict dst, const unsigned char *restrict src, ssize_t len);
gdk_export ssize_t strFromStr(const char *restrict src, size_t *restrict len, str *restrict dst, bool external);
gdk_export size_t escapedStrlen(const char *restrict src, const char *sep1, const char *sep2, int quote);
gdk_export size_t escapedStr(char *restrict dst, const char *restrict src, size_t dstlen, const char *sep1, const char *sep2, int quote);
/*
* @- nil values
* All types have a single value designated as a NIL value. It
* designates a missing value and it is ignored (forbidden) in several
* primitives. The current policy is to use the smallest value in any
* ordered domain. The routine atomnil returns a pointer to the nil
* value representation.
*/
#define GDK_bit_max ((bit) 1)
#define GDK_bit_min ((bit) 0)
#define GDK_bte_max ((bte) INT8_MAX)
#define GDK_bte_min ((bte) INT8_MIN+1)
#define GDK_sht_max ((sht) INT16_MAX)
#define GDK_sht_min ((sht) INT16_MIN+1)
#define GDK_int_max ((int) INT32_MAX)
#define GDK_int_min ((int) INT32_MIN+1)
#define GDK_lng_max ((lng) INT64_MAX)
#define GDK_lng_min ((lng) INT64_MIN+1)
#ifdef HAVE_HGE
#define GDK_hge_max ((((hge) 1) << 126) - 1 + (((hge) 1) << 126))
#define GDK_hge_min (-GDK_hge_max)
#endif
#define GDK_flt_max ((flt) FLT_MAX)
#define GDK_flt_min ((flt) -FLT_MAX)
#define GDK_dbl_max ((dbl) DBL_MAX)
#define GDK_dbl_min ((dbl) -DBL_MAX)
#define GDK_oid_max (((oid) 1 << ((8 * SIZEOF_OID) - 1)) - 1)
#define GDK_oid_min ((oid) 0)
/* representation of the nil */
gdk_export const bte bte_nil;
gdk_export const sht sht_nil;
gdk_export const int int_nil;
#ifdef NAN_CANNOT_BE_USED_AS_INITIALIZER
/* Definition of NAN is seriously broken on Intel compiler (at least
* in some versions), so we work around it. */
union _flt_nil_t {
uint32_t l;
flt f;
};
gdk_export const union _flt_nil_t _flt_nil_;
#define flt_nil (_flt_nil_.f)
union _dbl_nil_t {
uint64_t l;
dbl d;
};
gdk_export const union _dbl_nil_t _dbl_nil_;
#define dbl_nil (_dbl_nil_.d)
#else
gdk_export const flt flt_nil;
gdk_export const dbl dbl_nil;
#endif
gdk_export const lng lng_nil;
#ifdef HAVE_HGE
gdk_export const hge hge_nil;
#endif
gdk_export const oid oid_nil;
gdk_export const char str_nil[2];
gdk_export const ptr ptr_nil;
gdk_export const uuid uuid_nil;
/* derived NIL values - OIDDEPEND */
#define bit_nil ((bit) bte_nil)
#define bat_nil ((bat) int_nil)
#define void_nil oid_nil
#define is_bit_nil(v) ((v) == GDK_bte_min-1)
#define is_bte_nil(v) ((v) == GDK_bte_min-1)
#define is_sht_nil(v) ((v) == GDK_sht_min-1)
#define is_int_nil(v) ((v) == GDK_int_min-1)
#define is_lng_nil(v) ((v) == GDK_lng_min-1)
#ifdef HAVE_HGE
#define is_hge_nil(v) ((v) == GDK_hge_min-1)
#endif
#define is_oid_nil(v) ((v) == ((oid) 1 << ((8 * SIZEOF_OID) - 1)))
#define is_flt_nil(v) isnan(v)
#define is_dbl_nil(v) isnan(v)
#define is_bat_nil(v) (((v) & 0x7FFFFFFF) == 0) /* v == bat_nil || v == 0 */
#include <math.h>
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && _MSC_VER < 1800
#include <float.h>
#define isnan(x) _isnan(x)
#define isinf(x) (_fpclass(x) & (_FPCLASS_NINF | _FPCLASS_PINF))
#define isfinite(x) _finite(x)
#endif
#ifdef HAVE_HGE
#define is_uuid_nil(x) ((x).h == 0)
#else
#ifdef HAVE_UUID
#define is_uuid_nil(x) uuid_is_null((x).u)
#else
#define is_uuid_nil(x) (memcmp((x).u, uuid_nil.u, UUID_SIZE) == 0)
#endif
#endif
/*
* @- Derived types
* In all algorithms across GDK, you will find switches on the types
* (bte, sht, int, flt, dbl, lng, hge, str). They respectively
* represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
* 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location
* of a char-buffer (ended by a zero char).
*
* In contrast, the types (bit, ptr, bat, oid) are derived types. They
* do not occur in the switches. The ATOMstorage macro maps them
* respectively onto a @code{ bte}, @code{ int} (pointers are 32-bit),
* @code{ int}, and @code{ int}. OIDs are 32-bit.
*
* This approach makes it tractable to switch to 64-bits OIDs, or to a
* fully 64-bits OS easily. One only has to map the @code{ oid} and
* @code{ ptr} types to @code{ lng} instead of @code{ int}.
*
* Derived types mimic their fathers in many ways. They inherit the
* @code{ size}, @code{ linear}, and @code{ null}
* properties of their father. The same goes for the
* ADT functions HASH, CMP, PUT, NULL, DEL, LEN, and HEAP. So, a
* derived type differs in only two ways from its father:
* @table @code
* @item [string representation]
* the only two ADT operations specific for a derived type are FROMSTR
* and TOSTR.
* @item [identity]
* (a @code{ bit} is really of a different type than @code{ bte}). The
* set of operations on derived type values or BATs of such types may
* differ from the sets of operations on the father type.
* @end table
*/
/* use "do ... while(0)" so that lhs can safely be used in if statements */
#define ATOMstorage(t) BATatoms[t].storage
#define ATOMsize(t) BATatoms[t].size
#define ATOMfromstr(t,s,l,src,ext) BATatoms[t].atomFromStr(src,l,s,ext)
#define ATOMnilptr(t) BATatoms[t].atomNull
#define ATOMcompare(t) BATatoms[t].atomCmp
#define ATOMcmp(t,l,r) ((*ATOMcompare(t))(l, r))
#define ATOMhash(t,src) BATatoms[t].atomHash(src)
#define ATOMdel(t,hp,src) do if (BATatoms[t].atomDel) BATatoms[t].atomDel(hp,src); while (0)
#define ATOMvarsized(t) (BATatoms[t].atomPut != NULL)
#define ATOMlinear(t) BATatoms[t].linear
#define ATOMtype(t) ((t) == TYPE_void ? TYPE_oid : (t))
#define ATOMfix(t,v) (BATatoms[t].atomFix ? BATatoms[t].atomFix(v) : GDK_SUCCEED)
#define ATOMunfix(t,v) (BATatoms[t].atomUnfix ? BATatoms[t].atomUnfix(v) : GDK_SUCCEED)
/* The base type is the storage type if the comparison function, the
* hash function, and the nil value are the same as those of the
* storage type; otherwise it is the type itself. */
#define ATOMbasetype(t) ((t) != ATOMstorage(t) && \
ATOMnilptr(t) == ATOMnilptr(ATOMstorage(t)) && \
ATOMcompare(t) == ATOMcompare(ATOMstorage(t)) && \
BATatoms[t].atomHash == BATatoms[ATOMstorage(t)].atomHash ? \
ATOMstorage(t) : (t))
/*
* In case that atoms are added to a bat, their logical reference
* count should be incremented (and decremented if deleted). Notice
* that BATs with atomic types that have logical references (e.g. BATs
* of BATs but also BATs of ODMG odSet) can never be persistent, as
* this would make the commit tremendously complicated.
*/
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMputVAR(BAT *b, var_t *dst, const void *src)
{
assert(BATatoms[b->ttype].atomPut != NULL);
if ((*BATatoms[b->ttype].atomPut)(b, dst, src) == (var_t) -1)
return GDK_FAIL;
return GDK_SUCCEED;
}
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMputFIX(int type, void *dst, const void *src)
{
gdk_return rc;
assert(BATatoms[type].atomPut == NULL);
rc = ATOMfix(type, src);
if (rc != GDK_SUCCEED)
return rc;
switch (ATOMsize(type)) {
case 0: /* void */
break;
case 1:
* (bte *) dst = * (bte *) src;
break;
case 2:
* (sht *) dst = * (sht *) src;
break;
case 4:
* (int *) dst = * (int *) src;
break;
case 8:
* (lng *) dst = * (lng *) src;
break;
case 16:
#ifdef HAVE_HGE
* (hge *) dst = * (hge *) src;
#else
* (uuid *) dst = * (uuid *) src;
#endif
break;
default:
memcpy(dst, src, ATOMsize(type));
break;
}
return GDK_SUCCEED;
}
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMreplaceVAR(BAT *b, var_t *dst, const void *src)
{
var_t loc = *dst;
int type = b->ttype;
assert(BATatoms[type].atomPut != NULL);
if ((*BATatoms[type].atomPut)(b, &loc, src) == (var_t) -1)
return GDK_FAIL;
if (ATOMunfix(type, dst) != GDK_SUCCEED)
return GDK_FAIL;
ATOMdel(type, b->tvheap, dst);
*dst = loc;
return ATOMfix(type, src);
}
/* string heaps:
* - strings are 8 byte aligned
* - start with a 1024 bucket hash table
* - heaps < 64KiB are fully duplicate eliminated with this hash tables
* - heaps >= 64KiB are opportunistically (imperfect) duplicate
* eliminated as only the last 128KiB chunk is considered and there
* is no linked list
* - buckets and next pointers are unsigned short "indices"
* - indices should be multiplied by 8 and takes from ELIMBASE to get
* an offset
* Note that a 64KiB chunk of the heap contains at most 8K 8-byte
* aligned strings. The 1K bucket list means that in worst load, the
* list length is 8 (OK).
*/
#define GDK_STRHASHTABLE (1<<10) /* 1024 */
#define GDK_STRHASHMASK (GDK_STRHASHTABLE-1)
#define GDK_STRHASHSIZE (GDK_STRHASHTABLE * sizeof(stridx_t))
#define GDK_ELIMPOWER 16 /* 64KiB is the threshold */
#define GDK_ELIMDOUBLES(h) ((h)->free < GDK_ELIMLIMIT)
#define GDK_ELIMLIMIT (1<<GDK_ELIMPOWER) /* equivalently: ELIMBASE == 0 */
#define GDK_ELIMBASE(x) (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
#define GDK_VAROFFSET ((var_t) GDK_STRHASHSIZE)
/*
* @- String Comparison, NILs and UTF-8
*
* Using the char* type for strings is handy as this is the type of
* any constant strings in a C/C++ program. Therefore, MonetDB uses
* this definition for str. However, different compilers and
* platforms use either signed or unsigned characters for the char
* type. It is required that string ordering in MonetDB is consistent
* over platforms though.
*
* As for the choice how strings should be ordered, our support for
* UTF-8 actually imposes that it should follow 'unsigned char'
* doctrine (like in the AIX native compiler). In this semantics,
* though we have to take corrective action to ensure that str(nil) is
* the smallest value of the domain.
*/
static inline bool __attribute__((__pure__))
strEQ(const char *l, const char *r)
{
return strcmp(l, r) == 0;
}
static inline bool __attribute__((__pure__))
strNil(const char *s)
{
return s == NULL || (s[0] == '\200' && s[1] == '\0');
}
static inline size_t __attribute__((__pure__))
strLen(const char *s)
{
return strNil(s) ? 2 : strlen(s) + 1;
}
static inline int __attribute__((__pure__))
strCmp(const char *l, const char *r)
{
return strNil(r)
? !strNil(l)
: strNil(l) ? -1 : strcmp(l, r);
}
static inline size_t
VarHeapVal(const void *b, BUN p, int w)
{
switch (w) {
case 1:
return (size_t) ((const uint8_t *) b)[p] + GDK_VAROFFSET;
case 2:
return (size_t) ((const uint16_t *) b)[p] + GDK_VAROFFSET;
#if SIZEOF_VAR_T == 8
case 4:
return (size_t) ((const uint32_t *) b)[p];
#endif
default:
return (size_t) ((const var_t *) b)[p];
}
}
static inline BUN __attribute__((__pure__))
strHash(const char *key)
{
BUN y = 0;
for (BUN i = 0; key[i]; i++) {
y += key[i];
y += (y << 10);
y ^= (y >> 6);
}
y += (y << 3);
y ^= (y >> 11);
y += (y << 15);
return y;
}
#endif /* _GDK_ATOMS_H_ */

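strHash above is the classic one-at-a-time string hash. A Python re-statement of the same mixing steps, masked to 64 bits on the assumption of a 64-bit BUN and ASCII key bytes (signed-char keys would differ):

# Re-statement of strHash's mixing; BUN width and char signedness are assumptions.
def str_hash(key: bytes, width: int = 64) -> int:
    mask = (1 << width) - 1
    y = 0
    for b in key:
        y = (y + b) & mask
        y = (y + (y << 10)) & mask
        y ^= y >> 6
    y = (y + (y << 3)) & mask
    y ^= y >> 11
    y = (y + (y << 15)) & mask
    return y

print(hex(str_hash(b'mont')))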
@ -0,0 +1,97 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_BBP_H_
#define _GDK_BBP_H_
#define BBPLOADED 1 /* set if bat in memory */
#define BBPSWAPPED 2 /* set if dirty bat is not in memory */
#define BBPTMP 4 /* set if non-persistent bat has image on disk */
/* These 4 symbols indicate what the persistence state is of a bat.
* - If the bat was persistent at the last commit (or at startup
* before the first commit), BBPEXISTING or BBPDELETED is set.
* - If the bat is to be persistent after the next commit, BBPEXISTING
* or BBPNEW is set (i.e. (status&BBPPERSISTENT) != 0).
* - If the bat was transient at the last commit (or didn't exist),
* BBPNEW is set, or none of these flag values is set.
* - If the bat is to be transient at the next commit, BBPDELETED is
* set, or none of these flag values is set.
* BATmode() switches between BBPDELETED and BBPEXISTING (bat was
* persistent at last commit), or between BBPNEW and 0 (bat was
* transient or didn't exist at last commit).
* Committing a bat switches from BBPNEW to BBPEXISTING, or turns off
* BBPDELETED.
* In any case, only at most one of BBPDELETED, BBPEXISTING, and
* BBPNEW may be set at any one time.
*
* In short,
* BBPEXISTING -- bat was and should remain persistent;
* BBPDELETED -- bat was persistent at last commit and should be transient;
* BBPNEW -- bat was transient at last commit and should be persistent;
* none of the above -- bat was and should remain transient.
*/
#define BBPDELETED 16 /* set if bat persistent at last commit is now transient */
#define BBPEXISTING 32 /* set if bat was already persistent at end of last commit */
#define BBPNEW 64 /* set if bat has become persistent since last commit */
#define BBPPERSISTENT (BBPEXISTING|BBPNEW) /* mask for currently persistent bats */
#define BBPSTATUS 127
#define BBPUNLOADING 128 /* set while we are unloading */
#define BBPLOADING 256 /* set while we are loading */
#define BBPSAVING 512 /* set while we are saving */
#define BBPRENAMED 1024 /* set when bat is renamed in this transaction */
#define BBPDELETING 2048 /* set while we are deleting (special case in module unload) */
#define BBPHOT 4096 /* bat is "hot", i.e. is still in active use */
#define BBPSYNCING 8192 /* bat between creating backup and saving */
#define BBPUNSTABLE (BBPUNLOADING|BBPDELETING) /* set while we are unloading */
#define BBPWAITING (BBPUNLOADING|BBPLOADING|BBPSAVING|BBPDELETING|BBPSYNCING)
#define BBPTRIM_ALL (((size_t)1) << (sizeof(size_t)*8 - 2)) /* very large positive size_t */
gdk_export bat getBBPsize(void); /* current occupied size of BBP array */
gdk_export lng getBBPlogno(void); /* two lng of extra info in BBP.dir */
gdk_export lng getBBPtransid(void);
/* global calls */
gdk_export gdk_return BBPaddfarm(const char *dirname, uint32_t rolemask, bool logerror);
/* update interface */
gdk_export int BBPreclaim(BAT *b);
gdk_export gdk_return BBPsave(BAT *b);
gdk_export int BBPrename(bat bid, const char *nme);
/* query interface */
gdk_export bat BBPindex(const char *nme);
gdk_export BAT *BBPdescriptor(bat b);
/* swapping interface */
gdk_export gdk_return BBPsync(int cnt, bat *restrict subcommit, BUN *restrict sizes, lng logno, lng transid);
gdk_export int BBPfix(bat b);
gdk_export int BBPunfix(bat b);
gdk_export int BBPretain(bat b);
gdk_export int BBPrelease(bat b);
gdk_export void BBPkeepref(bat i);
gdk_export void BBPshare(bat b);
gdk_export void BBPcold(bat i);
#define BBP_status_set(bid, mode) \
ATOMIC_SET(&BBP_record(bid).status, mode)
#define BBP_status_on(bid, flags) \
ATOMIC_OR(&BBP_record(bid).status, flags)
#define BBP_status_off(bid, flags) \
ATOMIC_AND(&BBP_record(bid).status, ~(flags))
#define BBPswappable(b) ((b) && (b)->batCacheid && BBP_refs((b)->batCacheid) == 0)
#define BBPtrimmable(b) (BBPswappable(b) && isVIEW(b) == 0 && (BBP_status((b)->batCacheid)&BBPWAITING) == 0)
#endif /* _GDK_BBP_H_ */

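The persistence flags read most easily as two independent questions: was the BAT persistent at the last commit (BBPEXISTING or BBPDELETED set), and will it be persistent after the next one (BBPEXISTING or BBPNEW set, i.e. status & BBPPERSISTENT). A small check of that reading, using hypothetical helper names:

# Hypothetical helpers restating the flag semantics documented above.
BBPDELETED, BBPEXISTING, BBPNEW = 16, 32, 64
BBPPERSISTENT = BBPEXISTING | BBPNEW

def will_be_persistent(status):
    return (status & BBPPERSISTENT) != 0

def was_persistent_at_last_commit(status):
    return (status & (BBPEXISTING | BBPDELETED)) != 0

assert will_be_persistent(BBPNEW) and not was_persistent_at_last_commit(BBPNEW)
assert not will_be_persistent(BBPDELETED) and was_persistent_at_last_commit(BBPDELETED)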
@ -0,0 +1,168 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
/* included from gdk.h */
gdk_export BAT *BATcalcnegate(BAT *b, BAT *s);
gdk_export BAT *BATcalcabsolute(BAT *b, BAT *s);
gdk_export BAT *BATcalcincr(BAT *b, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalcdecr(BAT *b, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalciszero(BAT *b, BAT *s);
gdk_export BAT *BATcalcsign(BAT *b, BAT *s);
gdk_export BAT *BATcalcisnil(BAT *b, BAT *s);
gdk_export BAT *BATcalcisnotnil(BAT *b, BAT *s);
gdk_export BAT *BATcalcnot(BAT *b, BAT *s);
gdk_export BAT *BATcalcmin(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcmin_no_nil(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcmincst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalcmincst_no_nil(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstmin(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalccstmin_no_nil(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcmax(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcmax_no_nil(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcmaxcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalcmaxcst_no_nil(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstmax(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalccstmax_no_nil(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcadd(BAT *b1, BAT *b2, BAT *s1, BAT *s2, int tp, bool abort_on_error);
gdk_export BAT *BATcalcaddcst(BAT *b, const ValRecord *v, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalccstadd(const ValRecord *v, BAT *b, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalcsub(BAT *b1, BAT *b2, BAT *s1, BAT *s2, int tp, bool abort_on_error);
gdk_export BAT *BATcalcsubcst(BAT *b, const ValRecord *v, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalccstsub(const ValRecord *v, BAT *b, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalcmul(BAT *b1, BAT *b2, BAT *s1, BAT *s2, int tp, bool abort_on_error);
gdk_export BAT *BATcalcmulcst(BAT *b, const ValRecord *v, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalccstmul(const ValRecord *v, BAT *b, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalcdiv(BAT *b1, BAT *b2, BAT *s1, BAT *s2, int tp, bool abort_on_error);
gdk_export BAT *BATcalcdivcst(BAT *b, const ValRecord *v, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalccstdiv(const ValRecord *v, BAT *b, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalcmod(BAT *b1, BAT *b2, BAT *s1, BAT *s2, int tp, bool abort_on_error);
gdk_export BAT *BATcalcmodcst(BAT *b, const ValRecord *v, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalccstmod(const ValRecord *v, BAT *b, BAT *s, int tp, bool abort_on_error);
gdk_export BAT *BATcalcxor(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcxorcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstxor(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcor(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcorcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstor(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcand(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcandcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstand(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalclsh(BAT *b1, BAT *b2, BAT *s1, BAT *s2, bool abort_on_error);
gdk_export BAT *BATcalclshcst(BAT *b, const ValRecord *v, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalccstlsh(const ValRecord *v, BAT *b, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalcrsh(BAT *b1, BAT *b2, BAT *s1, BAT *s2, bool abort_on_error);
gdk_export BAT *BATcalcrshcst(BAT *b, const ValRecord *v, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalccstrsh(const ValRecord *v, BAT *b, BAT *s, bool abort_on_error);
gdk_export BAT *BATcalclt(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcltcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstlt(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcle(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalclecst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstle(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcgt(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcgtcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstgt(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcge(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalcgecst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstge(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalceq(BAT *b1, BAT *b2, BAT *s1, BAT *s2, bool nil_matches);
gdk_export BAT *BATcalceqcst(BAT *b, const ValRecord *v, BAT *s, bool nil_matches);
gdk_export BAT *BATcalccsteq(const ValRecord *v, BAT *b, BAT *s, bool nil_matches);
gdk_export BAT *BATcalcne(BAT *b1, BAT *b2, BAT *s1, BAT *s2, bool nil_matches);
gdk_export BAT *BATcalcnecst(BAT *b, const ValRecord *v, BAT *s, bool nil_matches);
gdk_export BAT *BATcalccstne(const ValRecord *v, BAT *b, BAT *s, bool nil_matches);
gdk_export BAT *BATcalccmp(BAT *b1, BAT *b2, BAT *s1, BAT *s2);
gdk_export BAT *BATcalccmpcst(BAT *b, const ValRecord *v, BAT *s);
gdk_export BAT *BATcalccstcmp(const ValRecord *v, BAT *b, BAT *s);
gdk_export BAT *BATcalcbetween(BAT *b, BAT *lo, BAT *hi, BAT *s, BAT *slo, BAT *shi, bool symmetric, bool linc, bool hinc, bool nils_false, bool anti);
gdk_export BAT *BATcalcbetweencstcst(BAT *b, const ValRecord *lo, const ValRecord *hi, BAT *s, bool symmetric, bool linc, bool hinc, bool nils_false, bool anti);
gdk_export BAT *BATcalcbetweenbatcst(BAT *b, BAT *lo, const ValRecord *hi, BAT *s, BAT *slo, bool symmetric, bool linc, bool hinc, bool nils_false, bool anti);
gdk_export BAT *BATcalcbetweencstbat(BAT *b, const ValRecord *lo, BAT *hi, BAT *s, BAT *shi, bool symmetric, bool linc, bool hinc, bool nils_false, bool anti);
gdk_export gdk_return VARcalcbetween(ValPtr ret, const ValRecord *v, const ValRecord *lo, const ValRecord *hi, bool symmetric, bool linc, bool hinc, bool nils_false, bool anti);
gdk_export BAT *BATcalcifthenelse(BAT *b, BAT *b1, BAT *b2);
gdk_export BAT *BATcalcifthenelsecst(BAT *b, BAT *b1, const ValRecord *c2);
gdk_export BAT *BATcalcifthencstelse(BAT *b, const ValRecord *c1, BAT *b2);
gdk_export BAT *BATcalcifthencstelsecst(BAT *b, const ValRecord *c1, const ValRecord *c2);
gdk_export gdk_return VARcalcnot(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcnegate(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcabsolute(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcincr(ValPtr ret, const ValRecord *v, bool abort_on_error);
gdk_export gdk_return VARcalcdecr(ValPtr ret, const ValRecord *v, bool abort_on_error);
gdk_export gdk_return VARcalciszero(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcsign(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcisnil(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcisnotnil(ValPtr ret, const ValRecord *v);
gdk_export gdk_return VARcalcadd(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcsub(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcmul(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcdiv(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcmod(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcxor(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalcor(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalcand(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalclsh(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalcrsh(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool abort_on_error);
gdk_export gdk_return VARcalclt(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalcgt(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalcle(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalcge(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export gdk_return VARcalceq(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool nil_matches);
gdk_export gdk_return VARcalcne(ValPtr ret, const ValRecord *lft, const ValRecord *rgt, bool nil_matches);
gdk_export gdk_return VARcalccmp(ValPtr ret, const ValRecord *lft, const ValRecord *rgt);
gdk_export BAT *BATconvert(BAT *b, BAT *s, int tp, bool abort_on_error, uint8_t scale1, uint8_t scale2, uint8_t precision);
gdk_export gdk_return VARconvert(ValPtr ret, const ValRecord *v, bool abort_on_error, uint8_t scale1, uint8_t scale2, uint8_t precision);
gdk_export gdk_return BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN *vals, int scale);
gdk_export BAT *BATgroupsum(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupprod(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error, int scale);
gdk_export gdk_return BATgroupavg3(BAT **avgp, BAT **remp, BAT **cntp, BAT *b, BAT *g, BAT *e, BAT *s, bool skip_nils);
gdk_export BAT *BATgroupavg3combine(BAT *avg, BAT *rem, BAT *cnt, BAT *g, BAT *e, bool skip_nils);
gdk_export BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupsize(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupmedian_avg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupquantile_avg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile, bool skip_nils, bool abort_on_error);
/* helper function for grouped aggregates */
gdk_export const char *BATgroupaggrinit(
BAT *b, BAT *g, BAT *e, BAT *s,
/* outputs: */
oid *minp, oid *maxp, BUN *ngrpp,
struct canditer *ci, BUN *ncand);
gdk_export gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty);
gdk_export gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty);
gdk_export void *BATmax(BAT *b, void *aggr);
gdk_export void *BATmin(BAT *b, void *aggr);
gdk_export void *BATmax_skipnil(BAT *b, void *aggr, bit skipnil);
gdk_export void *BATmin_skipnil(BAT *b, void *aggr, bit skipnil);
gdk_export dbl BATcalcstdev_population(dbl *avgp, BAT *b);
gdk_export dbl BATcalcstdev_sample(dbl *avgp, BAT *b);
gdk_export BAT *BATgroupstdev_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupstdev_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export dbl BATcalcvariance_population(dbl *avgp, BAT *b);
gdk_export dbl BATcalcvariance_sample(dbl *avgp, BAT *b);
gdk_export BAT *BATgroupvariance_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupvariance_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export dbl BATcalccovariance_sample(BAT *b1, BAT *b2);
gdk_export dbl BATcalccovariance_population(BAT *b1, BAT *b2);
gdk_export dbl BATcalccorrelation(BAT *b1, BAT *b2);
gdk_export BAT *BATgroupcovariance_sample(BAT *b1, BAT *b2, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupcovariance_population(BAT *b1, BAT *b2, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupcorrelation(BAT *b1, BAT *b2, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error);
gdk_export BAT *BATgroupstr_group_concat(BAT *b, BAT *g, BAT *e, BAT *s, BAT *sep, bool skip_nils, bool abort_on_error, const char *restrict separator);
gdk_export gdk_return BATstr_group_concat(ValPtr res, BAT *b, BAT *s, BAT *sep, bool skip_nils, bool abort_on_error, bool nil_if_empty, const char *restrict separator);
gdk_export gdk_return GDKanalytical_str_group_concat(BAT *r, BAT *p, BAT *o, BAT *b, BAT *sep, BAT *s, BAT *e, const char *restrict separator, int frame_type);

@ -0,0 +1,214 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_CAND_H_
#define _GDK_CAND_H_
/* candidates by design are ordered oid lists, besides native oid bats
* there are
* void bats for dense oid lists,
* negative oid lists
* masked oid lists
*/
#define CAND_NEGOID 0
#define CAND_MSK 1
typedef struct {
uint64_t
type:1,
// mask:1,
firstbit:48;
} ccand_t;
#define CCAND(b) ((ccand_t *) (b)->tvheap->base)
#define complex_cand(b) ((b)->ttype == TYPE_void && (b)->tvheap != NULL)
#define negoid_cand(b) (complex_cand(b) && CCAND(b)->type == CAND_NEGOID)
#define mask_cand(b) (complex_cand(b) && CCAND(b)->type == CAND_MSK)
#define ccand_first(b) ((b)->tvheap->base + sizeof(ccand_t))
#define ccand_free(b) ((b)->tvheap->free - sizeof(ccand_t))
struct canditer {
BAT *s; /* candidate BAT the iterator is based on */
union {
struct { /* for all except cand_mask */
const oid *oids; /* candidate or exceptions for non-dense */
BUN offset; /* how much of candidate list BAT we skipped */
oid add; /* value to add because of exceptions seen */
};
struct { /* only for cand_mask */
const uint32_t *mask; /* bitmask */
BUN nextmsk;
oid mskoff;
uint8_t nextbit;
uint8_t firstbit;
uint8_t lastbit;
};
};
oid seq; /* first candidate */
oid hseq; /* hseqbase from s/b for first candidate */
BUN nvals; /* number of values in .oids/.mask */
BUN ncand; /* number of candidates */
BUN next; /* next BUN to return value for */
enum {
cand_dense, /* simple dense BAT, i.e. no look ups */
cand_materialized, /* simple materialized OID list */
cand_except, /* list of exceptions in vheap */
cand_mask, /* bitmask (TYPE_msk) bat as candidate list */
} tpe;
};
/* returns the position of the lowest order bit in x, i.e. the
* smallest n such that (x & (1<<n)) != 0; must not be called with 0 */
static inline int __attribute__((__const__))
candmask_lobit(uint32_t x)
{
assert(x != 0);
#if defined(__GNUC__)
return __builtin_ctz(x) /* ffs(x) - 1 */;
#elif defined(_MSC_VER)
unsigned long idx;
if (_BitScanForward(&idx, x))
return (int) idx;
return -1;
#else
/* use binary search for the lowest set bit */
int n = 1;
if ((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; }
if ((x & 0x000000FF) == 0) { n += 8; x >>= 8; }
if ((x & 0x0000000F) == 0) { n += 4; x >>= 4; }
if ((x & 0x00000003) == 0) { n += 2; x >>= 2; }
return n - (x & 1);
#endif
}
/* population count: count number of 1 bits in a value */
static inline uint32_t __attribute__((__const__))
candmask_pop(uint32_t x)
{
#if defined(__GNUC__)
return (uint32_t) __builtin_popcount(x);
#elif defined(_MSC_VER)
return (uint32_t) __popcnt((unsigned int) (x));
#else
/* divide and conquer implementation (the two versions are
* essentially equivalent, but the first version is written a
* bit smarter) */
#if 1
x -= (x >> 1) & ~0U/3 /* 0x55555555 */; /* 3-1=2; 2-1=1; 1-0=1; 0-0=0 */
x = (x & ~0U/5) + ((x >> 2) & ~0U/5) /* 0x33333333 */;
x = (x + (x >> 4)) & ~0UL/0x11 /* 0x0F0F0F0F */;
x = (x + (x >> 8)) & ~0UL/0x101 /* 0x00FF00FF */;
x = (x + (x >> 16)) & 0xFFFF /* ~0UL/0x10001 */;
#else
x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
#endif
return x;
#endif
}
#define canditer_next_dense(ci) ((ci)->seq + (ci)->next++)
#define canditer_next_mater(ci) ((ci)->oids[(ci)->next++])
static inline oid
canditer_next_except(struct canditer *ci)
{
oid o = ci->seq + ci->add + ci->next++;
while (ci->add < ci->nvals && o == ci->oids[ci->add]) {
ci->add++;
o++;
}
return o;
}
static inline oid
canditer_next_mask(struct canditer *ci)
{
/* since .next < .ncand, we know there must be another
* candidate */
while ((ci->mask[ci->nextmsk] >> ci->nextbit) == 0) {
ci->nextmsk++;
ci->nextbit = 0;
}
ci->nextbit += candmask_lobit(ci->mask[ci->nextmsk] >> ci->nextbit);
oid o = ci->mskoff + ci->nextmsk * 32 + ci->nextbit;
if (++ci->nextbit == 32) {
ci->nextbit = 0;
ci->nextmsk++;
}
ci->next++;
return o;
}
static inline oid
canditer_next(struct canditer *ci)
{
if (ci->next == ci->ncand)
return oid_nil;
switch (ci->tpe) {
case cand_dense:
return canditer_next_dense(ci);
case cand_materialized:
assert(ci->next < ci->nvals);
return canditer_next_mater(ci);
case cand_except:
return canditer_next_except(ci);
case cand_mask:
/* work around compiler error: control reaches end of
* non-void function */
break;
}
assert(ci->tpe == cand_mask);
return canditer_next_mask(ci);
}
#define canditer_search_dense(ci, o, next) ((o) < (ci)->seq ? next ? 0 : BUN_NONE : (o) >= (ci)->seq + (ci)->ncand ? next ? (ci)->ncand : BUN_NONE : (o) - (ci)->seq)
gdk_export BUN canditer_init(struct canditer *ci, BAT *b, BAT *s);
gdk_export oid canditer_peek(struct canditer *ci);
gdk_export oid canditer_last(const struct canditer *ci);
gdk_export oid canditer_prev(struct canditer *ci);
gdk_export oid canditer_peekprev(struct canditer *ci);
gdk_export oid canditer_idx(const struct canditer *ci, BUN p);
#define canditer_idx_dense(ci, p) ((p >= (ci)->ncand)?oid_nil:((ci)->seq + p))
gdk_export void canditer_setidx(struct canditer *ci, BUN p);
gdk_export void canditer_reset(struct canditer *ci);
gdk_export BUN canditer_search(const struct canditer *ci, oid o, bool next);
static inline bool
canditer_contains(struct canditer *ci, oid o)
{
if (ci->tpe == cand_mask) {
if (o < ci->mskoff)
return false;
o -= ci->mskoff;
BUN p = o / 32;
if (p >= ci->nvals)
return false;
o %= 32;
if (p == ci->nvals - 1 && o >= ci->lastbit)
return false;
return ci->mask[p] & (1U << o);
}
return canditer_search(ci, o, false) != BUN_NONE;
}
gdk_export oid canditer_mask_next(const struct canditer *ci, oid o, bool next);
gdk_export BAT *canditer_slice(const struct canditer *ci, BUN lo, BUN hi);
gdk_export BAT *canditer_sliceval(const struct canditer *ci, oid lo, oid hi);
gdk_export BAT *canditer_slice2(const struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2);
gdk_export BAT *canditer_slice2val(const struct canditer *ci, oid lo1, oid hi1, oid lo2, oid hi2);
gdk_export BAT *BATnegcands(BUN nr, BAT *odels);
gdk_export BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected);
gdk_export BAT *BATunmask(BAT *b);
gdk_export BAT *BATmergecand(BAT *a, BAT *b);
gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
gdk_export BAT *BATdiffcand(BAT *a, BAT *b);
#endif /* _GDK_CAND_H_ */

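candmask_pop counts the set bits of a 32-bit mask word with a divide-and-conquer reduction. The same steps transcribed to Python and checked against a naive count:

# Python transcription of candmask_pop's masked variant.
def popcount32(x: int) -> int:
    x -= (x >> 1) & 0x55555555
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
    x = (x + (x >> 4)) & 0x0F0F0F0F
    x = (x + (x >> 8)) & 0x00FF00FF
    x = (x + (x >> 16)) & 0xFFFF
    return x

assert all(popcount32(v) == bin(v).count('1')
           for v in (0, 1, 0x80000000, 0xFFFFFFFF, 0x12345678))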
@ -0,0 +1,45 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_DELTA_H_
#define _GDK_DELTA_H_
/*
* We make sure here that the BUNs section of a BAT at least starts 4
* bytes from the BUN start. This ensures that the first data item of
* e.g. a BAT[void,bit] is (at least) integer aligned. This optimizes
* processing on such BATs (DDBENCH).
*/
static inline void
DELTAinit(BAT *b)
{
BATsetcount(b, 0);
b->theap->free = 0;
b->batInserted = 0;
b->tshift = ATOMelmshift(Tsize(b));
TRC_DEBUG(DELTA,
"%s free %zu ins " BUNFMT " base %p\n",
BBP_logical(b->batCacheid),
b->theap->free,
b->batInserted,
b->theap->base);
}
/*
* Upon saving a BAT, we should convert the delta marker BUN pointers
* into indexes and convert them back into pointers upon reload.
*
* The BATdirty(b) tells you whether a BAT's main memory
* representation differs from its saved image on stable storage. But
* *not* whether it has changed since last transaction commit (it can
* be storage-clean, but transaction-dirty). For this we have
* DELTAdirty(b).
*/
#define DELTAdirty(b) ((b)->batInserted < BUNlast(b))
#endif /* _GDK_DELTA_H_ */

@ -0,0 +1,299 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_SEARCH_H_
#define _GDK_SEARCH_H_
struct Hash {
int type; /* type of index entity */
uint8_t width; /* width of hash entries */
BUN mask1; /* .mask1 < .nbucket <= .mask2 */
BUN mask2; /* ... both are power-of-two minus one */
BUN nbucket; /* number of valid hash buckets */
BUN nunique; /* number of unique values */
BUN nheads; /* number of chain heads */
void *Bckt; /* hash buckets, points into .heapbckt */
void *Link; /* collision list, points into .heaplink */
Heap heaplink; /* heap where the hash links are stored */
Heap heapbckt; /* heap where the hash buckets are stored */
};
static inline BUN
HASHbucket(const Hash *h, BUN v)
{
return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
}
gdk_export gdk_return BAThash(BAT *b);
gdk_export void HASHdestroy(BAT *b);
gdk_export BUN HASHprobe(const Hash *h, const void *v);
gdk_export BUN HASHlist(Hash *h, BUN i);
#define BUN2 2
#define BUN4 4
#if SIZEOF_BUN == 8
#define BUN8 8
#endif
#ifdef BUN2
typedef uint16_t BUN2type;
#endif
typedef uint32_t BUN4type;
#if SIZEOF_BUN > 4
typedef uint64_t BUN8type;
#endif
#ifdef BUN2
#define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
#endif
#define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
#ifdef BUN8
#define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
#endif
/* play around with h->Bckt[i] and h->Link[j] */
static inline void
HASHput(Hash *h, BUN i, BUN v)
{
/* if v == BUN_NONE, assigning the value to a BUN2type
* etc. automatically converts to BUN2_NONE etc. */
switch (h->width) {
#ifdef BUN2
case BUN2:
((BUN2type *) h->Bckt)[i] = (BUN2type) v;
break;
#endif
default: /* BUN4 */
((BUN4type *) h->Bckt)[i] = (BUN4type) v;
break;
#ifdef BUN8
case BUN8:
((BUN8type *) h->Bckt)[i] = (BUN8type) v;
break;
#endif
}
}
static inline void
HASHputlink(Hash *h, BUN i, BUN v)
{
/* if v == BUN_NONE, assigning the value to a BUN2type
* etc. automatically converts to BUN2_NONE etc. */
switch (h->width) {
#ifdef BUN2
case BUN2:
assert(v == BUN_NONE || v == BUN2_NONE || v < i);
((BUN2type *) h->Link)[i] = (BUN2type) v;
break;
#endif
default: /* BUN4 */
assert(v == BUN_NONE || v == BUN4_NONE || v < i);
((BUN4type *) h->Link)[i] = (BUN4type) v;
break;
#ifdef BUN8
case BUN8:
assert(v == BUN_NONE || v == BUN8_NONE || v < i);
((BUN8type *) h->Link)[i] = (BUN8type) v;
break;
#endif
}
}
static inline BUN __attribute__((__pure__))
HASHget(const Hash *h, BUN i)
{
switch (h->width) {
#ifdef BUN2
case BUN2:
i = (BUN) ((BUN2type *) h->Bckt)[i];
return i == BUN2_NONE ? BUN_NONE : i;
#endif
default: /* BUN4 */
i = (BUN) ((BUN4type *) h->Bckt)[i];
return i == BUN4_NONE ? BUN_NONE : i;
#ifdef BUN8
case BUN8:
i = (BUN) ((BUN8type *) h->Bckt)[i];
return i == BUN8_NONE ? BUN_NONE : i;
#endif
}
}
static inline BUN __attribute__((__pure__))
HASHgetlink(const Hash *h, BUN i)
{
switch (h->width) {
#ifdef BUN2
case BUN2:
i = (BUN) ((BUN2type *) h->Link)[i];
return i == BUN2_NONE ? BUN_NONE : i;
#endif
default: /* BUN4 */
i = (BUN) ((BUN4type *) h->Link)[i];
return i == BUN4_NONE ? BUN_NONE : i;
#ifdef BUN8
case BUN8:
i = (BUN) ((BUN8type *) h->Link)[i];
return i == BUN8_NONE ? BUN_NONE : i;
#endif
}
}
/* mix_bte(0x80) == 0x80 */
#define mix_bte(X) ((unsigned int) (unsigned char) (X))
/* mix_sht(0x8000) == 0x8000 */
#define mix_sht(X) ((unsigned int) (unsigned short) (X))
/* mix_int(0x81060038) == 0x80000000 */
#define mix_int(X) (((unsigned int) (X) >> 7) ^ \
((unsigned int) (X) >> 13) ^ \
((unsigned int) (X) >> 21) ^ \
(unsigned int) (X))
/* mix_lng(0x810600394347424F) == 0x8000000000000000 */
#define mix_lng(X) (((ulng) (X) >> 7) ^ \
((ulng) (X) >> 13) ^ \
((ulng) (X) >> 21) ^ \
((ulng) (X) >> 31) ^ \
((ulng) (X) >> 38) ^ \
((ulng) (X) >> 46) ^ \
((ulng) (X) >> 56) ^ \
(ulng) (X))
#ifdef HAVE_HGE
/* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
* 0x80000000000000000000000000000000 */
#define mix_hge(X) (((uhge) (X) >> 7) ^ \
((uhge) (X) >> 13) ^ \
((uhge) (X) >> 21) ^ \
((uhge) (X) >> 31) ^ \
((uhge) (X) >> 38) ^ \
((uhge) (X) >> 46) ^ \
((uhge) (X) >> 56) ^ \
((uhge) (X) >> 65) ^ \
((uhge) (X) >> 70) ^ \
((uhge) (X) >> 78) ^ \
((uhge) (X) >> 85) ^ \
((uhge) (X) >> 90) ^ \
((uhge) (X) >> 98) ^ \
((uhge) (X) >> 107) ^ \
((uhge) (X) >> 116) ^ \
(uhge) (X))
#endif
#define hash_loc(H,V) hash_any(H,V)
#define hash_var(H,V) hash_any(H,V)
#define hash_any(H,V) HASHbucket(H, ATOMhash((H)->type, (V)))
#define hash_bte(H,V) (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
#define hash_sht(H,V) (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
#define hash_int(H,V) HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
/* XXX return size_t-sized value for 8-byte oid? */
#define hash_lng(H,V) HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
#ifdef HAVE_HGE
#define hash_hge(H,V) HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
#endif
#if SIZEOF_OID == SIZEOF_INT
#define hash_oid(H,V) hash_int(H,V)
#else
#define hash_oid(H,V) hash_lng(H,V)
#endif
#define hash_flt(H,V) hash_int(H,V)
#define hash_dbl(H,V) hash_lng(H,V)
static inline BUN __attribute__((__const__))
mix_uuid(const uuid *u)
{
ulng u1, u2;
u1 = (ulng) (uint8_t) u->u[0] << 56 |
(ulng) (uint8_t) u->u[1] << 48 |
(ulng) (uint8_t) u->u[2] << 40 |
(ulng) (uint8_t) u->u[3] << 32 |
(ulng) (uint8_t) u->u[4] << 24 |
(ulng) (uint8_t) u->u[5] << 16 |
(ulng) (uint8_t) u->u[6] << 8 |
(ulng) (uint8_t) u->u[7];
u2 = (ulng) (uint8_t) u->u[8] << 56 |
(ulng) (uint8_t) u->u[9] << 48 |
(ulng) (uint8_t) u->u[10] << 40 |
(ulng) (uint8_t) u->u[11] << 32 |
(ulng) (uint8_t) u->u[12] << 24 |
(ulng) (uint8_t) u->u[13] << 16 |
(ulng) (uint8_t) u->u[14] << 8 |
(ulng) (uint8_t) u->u[15];
/* we're not using mix_hge since this way we get the same result
* on systems with and without 128 bit integer support */
return (BUN) (mix_lng(u1) ^ mix_lng(u2));
}
#define hash_uuid(H,V) HASHbucket(H, mix_uuid((const uuid *) (V)))
/*
 * @- hash-table supported loop over BUNs
 * The first parameter `bi' is a BAT iterator, the second (`h') should
 * point to the Hash structure, and `v' is a pointer to an atomic value
 * (corresponding to the head column of `b'). The `hb' is a BUN index,
 * pointing out the `hb'-th BUN.
*/
#define HASHloop(bi, h, hb, v) \
for (hb = HASHget(h, HASHprobe(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h, hb)) \
if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
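/* Illustrative usage sketch (not from the upstream header; `bi', `h' and
 * `v' are assumed to be set up as described above):
 *
 *	BUN hb;
 *	HASHloop(bi, h, hb, v) {
 *		...hb is the index of a BUN whose value equals *v;
 *		   process BUNtail(bi, hb) here...
 *	}
 */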
#define HASHloop_str(bi, h, hb, v) \
for (hb = HASHget(h, HASHbucket(h, strHash(v))); \
hb != BUN_NONE; \
hb = HASHgetlink(h, hb)) \
if (strEQ(v, BUNtvar(bi, hb)))
#define HASHlooploc(bi, h, hb, v) \
for (hb = HASHget(h, HASHprobe(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h, hb)) \
if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
#define HASHloopvar(bi, h, hb, v) \
for (hb = HASHget(h, HASHprobe(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h, hb)) \
if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
#define HASHloop_TYPE(bi, h, hb, v, TYPE) \
for (hb = HASHget(h, hash_##TYPE(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h,hb)) \
if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
/* need to take special care comparing nil floating point values */
#define HASHloop_fTYPE(bi, h, hb, v, TYPE) \
for (hb = HASHget(h, hash_##TYPE(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h,hb)) \
if (is_##TYPE##_nil(* (const TYPE *) (v)) \
? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
: * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
#define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
#define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
#define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
#define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
#ifdef HAVE_HGE
#define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
#endif
#define HASHloop_flt(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, flt)
#define HASHloop_dbl(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, dbl)
#ifdef HAVE_HGE
#define HASHloop_uuid(bi, hsh, hb, v) \
for (hb = HASHget(hsh, hash_uuid(hsh, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(hsh,hb)) \
if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
#else
#define HASHloop_uuid(bi, h, hb, v) \
for (hb = HASHget(h, hash_uuid(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h,hb)) \
if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
// if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
#endif
#endif /* _GDK_SEARCH_H_ */

@ -0,0 +1,197 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef GDK_POSIX_H
#define GDK_POSIX_H
#include <sys/types.h>
#include <time.h>
#ifdef HAVE_FTIME
#include <sys/timeb.h> /* ftime */
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h> /* gettimeofday */
#endif
#ifndef HAVE_SYS_SOCKET_H
#ifdef HAVE_WINSOCK_H
#include <winsock.h> /* for timeval */
#endif
#endif
#include "gdk_system.h" /* gdk_export */
#ifdef NATIVE_WIN32
#include <io.h>
#include <direct.h>
#endif
/*
* @- virtual memory
*/
#define MT_VMUNITLOG 16
#define MT_VMUNITSIZE (1 << MT_VMUNITLOG)
/* make sure POSIX_MADV_* and posix_madvise() are defined somehow */
#ifdef HAVE_SYS_MMAN_H
# ifndef __USE_BSD
# define __USE_BSD
# endif
# include <sys/mman.h>
#endif
#ifdef __linux__
/* on Linux, posix_madvise does not seem to work, fall back to classic
* madvise */
#undef HAVE_POSIX_MADVISE
#undef HAVE_POSIX_FADVISE
#undef POSIX_MADV_NORMAL
#undef POSIX_MADV_RANDOM
#undef POSIX_MADV_SEQUENTIAL
#undef POSIX_MADV_WILLNEED
#undef POSIX_MADV_DONTNEED
#endif
#ifndef HAVE_POSIX_MADVISE
# ifdef HAVE_MADVISE
# define posix_madvise madvise
# define HAVE_POSIX_MADVISE 1
# ifndef MADV_RANDOM
# define MADV_RANDOM 0
# endif
# ifndef POSIX_MADV_NORMAL
# define POSIX_MADV_NORMAL MADV_NORMAL
# define POSIX_MADV_RANDOM MADV_RANDOM
# define POSIX_MADV_SEQUENTIAL MADV_SEQUENTIAL
# define POSIX_MADV_WILLNEED MADV_WILLNEED
# define POSIX_MADV_DONTNEED MADV_DONTNEED
# endif
# else
# define posix_madvise(x,y,z) 0
# ifndef POSIX_MADV_NORMAL
# define POSIX_MADV_NORMAL 0
# define POSIX_MADV_RANDOM 0
# define POSIX_MADV_SEQUENTIAL 0
# define POSIX_MADV_WILLNEED 0
# define POSIX_MADV_DONTNEED 0
# endif
# endif
#endif
/* in case they are still not defined, define these values as
* something that doesn't do anything */
#ifndef POSIX_MADV_NORMAL
#define POSIX_MADV_NORMAL 0
#endif
#ifndef POSIX_MADV_RANDOM
#define POSIX_MADV_RANDOM 0
#endif
#ifndef POSIX_MADV_SEQUENTIAL
#define POSIX_MADV_SEQUENTIAL 0
#endif
#ifndef POSIX_MADV_WILLNEED
#define POSIX_MADV_WILLNEED 0
#endif
#ifndef POSIX_MADV_DONTNEED
#define POSIX_MADV_DONTNEED 0
#endif
/* the new mmap modes, mimic default MADV_* madvise POSIX constants */
#define MMAP_NORMAL POSIX_MADV_NORMAL /* no further special treatment */
#define MMAP_RANDOM POSIX_MADV_RANDOM /* expect random page references */
#define MMAP_SEQUENTIAL POSIX_MADV_SEQUENTIAL /* expect sequential page references */
#define MMAP_WILLNEED POSIX_MADV_WILLNEED /* will need these pages */
#define MMAP_DONTNEED POSIX_MADV_DONTNEED /* don't need these pages */
#define MMAP_READ 1024 /* region is readable (default if omitted) */
#define MMAP_WRITE 2048 /* region may be written into */
#define MMAP_COPY 4096 /* writable, but changes never reach file */
#define MMAP_ASYNC 8192 /* asynchronous writes (default if omitted) */
#define MMAP_SYNC 16384 /* writing is done synchronously */
/* in order to be sure of madvise and msync modes, pass them to mmap()
* call as well */
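/* Illustrative sketch (not from the upstream source): the mode argument is a
 * bitwise OR of access flags and optionally an advice flag, e.g. when passed
 * to GDKmmap() as declared in gdk_utils.h; the file name and length are made
 * up, and a NULL return is assumed to signal failure:
 *
 *	void *base = GDKmmap("heap.dat", MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL, len);
 *	if (base == NULL)
 *		...handle the failed mapping...
 */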
gdk_export size_t MT_getrss(void);
gdk_export bool MT_path_absolute(const char *path);
/*
* @+ Posix under WIN32
* WIN32 actually supports many Posix functions directly. Some it
 * does not, though. For some functionality Monet therefore moves from
 * Posix calls to MT_*() calls, which translate more easily to WIN32.
 * Examples are MT_mmap(), MT_sleep_ms() and MT_path_absolute(). Why?
 * In the case of mmap() it is much easier for WIN32 to get a filename
 * parameter rather than a file descriptor. That is why mmap() is
 * wrapped in an MT_mmap() solution.
*
* For some other functionality, we do not need to abandon the Posix
* interface, though. Two cases can be distinguished. Missing
 * functions in WIN32 are directly implemented
 * (e.g. dlopen()/dlsym()/dlclose()). The other case covers Posix
 * functions in WIN32 whose behavior must be adjusted a bit. Examples are
 * stat()/rename()/mkdir()/rmdir(), which under WIN32 do not work if the
 * path ends with a directory separator, but should work according to
 * Posix. We remap such functions using a define to an equivalent
* win_*() function (which in its implementation calls through to the
* WIN32 function).
*/
gdk_export void *mdlopen(const char *library, int mode);
#ifdef NATIVE_WIN32
#define RTLD_LAZY 1
#define RTLD_NOW 2
#define RTLD_GLOBAL 4
gdk_export void *dlopen(const char *file, int mode);
gdk_export int dlclose(void *handle);
gdk_export void *dlsym(void *handle, const char *name);
gdk_export char *dlerror(void);
#ifndef HAVE_GETTIMEOFDAY
gdk_export int gettimeofday(struct timeval *tv, int *ignore_zone);
#endif
#endif /* NATIVE_WIN32 */
#ifndef HAVE_LOCALTIME_R
gdk_export struct tm *localtime_r(const time_t *restrict, struct tm *restrict);
#endif
#ifndef HAVE_GMTIME_R
gdk_export struct tm *gmtime_r(const time_t *restrict, struct tm *restrict);
#endif
#ifndef HAVE_ASCTIME_R
gdk_export char *asctime_r(const struct tm *restrict, char *restrict);
#endif
#ifndef HAVE_CTIME_R
gdk_export char *ctime_r(const time_t *restrict, char *restrict);
#endif
#ifndef HAVE_STRERROR_R
gdk_export int strerror_r(int errnum, char *buf, size_t buflen);
#endif
static inline const char *
GDKstrerror(int errnum, char *buf, size_t buflen)
{
#if !defined(_GNU_SOURCE) || ((_POSIX_C_SOURCE >= 200112L) && !_GNU_SOURCE)
if (strerror_r(errnum, buf, buflen) == 0)
return buf;
snprintf(buf, buflen, "Unknown error %d", errnum);
return buf;
#else
return strerror_r(errnum, buf, buflen);
#endif
}
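/* Illustrative usage sketch (the buffer size is arbitrary; assumes <errno.h>
 * is included and that the TRC_ERROR macro from gdk_tracer.h is available):
 *
 *	char errbuf[128];
 *	TRC_ERROR(GDK, "open failed: %s\n", GDKstrerror(errno, errbuf, sizeof(errbuf)));
 */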
#endif /* GDK_POSIX_H */

@ -0,0 +1,722 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_SYSTEM_H_
#define _GDK_SYSTEM_H_
#ifdef WIN32
#ifndef LIBGDK
#define gdk_export extern __declspec(dllimport)
#else
#define gdk_export extern __declspec(dllexport)
#endif
#else
#define gdk_export extern
#endif
/* if __has_attribute is not known to the preprocessor, we ignore
* attributes completely; if it is known, use it to find out whether
* specific attributes that we use are known */
#ifndef __has_attribute
#ifndef __GNUC__
/* we can define __has_attribute as 1 since we define __attribute__ as empty */
#define __has_attribute(attr) 1
#ifndef __attribute__
#define __attribute__(attr) /* empty */
#endif
#else
/* older GCC does have attributes, but not __has_attribute and not all
* attributes that we use are known */
#define __has_attribute__alloc_size__ 1
#define __has_attribute__cold__ 1
#define __has_attribute__const__ 1
#define __has_attribute__constructor__ 1
#define __has_attribute__designated_init__ 0
#define __has_attribute__format__ 1
#define __has_attribute__malloc__ 1
#define __has_attribute__nonnull__ 1
#define __has_attribute__nonstring__ 0
#define __has_attribute__pure__ 1
#define __has_attribute__returns_nonnull__ 0
#define __has_attribute__visibility__ 1
#define __has_attribute__warn_unused_result__ 1
#define __has_attribute(attr) __has_attribute##attr
#endif
#endif
#if !__has_attribute(__alloc_size__)
#define __alloc_size__(a)
#endif
#if !__has_attribute(__cold__)
#define __cold__
#endif
#if !__has_attribute(__const__)
#define __const__
#endif
#if !__has_attribute(__constructor__)
#define __constructor__
#endif
#if !__has_attribute(__designated_init__)
#define __designated_init__
#endif
#if !__has_attribute(__format__)
#define __format__(a,b,c)
#endif
#if !__has_attribute(__malloc__)
#define __malloc__
#endif
#if !__has_attribute(__nonnull__)
#define __nonnull__(a)
#endif
#if !__has_attribute(__nonstring__)
#define __nonstring__
#endif
#if !__has_attribute(__pure__)
#define __pure__
#endif
#if !__has_attribute(__returns_nonnull__)
#define __returns_nonnull__
#endif
#if !__has_attribute(__visibility__)
#define __visibility__(a)
#elif defined(__CYGWIN__)
#define __visibility__(a)
#endif
#if !__has_attribute(__warn_unused_result__)
#define __warn_unused_result__
#endif
/* also see gdk.h for these */
#define THRDMASK (1)
#define TEMMASK (1<<10)
/*
* @- pthreads Includes and Definitions
*/
#ifdef HAVE_PTHREAD_H
/* don't re-include config.h; on Windows, don't redefine pid_t in an
* incompatible way */
#undef HAVE_CONFIG_H
#ifdef pid_t
#undef pid_t
#endif
#include <sched.h>
#include <pthread.h>
#endif
#ifdef HAVE_SEMAPHORE_H
# include <semaphore.h>
#endif
#ifdef HAVE_DISPATCH_DISPATCH_H
#include <dispatch/dispatch.h>
#endif
#ifdef HAVE_SYS_PARAM_H
# include <sys/param.h> /* prerequisite of sys/sysctl on OpenBSD */
#endif
#ifdef BSD /* BSD macro is defined in sys/param.h */
# include <sys/sysctl.h>
#endif
/* new pthread interface, where the thread id changed to a struct */
#ifdef PTW32_VERSION
#define PTW32 1
#endif
/* debug and errno integers */
gdk_export int GDKdebug;
gdk_export void GDKsetdebug(int debug);
gdk_export int GDKgetdebug(void);
gdk_export int GDKnr_threads;
/* API */
/*
* @- sleep
*/
gdk_export void MT_sleep_ms(unsigned int ms);
/*
* @- MT Thread Api
*/
typedef size_t MT_Id; /* thread number. will not be zero */
enum MT_thr_detach { MT_THR_JOINABLE, MT_THR_DETACHED };
#define MT_NAME_LEN 32 /* length of thread/semaphore/etc. names */
#define UNKNOWN_THREAD "unknown thread"
typedef int64_t lng;
typedef struct QryCtx {
const lng starttime;
lng querytimeout;
} QryCtx;
gdk_export bool MT_thread_init(void);
gdk_export int MT_create_thread(MT_Id *t, void (*function) (void *),
void *arg, enum MT_thr_detach d,
const char *threadname);
gdk_export const char *MT_thread_getname(void);
gdk_export void *MT_thread_getdata(void);
gdk_export void MT_thread_setdata(void *data);
gdk_export void MT_exiting_thread(void);
gdk_export MT_Id MT_getpid(void);
gdk_export int MT_join_thread(MT_Id t);
gdk_export QryCtx *MT_thread_get_qry_ctx(void);
gdk_export void MT_thread_set_qry_ctx(QryCtx *ctx);
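/* Illustrative usage sketch (worker() and its argument are made up; a zero
 * return from MT_create_thread is assumed to mean success):
 *
 *	static void worker(void *arg) { ...do work... }
 *
 *	MT_Id tid;
 *	if (MT_create_thread(&tid, worker, arg, MT_THR_JOINABLE, "worker") != 0)
 *		...thread creation failed...
 *	...
 *	MT_join_thread(tid);
 */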
#if SIZEOF_VOID_P == 4
/* "limited" stack size on 32-bit systems */
/* to avoid address space fragmentation */
#define THREAD_STACK_SIZE ((size_t)1024*1024)
#else
/* "increased" stack size on 64-bit systems */
/* since some compilers seem to require this */
/* for burg-generated code in pathfinder */
/* and address space fragmentation is no issue */
#define THREAD_STACK_SIZE ((size_t)2*1024*1024)
#endif
/*
* @- MT Lock API
*/
#include "matomic.h"
/* define this to keep lock statistics (can be expensive) */
/* #define LOCK_STATS 1 */
#ifdef LOCK_STATS
#include "gdk_tracer.h"
#define _DBG_LOCK_COUNT_0(l) \
do { \
(void) ATOMIC_INC(&GDKlockcnt); \
TRC_DEBUG(TEM, "Locking %s...\n", (l)->name); \
} while (0)
#define _DBG_LOCK_LOCKER(l) \
do { \
(l)->locker = __func__; \
(l)->thread = MT_thread_getname(); \
} while (0)
#define _DBG_LOCK_UNLOCKER(l) \
do { \
(l)->locker = __func__; \
(l)->thread = NULL; \
TRC_DEBUG(TEM, "Unlocking %s\n", (l)->name); \
} while (0)
#define _DBG_LOCK_CONTENTION(l) \
do { \
TRC_DEBUG(TEM, "Lock %s contention\n", (l)->name); \
(void) ATOMIC_INC(&GDKlockcontentioncnt); \
(void) ATOMIC_INC(&(l)->contention); \
} while (0)
#define _DBG_LOCK_SLEEP(l) ((void) ATOMIC_INC(&(l)->sleep))
#define _DBG_LOCK_COUNT_2(l) \
do { \
(l)->count++; \
if ((l)->next == (struct MT_Lock *) -1) { \
while (ATOMIC_TAS(&GDKlocklistlock) != 0) \
; \
(l)->next = GDKlocklist; \
(l)->prev = NULL; \
if (GDKlocklist) \
GDKlocklist->prev = (l); \
GDKlocklist = (l); \
ATOMIC_CLEAR(&GDKlocklistlock); \
} \
TRC_DEBUG(TEM, "Locking %s complete\n", (l)->name); \
} while (0)
#define _DBG_LOCK_INIT(l) \
do { \
(l)->count = 0; \
ATOMIC_INIT(&(l)->contention, 0); \
ATOMIC_INIT(&(l)->sleep, 0); \
(l)->locker = NULL; \
(l)->thread = NULL; \
/* if name starts with "sa_" don't link in GDKlocklist */ \
/* since the lock is in memory that is governed by the */ \
/* SQL storage allocator, and hence we have no control */ \
/* over when the lock is destroyed and the memory freed */ \
if (strncmp((l)->name, "sa_", 3) != 0) { \
while (ATOMIC_TAS(&GDKlocklistlock) != 0) \
; \
if (GDKlocklist) \
GDKlocklist->prev = (l); \
(l)->next = GDKlocklist; \
(l)->prev = NULL; \
GDKlocklist = (l); \
ATOMIC_CLEAR(&GDKlocklistlock); \
} else { \
(l)->next = NULL; \
(l)->prev = NULL; \
} \
} while (0)
#define _DBG_LOCK_DESTROY(l) \
do { \
/* if name starts with "sa_" don't link in GDKlocklist */ \
/* since the lock is in memory that is governed by the */ \
/* SQL storage allocator, and hence we have no control */ \
/* over when the lock is destroyed and the memory freed */ \
if (strncmp((l)->name, "sa_", 3) != 0) { \
while (ATOMIC_TAS(&GDKlocklistlock) != 0) \
; \
if ((l)->next) \
(l)->next->prev = (l)->prev; \
if ((l)->prev) \
(l)->prev->next = (l)->next; \
else if (GDKlocklist == (l)) \
GDKlocklist = (l)->next; \
ATOMIC_CLEAR(&GDKlocklistlock); \
ATOMIC_DESTROY(&(l)->contention); \
ATOMIC_DESTROY(&(l)->sleep); \
} \
} while (0)
#else
#define _DBG_LOCK_COUNT_0(l) ((void) 0)
#define _DBG_LOCK_CONTENTION(l) ((void) 0)
#define _DBG_LOCK_SLEEP(l) ((void) 0)
#define _DBG_LOCK_COUNT_2(l) ((void) 0)
#define _DBG_LOCK_INIT(l) ((void) 0)
#define _DBG_LOCK_DESTROY(l) ((void) 0)
#define _DBG_LOCK_LOCKER(l) ((void) 0)
#define _DBG_LOCK_UNLOCKER(l) ((void) 0)
#endif
#if !defined(HAVE_PTHREAD_H) && defined(WIN32)
typedef struct MT_Lock {
CRITICAL_SECTION lock;
char name[MT_NAME_LEN];
#ifdef LOCK_STATS
size_t count;
ATOMIC_TYPE contention;
ATOMIC_TYPE sleep;
struct MT_Lock *volatile next;
struct MT_Lock *volatile prev;
const char *locker;
const char *thread;
#endif
} MT_Lock;
/* Windows defines read as _read and adds a deprecation warning to read
* if you were to still use that. We need the token "read" here. We
* cannot simply #undef read, since that messes up the deprecation
* stuff. So we define _read as read to change the token back to "read"
* where replacement stops (recursive definitions are allowed in C and
* are handled well). After our use, we remove the definition of _read
* so everything reverts back to the way it was. Bonus: this also works
* if "read" was not defined. */
#define _read read
#pragma section(".CRT$XCU", read)
#undef _read
#ifdef _WIN64
#define _LOCK_PREF_ ""
#else
#define _LOCK_PREF_ "_"
#endif
#define MT_LOCK_INITIALIZER(n) { 0 }; \
static void wininit_##n(void) \
{ \
MT_lock_init(&n, #n); \
} \
__declspec(allocate(".CRT$XCU")) void (*wininit_##n##_)(void) = wininit_##n; \
__pragma(comment(linker, "/include:" _LOCK_PREF_ "wininit_" #n "_"))
#define MT_lock_init(l, n) \
do { \
InitializeCriticalSection(&(l)->lock); \
strcpy_len((l)->name, (n), sizeof((l)->name)); \
_DBG_LOCK_INIT(l); \
} while (0)
#define MT_lock_try(l) TryEnterCriticalSection(&(l)->lock)
#define MT_lock_set(l) \
do { \
_DBG_LOCK_COUNT_0(l); \
if (!MT_lock_try(l)) { \
_DBG_LOCK_CONTENTION(l); \
MT_thread_setlockwait(l); \
EnterCriticalSection(&(l)->lock); \
MT_thread_setlockwait(NULL); \
} \
_DBG_LOCK_LOCKER(l); \
_DBG_LOCK_COUNT_2(l); \
} while (0)
#define MT_lock_unset(l) \
do { \
_DBG_LOCK_UNLOCKER(l); \
LeaveCriticalSection(&(l)->lock); \
} while (0)
#define MT_lock_destroy(l) \
do { \
_DBG_LOCK_DESTROY(l); \
DeleteCriticalSection(&(l)->lock); \
} while (0)
typedef struct MT_RWLock {
SRWLOCK lock;
char name[MT_NAME_LEN];
} MT_RWLock;
#define MT_RWLOCK_INITIALIZER(n) { .lock = SRWLOCK_INIT, .name = #n, }
#define MT_rwlock_init(l, n) \
do { \
InitializeSRWLock(&(l)->lock); \
strcpy_len((l)->name, (n), sizeof((l)->name)); \
} while (0)
#define MT_rwlock_destroy(l) ((void) 0)
#define MT_rwlock_rdlock(l) AcquireSRWLockShared(&(l)->lock)
#define MT_rwlock_rdtry(l) TryAcquireSRWLockShared(&(l)->lock)
#define MT_rwlock_rdunlock(l) ReleaseSRWLockShared(&(l)->lock)
#define MT_rwlock_wrlock(l) AcquireSRWLockExclusive(&(l)->lock)
#define MT_rwlock_wrtry(l) TryAcquireSRWLockExclusive(&(l)->lock)
#define MT_rwlock_wrunlock(l) ReleaseSRWLockExclusive(&(l)->lock)
#else
typedef struct MT_Lock {
pthread_mutex_t lock;
char name[MT_NAME_LEN];
#ifdef LOCK_STATS
size_t count;
ATOMIC_TYPE contention;
ATOMIC_TYPE sleep;
struct MT_Lock *volatile next;
struct MT_Lock *volatile prev;
const char *locker;
const char *thread;
#endif
} MT_Lock;
#ifdef LOCK_STATS
#define MT_LOCK_INITIALIZER(n) { .lock = PTHREAD_MUTEX_INITIALIZER, .name = #n, .next = (struct MT_Lock *) -1, }
#else
#define MT_LOCK_INITIALIZER(n) { .lock = PTHREAD_MUTEX_INITIALIZER, .name = #n, }
#endif
#define MT_lock_init(l, n) \
do { \
pthread_mutex_init(&(l)->lock, 0); \
strcpy_len((l)->name, (n), sizeof((l)->name)); \
_DBG_LOCK_INIT(l); \
} while (0)
#define MT_lock_try(l) (pthread_mutex_trylock(&(l)->lock) == 0)
#ifdef LOCK_STATS
#define MT_lock_set(l) \
do { \
_DBG_LOCK_COUNT_0(l); \
if (!MT_lock_try(l)) { \
_DBG_LOCK_CONTENTION(l); \
MT_thread_setlockwait(l); \
pthread_mutex_lock(&(l)->lock); \
MT_thread_setlockwait(NULL); \
} \
_DBG_LOCK_LOCKER(l); \
_DBG_LOCK_COUNT_2(l); \
} while (0)
#else
#define MT_lock_set(l) pthread_mutex_lock(&(l)->lock)
#endif
#define MT_lock_unset(l) \
do { \
_DBG_LOCK_UNLOCKER(l); \
pthread_mutex_unlock(&(l)->lock); \
} while (0)
#define MT_lock_destroy(l) \
do { \
_DBG_LOCK_DESTROY(l); \
pthread_mutex_destroy(&(l)->lock); \
} while (0)
#if !defined(__GLIBC__) || __GLIBC__ > 2 || (__GLIBC__ == 2 && defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ >= 30)
/* this is the normal implementation of our pthreads-based read-write lock */
typedef struct MT_RWLock {
pthread_rwlock_t lock;
char name[MT_NAME_LEN];
} MT_RWLock;
#define MT_RWLOCK_INITIALIZER(n) \
{ .lock = PTHREAD_RWLOCK_INITIALIZER, .name = #n, }
#define MT_rwlock_init(l, n) \
do { \
pthread_rwlock_init(&(l)->lock, NULL); \
strcpy_len((l)->name, (n), sizeof((l)->name)); \
} while (0)
#define MT_rwlock_destroy(l) pthread_rwlock_destroy(&(l)->lock)
#define MT_rwlock_rdlock(l) pthread_rwlock_rdlock(&(l)->lock)
#define MT_rwlock_rdtry(l) (pthread_rwlock_tryrdlock(&(l)->lock) == 0)
#define MT_rwlock_rdunlock(l) pthread_rwlock_unlock(&(l)->lock)
#define MT_rwlock_wrlock(l) pthread_rwlock_wrlock(&(l)->lock)
#define MT_rwlock_wrtry(l) (pthread_rwlock_trywrlock(&(l)->lock) == 0)
#define MT_rwlock_wrunlock(l) pthread_rwlock_unlock(&(l)->lock)
#else
/* in glibc before 2.30, there was a deadlock condition in the tryrdlock
* and trywrlock functions, we work around that by not using the
* implementation at all
* see https://sourceware.org/bugzilla/show_bug.cgi?id=23844 for a
* discussion and comment 14 for the analysis */
typedef struct MT_RWLock {
pthread_mutex_t lock;
ATOMIC_TYPE readers;
char name[MT_NAME_LEN];
} MT_RWLock;
#define MT_RWLOCK_INITIALIZER(n) \
{ .lock = PTHREAD_MUTEX_INITIALIZER, .readers = ATOMIC_VAR_INIT(0), .name = #n, }
#define MT_rwlock_init(l, n) \
do { \
pthread_mutex_init(&(l)->lock, 0); \
ATOMIC_INIT(&(l)->readers, 0); \
strcpy_len((l)->name, (n), sizeof((l)->name)); \
} while (0)
#define MT_rwlock_destroy(l) \
do { \
pthread_mutex_destroy(&(l)->lock); \
ATOMIC_DESTROY(&(l)->readers); \
} while (0)
#define MT_rwlock_rdlock(l) \
do { \
pthread_mutex_lock(&(l)->lock); \
(void) ATOMIC_INC(&(l)->readers); \
pthread_mutex_unlock(&(l)->lock); \
} while (0)
static inline bool
MT_rwlock_rdtry(MT_RWLock *l)
{
if (pthread_mutex_trylock(&l->lock) != 0)
return false;
(void) ATOMIC_INC(&(l)->readers);
pthread_mutex_unlock(&l->lock);
return true;
}
#define MT_rwlock_rdunlock(l) \
do { \
(void) ATOMIC_DEC(&(l)->readers); \
} while (0)
#define MT_rwlock_wrlock(l) \
do { \
pthread_mutex_lock(&(l)->lock); \
while (ATOMIC_GET(&(l)->readers) > 0) \
MT_sleep_ms(1); \
} while (0)
static inline bool
MT_rwlock_wrtry(MT_RWLock *l)
{
if (pthread_mutex_trylock(&l->lock) != 0)
return false;
if (ATOMIC_GET(&l->readers) > 0) {
pthread_mutex_unlock(&l->lock);
return false;
}
return true;
}
#define MT_rwlock_wrunlock(l) pthread_mutex_unlock(&(l)->lock);
#endif
#endif
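/* Illustrative usage sketch (the lock names are made up); the same calls work
 * for both the Windows and the pthread implementations above:
 *
 *	static MT_Lock demoLock = MT_LOCK_INITIALIZER(demoLock);
 *
 *	MT_lock_set(&demoLock);
 *	...critical section...
 *	MT_lock_unset(&demoLock);
 *
 *	static MT_RWLock demoRWLock = MT_RWLOCK_INITIALIZER(demoRWLock);
 *
 *	MT_rwlock_rdlock(&demoRWLock);
 *	...read-only section...
 *	MT_rwlock_rdunlock(&demoRWLock);
 */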
#ifdef LOCK_STATS
gdk_export void GDKlockstatistics(int);
gdk_export MT_Lock * volatile GDKlocklist;
gdk_export ATOMIC_FLAG GDKlocklistlock;
gdk_export ATOMIC_TYPE GDKlockcnt;
gdk_export ATOMIC_TYPE GDKlockcontentioncnt;
gdk_export ATOMIC_TYPE GDKlocksleepcnt;
#endif
/*
* @- MT Semaphore API
*/
#if !defined(HAVE_PTHREAD_H) && defined(WIN32)
typedef struct {
HANDLE sema;
char name[MT_NAME_LEN];
} MT_Sema;
#define MT_sema_init(s, nr, n) \
do { \
assert((s)->sema == NULL); \
strcpy_len((s)->name, (n), sizeof((s)->name)); \
(s)->sema = CreateSemaphore(NULL, nr, 0x7fffffff, NULL); \
} while (0)
#define MT_sema_destroy(s) \
do { \
assert((s)->sema != NULL); \
CloseHandle((s)->sema); \
(s)->sema = NULL; \
} while (0)
#define MT_sema_up(s) ReleaseSemaphore((s)->sema, 1, NULL)
#define MT_sema_down(s) \
do { \
TRC_DEBUG(TEM, "Sema %s down...\n", (s)->name); \
if (WaitForSingleObject((s)->sema, 0) != WAIT_OBJECT_0) { \
MT_thread_setsemawait(s); \
while (WaitForSingleObject((s)->sema, INFINITE) != WAIT_OBJECT_0) \
; \
MT_thread_setsemawait(NULL); \
} \
TRC_DEBUG(TEM, "Sema %s down complete\n", (s)->name); \
} while (0)
#elif defined(HAVE_DISPATCH_SEMAPHORE_CREATE)
/* MacOS X */
typedef struct {
dispatch_semaphore_t sema;
char name[MT_NAME_LEN];
} MT_Sema;
#define MT_sema_init(s, nr, n) \
do { \
strcpy_len((s)->name, (n), sizeof((s)->name)); \
(s)->sema = dispatch_semaphore_create((long) (nr)); \
} while (0)
#define MT_sema_destroy(s) dispatch_release((s)->sema)
#define MT_sema_up(s) dispatch_semaphore_signal((s)->sema)
#define MT_sema_down(s) dispatch_semaphore_wait((s)->sema, DISPATCH_TIME_FOREVER)
#elif defined(_AIX) || defined(__MACH__)
/* simulate semaphores using mutex and condition variable */
typedef struct {
int cnt;
pthread_mutex_t mutex;
pthread_cond_t cond;
char name[MT_NAME_LEN];
} MT_Sema;
#define MT_sema_init(s, nr, n) \
do { \
strcpy_len((s)->name, (n), sizeof((s)->name)); \
(s)->cnt = (nr); \
pthread_mutex_init(&(s)->mutex, 0); \
pthread_cond_init(&(s)->cond, 0); \
} while (0)
#define MT_sema_destroy(s) \
do { \
pthread_mutex_destroy(&(s)->mutex); \
pthread_cond_destroy(&(s)->cond); \
} while (0)
#define MT_sema_up(s) \
do { \
pthread_mutex_lock(&(s)->mutex); \
if ((s)->cnt++ < 0) { \
pthread_cond_signal(&(s)->cond); \
} \
pthread_mutex_unlock(&(s)->mutex); \
} while (0)
#define MT_sema_down(s) \
do { \
TRC_DEBUG(TEM, "Sema %s down...\n", (s)->name); \
pthread_mutex_lock(&(s)->mutex); \
if (--(s)->cnt < 0) { \
MT_thread_setsemawait(s); \
do { \
pthread_cond_wait(&(s)->cond, \
&(s)->mutex); \
} while ((s)->cnt < 0); \
MT_thread_setsemawait(NULL); \
pthread_mutex_unlock(&(s)->mutex); \
} \
TRC_DEBUG(TEM, "Sema %s down complete\n", (s)->name); \
} while (0)
#else
typedef struct {
sem_t sema;
char name[MT_NAME_LEN];
} MT_Sema;
#define MT_sema_init(s, nr, n) \
do { \
strcpy_len((s)->name, (n), sizeof((s)->name)); \
sem_init(&(s)->sema, 0, nr); \
} while (0)
#define MT_sema_destroy(s) sem_destroy(&(s)->sema)
#define MT_sema_up(s) \
do { \
TRC_DEBUG(TEM, "Sema %s up\n", (s)->name); \
sem_post(&(s)->sema); \
} while (0)
#define MT_sema_down(s) \
do { \
TRC_DEBUG(TEM, "Sema %s down...\n", (s)->name); \
if (sem_trywait(&(s)->sema) != 0) { \
MT_thread_setsemawait(s); \
while (sem_wait(&(s)->sema) != 0) \
; \
MT_thread_setsemawait(NULL); \
} \
TRC_DEBUG(TEM, "Sema %s down complete\n", (s)->name); \
} while (0)
#endif
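/* Illustrative usage sketch (the semaphore name is made up); the API is the
 * same whichever implementation above is selected:
 *
 *	static MT_Sema ready;
 *	MT_sema_init(&ready, 0, "ready");	(start with a count of 0)
 *	...
 *	MT_sema_up(&ready);			(producer signals)
 *	MT_sema_down(&ready);			(consumer waits)
 *	MT_sema_destroy(&ready);
 */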
gdk_export void MT_thread_setlockwait(MT_Lock *lock);
gdk_export void MT_thread_setsemawait(MT_Sema *sema);
gdk_export void MT_thread_setworking(const char *work);
gdk_export void MT_thread_setalgorithm(const char *algo);
gdk_export const char *MT_thread_getalgorithm(void);
gdk_export int MT_check_nr_cores(void);
#endif /*_GDK_SYSTEM_H_*/

@ -0,0 +1,262 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
/* // TODO: Complete it when documentation is accepted
*
* Tracer is the general logging system for the MonetDB stack modelled
* after the well-known logging schemes (e.g: Python). It provides a
* number of logging levels and options to increase or reduce the
* verbosity either of individual code parts or of the codebase as a
* whole. It allows users to focus on logging messages related to
 * certain steps of execution, which can prove handy when it comes
 * to debugging. The behavior of Tracer can be controlled at runtime
 * using the SQL API described later on. Certain calls require an "id"
 * to operate, which can be found in the list of each section below.
*
* Internally, the logger uses a buffer to capture log messages before
* they are forwarded to the specific adapter.
*
 * - Sets the minimum flush level at which an event will trigger the
 * logger to flush the buffer
* - Produces messages to the output stream. It is also used as a
* fallback mechanism in case GDKtracer fails to log for whatever
* reason.
* - Struct buffer with allocated space etc.
* - Flush buffer sends the messages to the selected adapter
* - Write about the log structure (e.g: MT_thread_get_name + datetime
* + blah blah)
*/
#ifndef _GDK_TRACER_H_
#define _GDK_TRACER_H_
#define GENERATE_ENUM(ENUM) ENUM,
// ADAPTERS
#define FOREACH_ADPTR(ADPTR) \
ADPTR( BASIC ) \
ADPTR( PROFILER ) \
ADPTR( MBEDDED ) \
\
ADPTR( ADAPTERS_COUNT )
typedef enum {
FOREACH_ADPTR(GENERATE_ENUM)
} adapter_t;
// LOG LEVELS
#define FOREACH_LEVEL(LEVEL) \
LEVEL( M_CRITICAL ) \
LEVEL( M_ERROR ) \
LEVEL( M_WARNING ) \
LEVEL( M_INFO ) \
LEVEL( M_DEBUG ) \
\
LEVEL( LOG_LEVELS_COUNT )
typedef enum {
FOREACH_LEVEL(GENERATE_ENUM)
} log_level_t;
// LAYERS
#define FOREACH_LAYER(LAYER) \
LAYER( MDB_ALL ) \
LAYER( SQL_ALL ) \
LAYER( MAL_ALL ) \
LAYER( GDK_ALL ) \
\
LAYER( LAYERS_COUNT )
typedef enum {
FOREACH_LAYER(GENERATE_ENUM)
} layer_t;
/*
*
* NOTE: Adding/Removing components will affect the test tracer00.mal
* See the test file for more details.
*
*/
// COMPONENTS
#define FOREACH_COMP(COMP) \
COMP( ACCELERATOR ) \
COMP( ALGO ) \
COMP( ALLOC ) \
COMP( BAT_ ) \
COMP( CHECK_ ) \
COMP( DELTA ) \
COMP( HEAP ) \
COMP( IO_ ) \
COMP( PAR ) \
COMP( PERF ) \
COMP( TEM ) \
COMP( THRD ) \
\
COMP( GEOM ) \
COMP( FITS ) \
COMP( SHP ) \
\
COMP( SQL_PARSER ) \
COMP( SQL_TRANS ) \
COMP( SQL_REWRITER ) \
COMP( SQL_EXECUTION ) \
COMP( SQL_STORE ) \
\
COMP( MAL_WLC ) \
COMP( MAL_REMOTE ) \
COMP( MAL_MAPI ) \
COMP( MAL_SERVER ) \
\
COMP( MAL_OPTIMIZER ) \
\
COMP( GDK ) \
\
COMP( COMPONENTS_COUNT )
typedef enum {
FOREACH_COMP(GENERATE_ENUM)
} component_t;
/*
* Logging macros
*/
gdk_export log_level_t lvl_per_component[];
// If the LOG_LEVEL of the message is one of the following: CRITICAL,
// ERROR or WARNING it is logged no matter the component. In any other
// case the component is taken into account
#define GDK_TRACER_TEST(LOG_LEVEL, COMP) \
(LOG_LEVEL <= M_WARNING || \
lvl_per_component[COMP] >= LOG_LEVEL)
#define GDK_TRACER_LOG_BODY(LOG_LEVEL, COMP, MSG, ...) \
GDKtracer_log(__FILE__, __func__, __LINE__, \
LOG_LEVEL, COMP, NULL, MSG, ##__VA_ARGS__)
#ifdef __COVERITY__
/* hide this for static code analysis: too many false positives */
#define GDK_TRACER_LOG(LOG_LEVEL, COMP, MSG, ...) ((void) 0)
#else
#define GDK_TRACER_LOG(LOG_LEVEL, COMP, MSG, ...) \
do { \
if (GDK_TRACER_TEST(LOG_LEVEL, COMP)) { \
GDK_TRACER_LOG_BODY(LOG_LEVEL, COMP, MSG, \
## __VA_ARGS__); \
} \
} while (0)
#endif
#define TRC_CRITICAL(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_CRITICAL, COMP, MSG, ## __VA_ARGS__)
#define TRC_ERROR(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_ERROR, COMP, MSG, ## __VA_ARGS__)
#define TRC_WARNING(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_WARNING, COMP, MSG, ## __VA_ARGS__)
#define TRC_INFO(COMP, MSG, ...) \
GDK_TRACER_LOG(M_INFO, COMP, MSG, ## __VA_ARGS__)
#define TRC_DEBUG(COMP, MSG, ...) \
GDK_TRACER_LOG(M_DEBUG, COMP, MSG, ## __VA_ARGS__)
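/* Illustrative usage (the format arguments are made up); whether anything is
 * emitted depends on the component's level for TRC_INFO/TRC_DEBUG, while
 * TRC_CRITICAL/TRC_ERROR/TRC_WARNING always log:
 *
 *	TRC_DEBUG(ALGO, "hash join: %zu candidates\n", (size_t) cnt);
 *	TRC_ERROR(GDK, "cannot open file %s\n", path);
 */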
// Conditional logging - Example usage
// NOTE: When using the macro with *_IF always use the macro with
// *_ENDIF for logging. Failing to do so will result in checking
// the LOG_LEVEL of the COMPONENT twice. Also NEVER use the
// *_ENDIF macro without first performing a check with the *_IF
// macro. Doing so will log everything without taking the
// LOG_LEVEL of the COMPONENT into account.
/*
TRC_CRITICAL_IF(SQL_STORE)
{
TRC_CRITICAL_ENDIF(SQL_STORE, "Test\n")
}
*/
#define TRC_CRITICAL_IF(COMP) \
/* if (GDK_TRACER_TEST(M_CRITICAL, COMP)) */
#define TRC_ERROR_IF(COMP) \
/* if (GDK_TRACER_TEST(M_ERROR, COMP)) */
#define TRC_WARNING_IF(COMP) \
/* if (GDK_TRACER_TEST(M_WARNING, COMP)) */
#define TRC_INFO_IF(COMP) \
if (GDK_TRACER_TEST(M_INFO, COMP))
#define TRC_DEBUG_IF(COMP) \
if (GDK_TRACER_TEST(M_DEBUG, COMP))
#define TRC_CRITICAL_ENDIF(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_CRITICAL, COMP, MSG, ## __VA_ARGS__)
#define TRC_ERROR_ENDIF(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_ERROR, COMP, MSG, ## __VA_ARGS__)
#define TRC_WARNING_ENDIF(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_WARNING, COMP, MSG, ## __VA_ARGS__)
#define TRC_INFO_ENDIF(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_INFO, COMP, MSG, ## __VA_ARGS__)
#define TRC_DEBUG_ENDIF(COMP, MSG, ...) \
GDK_TRACER_LOG_BODY(M_DEBUG, COMP, MSG, ## __VA_ARGS__)
/*
* GDKtracer API
* For the allowed log_levels, components and layers see the
* LOG_LEVEL, COMPONENT and LAYER enum respectively.
*/
// Used for logrotate
gdk_export void GDKtracer_reinit_basic(int sig);
gdk_export gdk_return GDKtracer_set_tracefile(const char *tracefile);
gdk_export gdk_return GDKtracer_stop(void);
gdk_export gdk_return GDKtracer_set_component_level(const char *comp, const char *lvl);
gdk_export const char *GDKtracer_get_component_level(const char *comp);
gdk_export gdk_return GDKtracer_reset_component_level(const char *comp);
gdk_export gdk_return GDKtracer_set_layer_level(const char *layer, const char *lvl);
gdk_export gdk_return GDKtracer_reset_layer_level(const char *layer);
gdk_export gdk_return GDKtracer_set_flush_level(const char *lvl);
gdk_export gdk_return GDKtracer_reset_flush_level(void);
gdk_export gdk_return GDKtracer_set_adapter(const char *adapter);
gdk_export gdk_return GDKtracer_reset_adapter(void);
gdk_export void GDKtracer_log(const char *file, const char *func,
int lineno, log_level_t lvl,
component_t comp,
const char *syserr,
_In_z_ _Printf_format_string_ const char *format, ...)
__attribute__((__format__(__printf__, 7, 8)));
gdk_export gdk_return GDKtracer_flush_buffer(void);
#endif /* _GDK_TRACER_H_ */

@ -0,0 +1,357 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_UTILS_H_
#define _GDK_UTILS_H_
#include <setjmp.h>
gdk_export const char *GDKgetenv(const char *name);
gdk_export bool GDKgetenv_istext(const char *name, const char* text);
gdk_export bool GDKgetenv_isyes(const char *name);
gdk_export bool GDKgetenv_istrue(const char *name);
gdk_export int GDKgetenv_int(const char *name, int def);
gdk_export gdk_return GDKsetenv(const char *name, const char *value);
gdk_export gdk_return GDKcopyenv(BAT **key, BAT **val, bool writable);
/*
* @+ Memory management
* Memory management in GDK mostly relies on the facilities offered by
* the underlying OS. The below routines monitor the available memory
* resources which consist of physical swap space and logical vm
 * space. There are three kinds of memory that affect these two
* resources in different ways:
*
* - memory mapping
 * which asks for a logical region of virtual memory space. In
* principle, no physical memory is needed to keep the system afloat
* here, as the memory mapped file is swapped onto a disk object
* that already exists.
*
 * Actually, there are two kinds of memory mapping used in GDK,
 * namely read-only direct mapped and writable copy-on-write. For
* the dirty pages, the latter actually also consumes physical
* memory resources, but that is ignored here for simplicity.
*
* - anonymous virtual memory
* This is virtual memory that is mapped on the swap file. Hence,
* this consumes both logical VM space resources and physical memory
* space.
*
* - malloced memory
* comes from the heap and directly consumes physical memory
* resources.
*
* The malloc routine checks the memory consumption every 1000 calls,
 * or for calls larger than 50000 bytes. Consequently, allocated memory
 * is checked at least at every 50MB increase. The VM calls always check
* the memory consumption.
*/
/* default setting to administer everything */
#define GDK_MEM_NULLALLOWED
#if SIZEOF_VOID_P==8
#define GDK_VM_MAXSIZE LL_CONSTANT(4398046511104) /* :-) a 64-bit OS: 4TB */
#else
#define GDK_VM_MAXSIZE LL_CONSTANT(1610612736) /* :-| a 32-bit OS: 1.5GB */
#endif
/* virtual memory defines */
gdk_export size_t _MT_npages;
gdk_export size_t _MT_pagesize;
#define MT_pagesize() _MT_pagesize
#define MT_npages() _MT_npages
gdk_export size_t GDK_mem_maxsize; /* max allowed size of committed memory */
gdk_export size_t GDK_vm_maxsize; /* max allowed size of reserved vm */
gdk_export void *GDKmmap(const char *path, int mode, size_t len)
__attribute__((__warn_unused_result__));
gdk_export gdk_return GDKmunmap(void *addr, size_t len);
gdk_export size_t GDKmem_cursize(void); /* RAM/swapmem that MonetDB has claimed from OS */
gdk_export size_t GDKvm_cursize(void); /* current MonetDB VM address space usage */
gdk_export void *GDKmalloc(size_t size)
__attribute__((__malloc__))
__attribute__((__alloc_size__(1)))
__attribute__((__warn_unused_result__));
gdk_export void *GDKzalloc(size_t size)
__attribute__((__malloc__))
__attribute__((__alloc_size__(1)))
__attribute__((__warn_unused_result__));
gdk_export void *GDKrealloc(void *pold, size_t size)
__attribute__((__alloc_size__(2)))
__attribute__((__warn_unused_result__));
gdk_export void GDKfree(void *blk);
gdk_export str GDKstrdup(const char *s)
__attribute__((__malloc__))
__attribute__((__warn_unused_result__));
gdk_export str GDKstrndup(const char *s, size_t n)
__attribute__((__malloc__))
__attribute__((__warn_unused_result__));
gdk_export size_t GDKmallocated(const void *s);
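/* Illustrative usage sketch (n and the element type are made up; GDK_FAIL is
 * assumed to be the gdk_return error value declared elsewhere in gdk.h):
 *
 *	int *tmp = GDKmalloc(n * sizeof(int));
 *	if (tmp == NULL)
 *		return GDK_FAIL;
 *	...use tmp...
 *	GDKfree(tmp);
 */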
gdk_export void MT_init(void); /* init the package. */
struct opt;
gdk_export gdk_return GDKinit(struct opt *set, int setlen, bool embedded);
/* used for testing only */
gdk_export void GDKsetmallocsuccesscount(lng count);
/*
* Upon closing the session, all persistent BATs should be saved and
* the transient BATs should be removed. The buffer pool manager
* takes care of this.
*/
gdk_export void GDKexit(int status);
gdk_export bool GDKexiting(void);
gdk_export void GDKprepareExit(void);
gdk_export void GDKreset(int status);
/* global version number */
gdk_export const char *GDKversion(void)
__attribute__((__const__));
/* ABI version of GDK library */
gdk_export const char *GDKlibversion(void)
__attribute__((__const__));
// these are used in embedded mode to jump out of GDKfatal
gdk_export jmp_buf GDKfataljump;
gdk_export str GDKfatalmsg;
gdk_export bit GDKfataljumpenable;
/* Timers
* The following relative timers are available for inspection.
* Note that they may consume recognizable overhead.
*
*/
gdk_export lng GDKusec(void);
gdk_export int GDKms(void);
#if !defined(NDEBUG) && !defined(__COVERITY__)
/* In debugging mode, replace GDKmalloc and other functions with a
* version that optionally prints calling information.
*
* We have two versions of this code: one using a GNU C extension, and
* one using traditional C. The GNU C version also prints the name of
* the calling function.
*/
#ifdef __GNUC__
#define GDKmalloc(s) \
({ \
size_t _size = (s); \
void *_res = GDKmalloc(_size); \
TRC_DEBUG(ALLOC, "GDKmalloc(%zu) -> %p\n", \
_size, _res); \
_res; \
})
#define GDKzalloc(s) \
({ \
size_t _size = (s); \
void *_res = GDKzalloc(_size); \
TRC_DEBUG(ALLOC, "GDKzalloc(%zu) -> %p\n", \
_size, _res); \
_res; \
})
#define GDKrealloc(p, s) \
({ \
void *_ptr = (p); \
size_t _size = (s); \
char _buf[12]; \
snprintf(_buf, sizeof(_buf), "%p", _ptr); \
void *_res = GDKrealloc(_ptr, _size); \
TRC_DEBUG(ALLOC, "GDKrealloc(%s,%zu) -> %p\n", \
_buf, _size, _res); \
_res; \
})
#define GDKfree(p) \
({ \
void *_ptr = (p); \
if (_ptr) \
TRC_DEBUG(ALLOC, "GDKfree(%p)\n", _ptr); \
GDKfree(_ptr); \
})
#define GDKstrdup(s) \
({ \
const char *_str = (s); \
void *_res = GDKstrdup(_str); \
TRC_DEBUG(ALLOC, "GDKstrdup(len=%zu) -> %p\n", \
_str ? strlen(_str) : 0, _res); \
_res; \
})
#define GDKstrndup(s, n) \
({ \
const char *_str = (s); \
size_t _n = (n); \
void *_res = GDKstrndup(_str, _n); \
TRC_DEBUG(ALLOC, "GDKstrndup(len=%zu) -> %p\n", \
_n, _res); \
_res; \
})
#define GDKmmap(p, m, l) \
({ \
const char *_path = (p); \
int _mode = (m); \
size_t _len = (l); \
void *_res = GDKmmap(_path, _mode, _len); \
TRC_DEBUG(ALLOC, "GDKmmap(%s,0x%x,%zu) -> %p\n", \
_path ? _path : "NULL", \
(unsigned) _mode, _len, \
_res); \
_res; \
})
#define GDKmunmap(p, l) \
({ void *_ptr = (p); \
size_t _len = (l); \
gdk_return _res = GDKmunmap(_ptr, _len); \
TRC_DEBUG(ALLOC, \
"GDKmunmap(%p,%zu) -> %u\n", \
_ptr, _len, _res); \
_res; \
})
#define malloc(s) \
({ \
size_t _size = (s); \
void *_res = malloc(_size); \
TRC_DEBUG(ALLOC, "malloc(%zu) -> %p\n", \
_size, _res); \
_res; \
})
#define calloc(n, s) \
({ \
size_t _nmemb = (n); \
size_t _size = (s); \
void *_res = calloc(_nmemb,_size); \
TRC_DEBUG(ALLOC, "calloc(%zu,%zu) -> %p\n", \
_nmemb, _size, _res); \
_res; \
})
#define realloc(p, s) \
({ \
void *_ptr = (p); \
size_t _size = (s); \
char _buf[12]; \
snprintf(_buf, sizeof(_buf), "%p", _ptr); \
void *_res = realloc(_ptr, _size); \
TRC_DEBUG(ALLOC, "realloc(%s,%zu) -> %p\n", \
_buf, _size, _res); \
_res; \
})
#define free(p) \
({ \
void *_ptr = (p); \
TRC_DEBUG(ALLOC, "free(%p)\n", _ptr); \
free(_ptr); \
})
#else
static inline void *
GDKmalloc_debug(size_t size)
{
void *res = GDKmalloc(size);
TRC_DEBUG(ALLOC, "GDKmalloc(%zu) -> %p\n", size, res);
return res;
}
#define GDKmalloc(s) GDKmalloc_debug((s))
static inline void *
GDKzalloc_debug(size_t size)
{
void *res = GDKzalloc(size);
TRC_DEBUG(ALLOC, "GDKzalloc(%zu) -> %p\n", size, res);
return res;
}
#define GDKzalloc(s) GDKzalloc_debug((s))
static inline void *
GDKrealloc_debug(void *ptr, size_t size)
{
void *res = GDKrealloc(ptr, size);
TRC_DEBUG(ALLOC, "GDKrealloc(%p,%zu) -> %p\n", ptr, size, res);
return res;
}
#define GDKrealloc(p, s) GDKrealloc_debug((p), (s))
static inline void
GDKfree_debug(void *ptr)
{
TRC_DEBUG(ALLOC, "GDKfree(%p)\n", ptr);
GDKfree(ptr);
}
#define GDKfree(p) GDKfree_debug((p))
static inline char *
GDKstrdup_debug(const char *str)
{
void *res = GDKstrdup(str);
TRC_DEBUG(ALLOC, "GDKstrdup(len=%zu) -> %p\n",
str ? strlen(str) : 0, res);
return res;
}
#define GDKstrdup(s) GDKstrdup_debug((s))
static inline char *
GDKstrndup_debug(const char *str, size_t n)
{
void *res = GDKstrndup(str, n);
TRC_DEBUG(ALLOC, "GDKstrndup(len=%zu) -> %p\n", n, res);
return res;
}
#define GDKstrndup(s, n) GDKstrndup_debug((s), (n))
static inline void *
GDKmmap_debug(const char *path, int mode, size_t len)
{
void *res = GDKmmap(path, mode, len);
TRC_DEBUG(ALLOC, "GDKmmap(%s,0x%x,%zu) -> %p\n",
path ? path : "NULL", (unsigned) mode, len, res);
return res;
}
#define GDKmmap(p, m, l) GDKmmap_debug((p), (m), (l))
static inline gdk_return
GDKmunmap_debug(void *ptr, size_t len)
{
gdk_return res = GDKmunmap(ptr, len);
TRC_DEBUG(ALLOC, "GDKmunmap(%p,%zu) -> %d\n",
ptr, len, (int) res);
return res;
}
#define GDKmunmap(p, l) GDKmunmap_debug((p), (l))
static inline void *
malloc_debug(size_t size)
{
void *res = malloc(size);
TRC_DEBUG(ALLOC, "malloc(%zu) -> %p\n", size, res);
return res;
}
#define malloc(s) malloc_debug((s))
static inline void *
calloc_debug(size_t nmemb, size_t size)
{
void *res = calloc(nmemb, size);
TRC_DEBUG(ALLOC, "calloc(%zu,%zu) -> %p\n", nmemb, size, res);
return res;
}
#define calloc(n, s) calloc_debug((n), (s))
static inline void *
realloc_debug(void *ptr, size_t size)
{
void *res = realloc(ptr, size);
TRC_DEBUG(ALLOC, "realloc(%p,%zu) -> %p \n", ptr, size, res);
return res;
}
#define realloc(p, s) realloc_debug((p), (s))
static inline void
free_debug(void *ptr)
{
TRC_DEBUG(ALLOC, "free(%p)\n", ptr);
free(ptr);
}
#define free(p) free_debug((p))
#endif
#endif
#endif /* _GDK_UTILS_H_ */

@ -0,0 +1,240 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
/*
* (c) Martin Kersten
* MonetDB Basic Definitions
 * Definitions that need to be included in every file of the Monet system, as well as in user-defined module implementations.
*/
#ifndef _MAL_H
#define _MAL_H
#include "gdk.h"
#ifdef WIN32
#ifndef LIBMONETDB5
#define mal_export extern __declspec(dllimport)
#else
#define mal_export extern __declspec(dllexport)
#endif
#else
#define mal_export extern
#endif
#ifdef HAVE_SYS_TIMES_H
# include <sys/times.h>
#endif
/*
* MonetDB Calling Options
* The number of invocation arguments is kept to a minimum.
* See `man mserver5` or tools/mserver/mserver5.1
* for additional system variable settings.
*/
#define MAXSCRIPT 64
mal_export lng MALdebug;
/*
* MonetDB assumes it can use most of the machines memory,
* leaving a small portion for other programs.
*/
#define GB (((lng)1024)*1024*1024)
#define MEMORY_THRESHOLD (0.2 * GDK_mem_maxsize > 8 * GB? GDK_mem_maxsize - 8 * GB: 0.8 * GDK_mem_maxsize)
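/* Worked example of the threshold above (illustrative): with 16GB of usable
 * memory, 0.2 * 16GB = 3.2GB <= 8GB, so the threshold is 0.8 * 16GB = 12.8GB;
 * with 64GB, 0.2 * 64GB = 12.8GB > 8GB, so the threshold is 64GB - 8GB = 56GB.
 * Either way roughly 20% (capped at 8GB) is kept free for other programs. */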
mal_export char monet_cwd[FILENAME_MAX];
mal_export char monet_characteristics[4096];
mal_export stream *maleventstream;
/*
See gdk/gdk.h for the definition of all debug masks.
See `man mserver5` or tools/mserver/mserver5.1
for a documentation of all debug options.
*/
#define GRPthreads (THRDMASK | PARMASK)
#define GRPmemory (ALLOCMASK )
#define GRPproperties (CHECKMASK | PROPMASK | BATMASK )
#define GRPio (IOMASK | PERFMASK )
#define GRPheaps (HEAPMASK)
#define GRPtransactions (TMMASK | DELTAMASK | TEMMASK)
#define GRPmodules (LOADMASK)
#define GRPalgorithms (ALGOMASK)
#define GRPperformance (DEADBEEFMASK)
#define GRPforcemito (FORCEMITOMASK | NOSYNCMASK)
mal_export MT_Lock mal_contextLock;
mal_export MT_Lock mal_remoteLock;
mal_export MT_Lock mal_profileLock ;
mal_export MT_Lock mal_copyLock ;
mal_export MT_Lock mal_delayLock ;
mal_export int mal_init(char *modules[], int embedded);
mal_export _Noreturn void mal_exit(int status);
mal_export void mal_reset(void);
mal_export const char *mal_version(void);
/* This should be here, but cannot, as "Client" isn't known, yet ... |-(
* For now, we move the prototype declaration to src/mal/mal_client.c,
 * the only place where it is currently used. Maybe we should consider
* also moving the implementation there...
*/
/* Listing modes are globally known */
#define LIST_INPUT 1 /* echo original input */
#define LIST_MAL_NAME 2 /* show variable name */
#define LIST_MAL_TYPE 4 /* show type resolution */
#define LIST_MAL_VALUE 8 /* list bat tuple count */
#define LIST_MAL_PROPS 16 /* show variable properties */
#define LIST_MAL_MAPI 32 /* output Mapi compatible output */
#define LIST_MAL_REMOTE 64 /* output MAL for remote execution */
#define LIST_MAL_FLOW 128 /* output MAL dataflow dependencies */
#define LIST_MAL_ALGO 256 /* output algorithm used */
#define LIST_MAL_CALL (LIST_MAL_NAME | LIST_MAL_VALUE )
#define LIST_MAL_DEBUG (LIST_MAL_NAME | LIST_MAL_VALUE | LIST_MAL_TYPE | LIST_MAL_PROPS | LIST_MAL_FLOW)
#define LIST_MAL_ALL (LIST_MAL_NAME | LIST_MAL_VALUE | LIST_MAL_TYPE | LIST_MAL_MAPI)
/* type check status is kept around to improve type checking efficiency */
#define TYPE_ERROR -1
#define TYPE_UNKNOWN 0
#define TYPE_RESOLVED 2
#define GARBAGECONTROL 3
#define VARARGS 1 /* deal with variable arguments */
#define VARRETS 2
typedef int malType;
typedef str (*MALfcn) ();
typedef struct SYMDEF {
struct SYMDEF *peer; /* where to look next */
struct SYMDEF *skip; /* skip to next different symbol */
const char *name;
int kind; /* what kind of symbol */
struct MALBLK *def; /* the details of the MAL fcn */
} *Symbol, SymRecord;
typedef struct VARRECORD {
char name[IDLENGTH]; /* use the space for the full name */
char kind; /* Could be either _, X or C to stamp the variable type */
malType type; /* internal type signature */
bool constant:1,
typevar:1,
fixedtype:1,
//FREE SPOT NOW:1,
cleanup:1,
initialized:1,
used:1,
disabled:1;
short depth; /* scope block depth, set to -1 if not used */
ValRecord value;
int declared; /* pc index when it was first assigned */
int updated; /* pc index when it was first updated */
int eolife; /* pc index when it should be garbage collected */
int stc; /* pc index for rendering schema.table.column */
BUN rowcnt; /* estimated row count*/
} *VarPtr, VarRecord;
/* For performance analysis we keep track of the number of calls and
* the total time spent while executing the instruction. (See
* mal_profiler.c)
*/
typedef struct {
bte token; /* instruction type */
bit barrier; /* flow of control modifier takes:
BARRIER, LEAVE, REDO, EXIT, CATCH, RAISE */
bit typechk; /* type check status */
bte gc; /* garbage control flags */
bte polymorphic; /* complex type analysis */
bit varargs; /* variable number of arguments */
int jump; /* controlflow program counter */
int pc; /* location in MAL plan for profiler*/
MALfcn fcn; /* resolved function address */
struct MALBLK *blk; /* resolved MAL function address */
/* inline statistics */
lng clock; /* when the last call was started */
lng ticks; /* total micro seconds spent in last call */
int calls; /* number of calls made to this instruction */
lng totticks; /* total time spent on this instruction. */
lng wbytes; /* number of bytes produced in last instruction */
/* the core admin */
const char *modname; /* module context, reference into namespace */
const char *fcnname; /* function name, reference into namespace */
int argc, retc, maxarg; /* total and result argument count */
int argv[FLEXIBLE_ARRAY_MEMBER]; /* at least a few entries */
} *InstrPtr, InstrRecord;
typedef struct MALBLK {
char binding[IDLENGTH]; /* related C-function */
str help; /* supportive commentary */
str statichelp; /* static help string should not be freed */
oid tag; /* unique block tag */
struct MALBLK *alternative;
int vtop; /* next free slot */
int vsize; /* size of variable arena */
int vid; /* generate local variable counter */
VarRecord *var; /* Variable table */
int stop; /* next free slot */
int ssize; /* byte size of arena */
InstrPtr *stmt; /* Instruction location */
bool inlineProp:1, /* inline property */
unsafeProp:1; /* unsafe property */
str errors; /* left over errors */
struct MALBLK *history; /* of optimizer actions */
short keephistory; /* do we need the history at all */
int maxarg; /* keep track on the maximal arguments used */
ptr replica; /* for the replicator tests */
/* During the run we keep track on the maximum number of concurrent threads and memory claim */
int workers;
lng memory;
lng starttime; /* track when the query started, for resource management */
lng runtime; /* average execution time of block in ticks */
int calls; /* number of calls */
lng optimize; /* total optimizer time */
} *MalBlkPtr, MalBlkRecord;
#define STACKINCR 128
#define MAXGLOBALS (4 * STACKINCR)
typedef int (*DFhook) (void *, void *, void *, void *);
typedef struct MALSTK {
int stksize;
int stktop;
int stkbot; /* the first variable to be initialized */
int stkdepth; /* to protect against runtime stack overflow */
int calldepth; /* to protect against runtime stack overflow */
short keepAlive; /* do not garbage collect when set */
/*
* Parallel processing is mostly driven by dataflow, but within this context
* there may be different schemes to take instructions into execution.
* The admission scheme (and wrapup) are the necessary scheduler hooks.
*/
DFhook admit;
DFhook wrapup;
/*
* It is handy to administer the timing in the stack frame
* for use in profiling instructions.
*/
struct timeval clock; /* time this stack was created */
char cmd; /* debugger and runtime communication */
char status; /* running 'R', suspended 'S', quitting 'Q' */
int pcup; /* saved pc upon a recursive call */
oid tag; /* unique invocation call tag */
int workers; /* Actual number of concurrent workers */
lng memory; /* Actual memory claims for highwater mark */
struct MALSTK *up; /* stack trace list */
struct MALBLK *blk; /* associated definition */
ValRecord stk[FLEXIBLE_ARRAY_MEMBER];
} MalStack, *MalStkPtr;
#endif /* _MAL_H*/

@ -0,0 +1,195 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _MAL_CLIENT_H_
#define _MAL_CLIENT_H_
#include "mal.h"
#include "mal_module.h"
#define SCENARIO_PROPERTIES 8
enum clientmode {
FREECLIENT,
FINISHCLIENT,
RUNCLIENT,
BLOCKCLIENT
};
/*
* The prompt structure is designed to simplify recognition of the
* language framework for interaction. For access through an API we
* assume the prompt is an ASCII string surrounded by a \001 character. This
* simplifies recognition. The information between the prompt brackets
* can be used to pass the mode to the front-end. Moreover, the prompt
* can be dropped if a single stream of information is expected from the
* server (see mal_profiler.c).
*
* The user can request server-side compilation as part of the
* initialization string. See the documentation on Scenarios.
*/
typedef struct CLIENT_INPUT {
bstream *fdin;
size_t yycur;
int listing;
char *prompt;
struct CLIENT_INPUT *next;
} ClientInput;
typedef struct CLIENT {
int idx; /* entry in mal_clients */
oid user; /* user id in the auth administration */
str username; /* for event processor */
/*
 * The actions for a client are separated into several stages:
* parsing, strategic optimization, tactical optimization, and
* execution. The routines to handle them are obtained once the
* scenario is chosen. Each stage carries a state descriptor, but
* they share the IO state description. A backup structure is
* provided to temporarily switch to another scenario.
*/
str scenario; /* scenario management references */
str oldscenario;
void *state[SCENARIO_PROPERTIES], *oldstate[SCENARIO_PROPERTIES];
MALfcn phase[SCENARIO_PROPERTIES], oldphase[SCENARIO_PROPERTIES];
char itrace; /* trace execution using interactive mdb */
/* if set to 'S' it will put the process to sleep */
bit sqlprofiler; /* control off-line sql performance monitoring */
/*
* Each session comes with resource limitations and predefined settings.
*/
char optimizer[IDLENGTH];/* The optimizer pipe preferred for this session */
int workerlimit; /* maximum number of workthreads processing a query */
int memorylimit; /* Memory claim highwater mark, 0 = no limit */
lng querytimeout; /* query abort after x usec, 0 = no limit*/
lng sessiontimeout; /* session abort after x usec, 0 = no limit */
time_t login; /* Time when this session started */
lng session; /* usec since start of server */
time_t idle; /* Time when the session became idle */
/*
* For program debugging and performance trace we keep the actual resource claims.
*/
time_t lastcmd; /* set when query is received */
/* The user can request a TRACE SQL statement, calling for collecting the events locally */
BAT *profticks;
BAT *profstmt;
BAT *profevents;
ATOMIC_TYPE lastprint; /* when we last printed the query, to be deprecated */
/*
* Communication channels for the interconnect are stored here.
* It is perfectly legal to have a client without input stream.
* It will simply terminate after consuming the input buffer.
*/
const char *srcFile; /* NULL for stdin, or file name */
bstream *fdin;
size_t yycur; /* the scanners current position */
/*
* Keeping track of instructions executed is a valuable tool for
* script processing and debugging. It can be changed at runtime
* for individual clients using the operation clients.listing(mask).
* A listing bit controls the level of detail to be generated during
* program execution tracing. The lowest level (1) simply dumps the
* input, (2) also demonstrates the MAL internal structure, (4) adds
* the type information.
*/
int listing;
str prompt; /* acknowledge prompt */
size_t promptlength;
ClientInput *bak; /* used for recursive script and string execution */
stream *fdout; /* streams from and to user. */
/*
* In interactive mode, reading one line at a time, we should be
* aware of parsing compound structures, such as functions and
* barrier blocks. The level of nesting is maintained in blkmode,
* which is reset to zero upon encountering an end instruction, or
* the closing bracket has been detected. Once the complete
* structure has been parsed the program can be checked and
* executed. Nesting is indicated using a '+' before the prompt.
*/
int blkmode; /* control block parsing */
/*
* The MAL debugger uses the client record to keep track of any
 * pervasive debugger command; see the debugger documentation for
 * detailed information on its features.
*/
int debug;
enum clientmode mode; /* FREECLIENT..BLOCKED */
/*
* Client records are organized into a two-level dependency tree,
* where children may be created to deal with parallel processing
* activities. Each client runs in its own process thread. Its
* identity is retained here for access by others (=father).
*/
MT_Sema s; /* sema to (de)activate thread */
Thread mythread;
str errbuf; /* location of GDK exceptions */
struct CLIENT *father;
/*
* Each client has a private entry point into the namespace and
* object space (the global variables). Moreover, the parser needs
* some administration variables to keep track of critical elements.
*/
Module usermodule; /* private user scope */
Module curmodule; /* where to deliver the symbol, used by parser , only freed globally */
Symbol curprg; /* container for the malparser */
Symbol backup; /* saving the parser context for functions,commands/patterns */
MalStkPtr glb; /* global variable stack */
/*
* Here are pointers to the scenario backend contexts. For the time
* being this is just SQL. We need a pointer for each of them, since
* they have to be able to interoperate with each other, e.g. both
* contexts may be in use at the same time.
*/
void *sqlcontext;
/*
* The workload for replication/replay is saved initially as a MAL block.
* It is split into the capturing part (wlc) and the replay part (wlr).
* This allows a single server to act as both a master and a replica.
*/
int wlc_kind; // used by master to characterise the compound transaction
MalBlkPtr wlc;
/*
* Errors during COPY INTO are collected in a user-specific column set
*/
BAT *error_row;
BAT *error_fld;
BAT *error_msg;
BAT *error_input;
size_t blocksize;
protocol_version protocol;
bool filetrans; /* whether the client can read files for us */
char *handshake_options;
char *query; /* string identifying whatever we're working on */
} *Client, ClientRec;
mal_export int MAL_MAXCLIENTS;
mal_export ClientRec *mal_clients;
mal_export Client MCgetClient(int id);
mal_export Client MCinitClient(oid user, bstream *fin, stream *fout);
mal_export Client MCforkClient(Client father);
mal_export void MCstopClients(Client c);
mal_export int MCactiveClients(void);
mal_export size_t MCmemoryClaim(void);
mal_export void MCcloseClient(Client c);
mal_export str MCsuspendClient(int id);
mal_export str MCawakeClient(int id);
mal_export int MCpushClientInput(Client c, bstream *new_input, int listing, char *prompt);
mal_export int MCvalid(Client c);
#endif /* _MAL_CLIENT_H_ */
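
The exports above describe a create/use/close lifecycle for client records: MCinitClient allocates a record bound to a user and its I/O streams, MCforkClient derives a child record for parallel work, and MCcloseClient releases a record again. The fragment below is only a minimal sketch of that lifecycle, not code from this repository: it assumes a MonetDB/MAL build tree providing this header and its types, and the driver function run_session is hypothetical.

#include "mal_client.h"

/* Hypothetical driver: exercises the client lifecycle exported above.
 * Assumes the caller already owns a user oid plus input/output streams. */
static int
run_session(oid user, bstream *fin, stream *fout)
{
	Client c = MCinitClient(user, fin, fout);	/* allocate a client record */
	if (c == NULL || !MCvalid(c))
		return -1;				/* treat a NULL or invalid record as failure */

	Client child = MCforkClient(c);			/* optional child for parallel work */
	if (child != NULL)
		MCcloseClient(child);			/* children are released independently */

	MCcloseClient(c);				/* release the record and its resources */
	return 0;
}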

@ -0,0 +1,139 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
/*
* The error strings are geared toward answering the question "what happened".
* Optional information about "why it happened" is added
* as local strings in the code base, possibly with runtime information.
* Information on "how to avoid it" is sporadically added using expected values.
*
* General considerations on error classes are summarized below:
* MAL_MALLOC_FAIL
* An operation that allocates space for an object failed.
* Either the prerequisites are not satisfied, or the system
* runs low on resources and cannot accommodate the object.
* For failures to create BATs it sometimes indicates that an
* intermediate BAT size is too large.
* OPERATION_FAILED
* Mostly the module and function name are indicative enough.
* If possible, more is said about the error context, e.g.
* informative references to arguments or variables,
* provided it is produced by the underlying implementation.
* GDK_EXCEPTION
* In general these are generated deep inside the kernel.
* They are captured by the MAL interpreter.
* SEMANTIC_*
* The patterns can be used to defer semantic type errors
* to runtime.
*
* Getting all exception strings in one place improves consistency
* and maintenance.
*
* At a later stage we could introduce internationalization support, i.e.
* use a translation table where needed.
*/
#ifndef MAL_ERRORS
#define MAL_ERRORS
#define SQLSTATE(sqlstate) #sqlstate "!"
#define MANUAL_HELP "See documentation for details"
#define PROGRAM_GENERAL "Program contains errors."
#define PROGRAM_NYI "Not yet implemented"
#define SYNTAX_GENERAL "Syntax error detected."
#define SYNTAX_SIGNATURE "Function signature missing."
#define SEMANTIC_GENERAL "Semantic errors detected"
#define SEMANTIC_PROGRAM_ERRORS "Program contains semantic errors."
#define SEMANTIC_SIGNATURE_MISSING "Function signature missing."
#define SEMANTIC_OPERATION_MISSING "Operation not found."
#define SEMANTIC_TYPE_ERROR "Explicit type required"
#define SEMANTIC_TYPE_MISMATCH "Type mismatch"
#define INTERNAL_BAT_ACCESS "Internal error, can not access BAT."
#define INTERNAL_BAT_HEAD "BAT has wrong head type"
#define INTERNAL_OBJ_CREATE "Can not create object"
#define INTERNAL_AUTHORIZATION "authorization BATs not empty"
#define MAL_MALLOC_FAIL "Could not allocate space"
#define MAL_STACK_FAIL "Running out of stack space."
#define MAL_CALLDEPTH_FAIL "Recursive call limit reached."
#define INVCRED_ACCESS_DENIED "access denied for user"
#define INVCRED_INVALID_USER "invalid credentials for user"
#define INVCRED_REMOVE_USER "Can not remove user"
#define INVCRED_WRONG_ID "Undefined client id"
#define RUNTIME_IO_EOF "Attempt to read beyond end-of-file"
#define RUNTIME_FILE_NOT_FOUND "File not found"
#define RUNTIME_UNLINK "File could not be unlinked"
#define RUNTIME_DIR_ERROR "Unable to open directory"
#define RUNTIME_CREATE_ERROR "Unable to create file/directory"
#define RUNTIME_STREAM_FAILED "Could not create stream"
#define RUNTIME_STREAM_WRITE "Could not write to stream"
#define RUNTIME_STREAM_INPUT "Could not read from stream"
#define RUNTIME_LOAD_ERROR "Loading error"
#define RUNTIME_OBJECT_MISSING "Object not found"
#define RUNTIME_SIGNATURE_MISSING "The <module>.<function> not found"
#define RUNTIME_OBJECT_UNDEFINED "Object not found"
#define RUNTIME_UNKNOWN_INSTRUCTION "Instruction type not supported"
#define RUNTIME_QRY_TIMEOUT "Query aborted due to timeout"
#define RUNTIME_SESSION_TIMEOUT "Query aborted due to session timeout"
#define OPERATION_FAILED "operation failed"
#define _OPERATION_FAILED "Operation failed"
#define BOX_CLOSED "Box is not open"
#define SABAOTH_NOT_INITIALIZED "Sabaoth not initialized"
#define SABAOTH_USE_RESTRICTION "Sabaoth was not initialized as active database"
#define SCENARIO_NOT_FOUND "Scenario not initialized"
#define MACRO_SYNTAX_ERROR "RETURN statement is not the last one"
#define MACRO_DUPLICATE "Duplicate macro expansion"
#define MACRO_TOO_DEEP "Too many macro expansions"
#define OPTIMIZER_CYCLE "Too many optimization cycles"
#define ILLARG_NOTNIL " NIL not allowed"
#define ILLARG_CONSTANTS "Constant argument required"
#define ILLEGAL_ARGUMENT "Illegal argument"
#define IDENTIFIER_EXPECTED "Identifier expected"
#define POSITIVE_EXPECTED "Argument must be positive"
#define ARGUMENT_TOO_LARGE "Argument too large"
#define TOO_MANY_BITS "Too many bits"
#define DUPLICATE_DEFINITION "Duplicate definition"
#define RANGE_ERROR "Range error"
#define SERVER_STOPPED "Server stopped"
#define XML_PARSE_ERROR "Document parse error"
#define XML_COMMENT_ERROR "Comment may not contain '--'"
#define XML_PI_ERROR "No processing instruction target specified"
#define XML_VERSION_ERROR "Illegal XML version"
#define XML_STANDALONE_ERROR "Illegal XML standalone value"
#define XML_NOT_WELL_FORMED "Resulting document not well-formed"
#define XML_ATTRIBUTE_ERROR "No attribute name specified"
#define XML_ATTRIBUTE_INVALID "Invalid attribute name"
#define XML_NO_ELEMENT "No element name specified"
#define XML_NO_NAMESPACE "Namespace support not implemented"
#define XML_ILLEGAL_NAMESPACE "Illegal namespace"
#define XML_ILLEGAL_ATTRIBUTE "Illegal attribute"
#define XML_ILLEGAL_CONTENT "Illegal content"
/* GDK_EXCEPTION should be by itself, i.e. not concatenated with some
* other string; to enforce this we have the parentheses */
#define GDK_EXCEPTION ("GDK reported error.")
#define MAL_DEPRECATED "Deprecated MAL operation."
#define TYPE_NOT_SUPPORTED "Type is not supported"
#endif /* MAL_ERRORS */
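
The SQLSTATE macro above stringizes the five-character state code and appends "!", so a complete exception message is assembled purely by C adjacent-string-literal concatenation with one of the class strings; the parentheses around GDK_EXCEPTION exist precisely so that this one string cannot be concatenated that way. Below is a small, self-contained sketch of that composition; it copies two of the definitions so it compiles on its own, and the surrounding program is purely illustrative.

#include <stdio.h>

/* Local copies of two definitions from the header above, so this
 * sketch is self-contained. */
#define SQLSTATE(sqlstate) #sqlstate "!"
#define SEMANTIC_GENERAL "Semantic errors detected"

int main(void)
{
	/* Adjacent literals concatenate at compile time:
	 * "42000" "!" "Semantic errors detected" -> "42000!Semantic errors detected" */
	const char *msg = SQLSTATE(42000) SEMANTIC_GENERAL;
	puts(msg);
	return 0;
}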

Some files were not shown because too many files have changed in this diff.