diff --git a/.gitignore b/.gitignore index 3ef09c6..c262c14 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ tests/datagen_jose/histgen tests/datagen_jose/tickgen datagen +tests/private *.dSYM testmain.lib testmain.exp diff --git a/Makefile b/Makefile index 4344195..ee95e4c 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,6 @@ docker: docker build -t aquery . clean: - rm *.shm *.o dll.so server.so server.bin -rf 2> $(NULL_DEVICE) || true + rm *.shm *.o dll.so server.so server.bin libaquery.a libaquery.lib -rf 2> $(NULL_DEVICE) || true diff --git a/README.md b/README.md index 6aace6e..83a072b 100644 --- a/README.md +++ b/README.md @@ -38,19 +38,22 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco - [x] Selection/Order by push-down - [x] Join Optimization (Only in Hybrid Engine) -## TODO: +## Known Issues: -- [x] User Module load syntax parsing (fn definition/registration) -- [x] User Module initialize location - [x] User Module test - -> Interval based triggers -- [x] Optimize Compilation Process, using static libraries, hot reloading server binary +- [ ] Interval based triggers +- [x] Hot reloading server binary - [x] Bug fixes: type deduction misaligned in Hybrid Engine - -> Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) -- [x] Limitation: putting ColRefs back to monetdb. +- [ ] Investigation: Using postproc only for q1 in Hybrid Engine (make is_special always on) +- [x] Limitation: putting ColRefs back to monetdb. (Comparison) - [ ] C++ Meta-Programming: Eliminate template recursions as much as possible. - [x] Limitation: Date and Time, String operations, Funcs in groupby agg. - +- [ ] Functionality: Basic helper functions in aquery +- [ ] Improvement: More DDLs, e.g. drop table, update table, etc. +- [ ] Bug: Join-Aware Column management +- [ ] Bug: Order By after Group By + + # Installation ## Requirements 1. 
Recent version of Linux, Windows or MacOS, with recent C++ compiler that has C++17 (1z) support. (however c++20 is recommended if available for heterogeneous lookup on unordered containers) @@ -67,16 +70,23 @@ AQuery++ Database is a cross-platform, In-Memory Column-Store Database that inco ## Usage `python3 prompt.py` will launch the interactive command prompt. The server binary will be autometically rebuilt and started. #### Commands: -- ``: parse sql statement -- `f `: parse all sql statements in file +- ``: parse AQuery statement +- `f `: parse all AQuery statements in file - `dbg` start debugging session -- `print`: printout parsed sql statements -- `exec`: execute last parsed statement(s) with AQuery Execution Engine. AQuery Execution Engine executes query by compiling it to C++ code and then executing it. - -- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The sql-compliant part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. -- `r`: run the last generated code snippet +- `print`: printout parsed AQuery statements + +- `xexec`: execute last parsed statement(s) with Hybrid Execution Engine. Hybrid Execution Engine decouples the query into two parts. The standard SQL (MonetDB dialect) part is executed by an Embedded version of Monetdb and everything else is executed by a post-process module which is generated by AQuery++ Compiler in C++ and then compiled and executed. - `save `: save current code snippet. will use random filename if not specified. - `exit`: quit the prompt -#### Example: +- `exec`: execute last parsed statement(s) with AQuery Execution Engine (Old). AQuery Execution Engine executes query by compiling it to C++ code and then executing it. +- `r`: run the last generated code snippet +### Example: `f moving_avg.a`
`xexec` + +See ./tests/ for more examples. + +## Notes for arm64 macOS users +- In theory, AQuery++ could work on both native arm64 and x86_64 through Rosetta. But for maximum performance, running native is preferred. +- However, the two architectures can't be mixed, i.e. make sure every component (`python` binary, `C++ compiler`, `monetdb` library, and system command-line utilities such as `uname`) has the same architecture. +- Because I can't get access to an arm-based mac to fully test this setup, there might still be issues. Please open an issue if you encounter any problems. \ No newline at end of file diff --git a/server/types.h b/server/types.h index 63c523b..43d2d1a 100644 --- a/server/types.h +++ b/server/types.h @@ -28,7 +28,7 @@ namespace types { }; static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%d", "%hi", "%s", "%s", "%c", "%u", "%lu", "%s", "%hu", "%hhu", "%s", "%s", "Vector<%s>", "%s", "NULL", "ERROR" }; - static constexpr const char* SQL_Type[] = { "INT", "REAL", "VARCHAR(15)", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", + static constexpr const char* SQL_Type[] = { "INT", "REAL", "TEXT", "DOUBLE", "DOUBLE", "BIGINT", "HUGEINT", "SMALLINT", "DATE", "TIME", "TINYINT", "INT", "BIGINT", "HUGEINT", "SMALLINT", "TINYINT", "BIGINT", "BOOL", "BIGINT", "TIMESTAMP", "NULL", "ERROR"}; diff --git a/tests/datagen_jose/RandGen.H b/tests/datagen_jose/RandGen.hpp similarity index 100% rename from tests/datagen_jose/RandGen.H rename to tests/datagen_jose/RandGen.hpp diff --git a/tests/datagen_jose/Time.C b/tests/datagen_jose/Time.cpp similarity index 100% rename from tests/datagen_jose/Time.C rename to tests/datagen_jose/Time.cpp diff --git a/tests/datagen_jose/Time.H b/tests/datagen_jose/Time.hpp similarity index 100% rename from tests/datagen_jose/Time.H rename to tests/datagen_jose/Time.hpp diff --git a/tests/datagen_jose/cal.C b/tests/datagen_jose/cal.C deleted file mode 100644 index
cfff4e8..0000000 --- a/tests/datagen_jose/cal.C +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include - -#include "cal.H" -using namespace std; - -Calendar::Calendar(void) -{ - time_t clk = time(0); - struct tm *now = localtime(&clk); - _currdate = asJulianNumber(now->tm_mon+1, now->tm_mday, now->tm_year+1900); -} - -Calendar::~Calendar() -{} - -// year_ in yyyy format -unsigned int Calendar::asJulianNumber(int month_,int day_,int year_) -{ - unsigned long c,ya; - - if (month_>2) month_-=3; - else { month_+=9; year_--; } - c=year_/100; - ya=year_-100*c; - return ((146097*c)>>2)+((1461*ya)>>2)+(153*month_+2)/5+day_+1721119; -} - -void Calendar::split(int& month_,int& day_,int& year_) -{ - unsigned long d; - unsigned long j=_currdate-1721119; - year_=(int) (((j<<2)-1)/146097); - j=(j<<2)-1-146097*year_; - d=(j>>2); - j=((d<<2)+3)/1461; - d=(d<<2)+3-1461*j; - d=(d+4)>>2; - month_=(int)(5*d-3)/153; - d=5*d-3-153*month_; - day_=(int)((d+5)/5); - year_=(int)(100*year_+j); - if (month_<10) month_+=3; - else { month_-=9; year_++; } -} - -int Calendar::dayInWeek(void) -{ - return ((((_currdate+1)%7)+6)%7)+1; -} - -Calendar &Calendar::nextWeekday(void) -{ - (*this) += 1; - while (!isWeekday()) (*this)+= 1; - return *this; -} - -int Calendar::isWeekday(void) -{ - return (dayInWeek()<6)?1:0; -} - -Calendar &Calendar::operator+= (int incr_) -{ - _currdate += incr_; - return *this; -} - -ostream &operator<< (ostream &os_, Calendar &that_) -{ - int mo, day, year; - that_.split(mo,day,year); - os_ << year << "-" << mo << "-" << day; - // the below is a pain for monetdb - //os_ << mo << "/" << day << "/" << year; - return os_; -} diff --git a/tests/datagen_jose/cal.cpp b/tests/datagen_jose/cal.cpp index 56cd2c6..d9b4c85 100644 --- a/tests/datagen_jose/cal.cpp +++ b/tests/datagen_jose/cal.cpp @@ -2,7 +2,8 @@ #include #include -#include "cal.H" +#include "cal.hpp" +using namespace std; Calendar::Calendar(void) { @@ -71,6 +72,8 @@ ostream &operator<< (ostream &os_, Calendar 
&that_) { int mo, day, year; that_.split(mo,day,year); - os_ << mo << "/" << day << "/" << year; + os_ << year << "-" << mo << "-" << day; + // the below is a pain for monetdb + //os_ << mo << "/" << day << "/" << year; return os_; } diff --git a/tests/datagen_jose/cal.H b/tests/datagen_jose/cal.hpp similarity index 100% rename from tests/datagen_jose/cal.H rename to tests/datagen_jose/cal.hpp diff --git a/tests/datagen_jose/gen.C b/tests/datagen_jose/gen.cpp similarity index 97% rename from tests/datagen_jose/gen.C rename to tests/datagen_jose/gen.cpp index aeb34f6..403f79c 100644 --- a/tests/datagen_jose/gen.C +++ b/tests/datagen_jose/gen.cpp @@ -4,7 +4,7 @@ #define genIMPLEMENTATION #include -#include "RandGen.H" +#include "RandGen.hpp" int num[6]; int nelems=0; diff --git a/tests/datagen_jose/histgen.C b/tests/datagen_jose/histgen.cpp similarity index 99% rename from tests/datagen_jose/histgen.C rename to tests/datagen_jose/histgen.cpp index e2ac660..9aad8b9 100644 --- a/tests/datagen_jose/histgen.C +++ b/tests/datagen_jose/histgen.cpp @@ -10,8 +10,8 @@ #include #include -#include "RandGen.H" -#include "cal.H" +#include "RandGen.hpp" +#include "cal.hpp" using namespace std; inline int max(int a, int b) { diff --git a/tests/datagen_jose/makefile b/tests/datagen_jose/makefile index a4c16cc..147dc19 100644 --- a/tests/datagen_jose/makefile +++ b/tests/datagen_jose/makefile @@ -5,16 +5,16 @@ all: histgen tickgen clean: rm -rf *.o histgen tickgen -%.o: %.C - g++-12 -Ofast -march=native -g -c $< +%.o: %.cpp + $(CXX) -Ofast -march=native -g -c $< tickgen: cal.o Time.o tickgen.o - g++-12 -lstdc++ -Ofast -march=native -flto -o tickgen cal.o Time.o tickgen.o + $(CXX) -lstdc++ -Ofast -march=native -flto -o tickgen cal.o Time.o tickgen.o histgen: cal.o histgen.o - g++-12 -lstdc++ -Ofast -flto -march=native -o histgen cal.o histgen.o + $(CXX) -lstdc++ -Ofast -flto -march=native -o histgen cal.o histgen.o timetest: Time.o timetest.o - g++-12 -lstdc++ -g -o timetest Time.o 
timetest.o + $(CXX) -lstdc++ -g -o timetest Time.o timetest.o diff --git a/tests/datagen_jose/tickgen.C b/tests/datagen_jose/tickgen.cpp similarity index 98% rename from tests/datagen_jose/tickgen.C rename to tests/datagen_jose/tickgen.cpp index d332633..3c20356 100644 --- a/tests/datagen_jose/tickgen.C +++ b/tests/datagen_jose/tickgen.cpp @@ -10,9 +10,9 @@ #include #include -#include "RandGen.H" -#include "cal.H" -#include "Time.H" +#include "RandGen.hpp" +#include "cal.hpp" +#include "Time.hpp" using namespace std;