From dfb3ec23809890b4d5738dd1e8614ca426a6f884 Mon Sep 17 00:00:00 2001 From: Bill Date: Sun, 9 Apr 2023 09:56:32 +0800 Subject: [PATCH] update --- .gitignore | 1 + aquery_parser/__init__.py | 2 +- aquery_parser/parser.py | 1 - benchmark/h2o/groupby.sql | 5 +++-- build.py | 2 +- common/types.py | 4 ++-- engine/ast.py | 4 +++- engine/expr.py | 2 +- msc-plugin/libaquery.vcxproj | 1 + msc-plugin/msc-plugin.vcxproj | 18 ++++++++++-------- msc-plugin/server.vcxproj | 13 +++++++++++++ msc-plugin/testmain.vcxproj | 13 +++++++++++++ msvs-py/msvs-py.pyproj | 1 - server/aggregations.h | 9 ++++++--- server/gc.h | 3 ++- server/libaquery.h | 17 +++++++++++++++++ server/table.h | 24 ++++++++++++------------ server/vector_type.hpp | 12 +++++++++--- 18 files changed, 95 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index 3aa14fc..2a4803e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +duckdb.dll *.swp tests/datagen_jose/histgen tests/datagen_jose/tickgen diff --git a/aquery_parser/__init__.py b/aquery_parser/__init__.py index 61fd967..30ba54c 100644 --- a/aquery_parser/__init__.py +++ b/aquery_parser/__init__.py @@ -13,7 +13,7 @@ import json from threading import Lock from aquery_parser.parser import scrub -from aquery_parser.utils import ansi_string, simple_op, normal_op +from aquery_parser.utils import simple_op, normal_op import aquery_parser.parser parse_locker = Lock() # ENSURE ONLY ONE PARSING AT A TIME common_parser = None diff --git a/aquery_parser/parser.py b/aquery_parser/parser.py index a4237b6..0cf9eec 100644 --- a/aquery_parser/parser.py +++ b/aquery_parser/parser.py @@ -7,7 +7,6 @@ # Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # Bill Sun 2022 - 2023 -from sre_parse import WHITESPACE from mo_parsing.helpers import restOfLine from mo_parsing.infix import delimited_list diff --git a/benchmark/h2o/groupby.sql b/benchmark/h2o/groupby.sql index 9e30c0f..f5dcb20 100644 --- a/benchmark/h2o/groupby.sql +++ b/benchmark/h2o/groupby.sql @@ -14,9 +14,10 @@ SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM source GROUP BY id6 SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM source GROUP BY id3; -- 0.857 | 0.467 | 2.236 -- select top 2 from each grp --- SELECT id6, subvec(v3,0,2) AS v3 FROM source GROUP BY id6 order by v3; +SELECT id6, subvec(v3,0,2) AS v3 FROM source GROUP BY id6 order by v3; -- implement corr ---SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM source GROUP BY id2, id4; +SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM source GROUP BY id2, id4; +-- NA | 0.240 | 0.6 SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS cnt FROM source GROUP BY id1, id2, id3, id4, id5, id6; -- 2.669 | 1.232 | 2.221(1.8) diff --git a/build.py b/build.py index bf14ece..11c08fb 100644 --- a/build.py +++ b/build.py @@ -187,7 +187,7 @@ class build_manager: def __init__(self) -> None: self.method = 'make' self.cxx = '' - self.OptimizationLv = '0' # [O0, O1, O2, O3, Ofast] + self.OptimizationLv = '4' # [O0, O1, O2, O3, Ofast] self.Platform = 'amd64' self.PCH = os.environ['PCH'] if 'PCH' in os.environ else 1 self.StaticLib = 1 diff --git a/common/types.py b/common/types.py index 804b800..f4d7354 100644 --- a/common/types.py +++ b/common/types.py @@ -298,7 +298,7 @@ def subvec_behavior(op: OperatorBase, c_code, *x): if not c_code: return f'{op.sqlname}({", ".join([f"{xx}" for xx in x])})' else: - return f'{x[0]}.subvec({x[1]}{f", {x[2]}" if len(x) == 2 else ""})' + return f'{x[0]}.subvec({x[1]}{f", {x[2]}" if len(x) == 3 else ""})' # arithmetic opadd = OperatorBase('add', 2, auto_extension, cname = '+', sqlname = '+', call = binary_op_behavior) @@ -346,7 +346,7 @@ fnvars = OperatorBase('vars', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cna fnstds = OperatorBase('stddevs', [1, 2], fp(ext(ty_clamp(auto_extension, -1))), cname = 'stddevs', sqlname = 'STDDEVS', call = windowed_fn_behavor) fncnt = OperatorBase('count', 1, int_return, cname = 'count', sqlname = 'COUNT', call = count_behavior) fnpack = OperatorBase('pack', -1, pack_return, cname = 'pack', sqlname = 'PACK', call = pack_behavior) -fnsubvec = OperatorBase('subvec', [1, 2, 3], as_is, cname = 'subvec', sqlname = 'SUBVEC', call = subvec_behavior) +fnsubvec = OperatorBase('subvec', [1, 2, 3], ty_clamp(as_is, 0, 1), cname = 'subvec', sqlname = 'SUBVEC', call = subvec_behavior) # special def is_null_call_behavior(op:OperatorBase, c_code : bool, x : str): if c_code : diff --git a/engine/ast.py b/engine/ast.py index 723b436..30b1f9f 100644 --- a/engine/ast.py +++ b/engine/ast.py @@ -591,7 +591,9 @@ class groupby_c(ast_node): def consume(self, _): self.scanner.finalize() + self.context.emitc('printf("ht_construct: %lld\\n", (chrono::high_resolution_clock::now() - timer).count()); timer = chrono::high_resolution_clock::now();') self.context.emitc(f'auto {self.vecs} = {self.group}.ht_postproc({self.total_sz});') + self.context.emitc('printf("ht_postproc: %lld\\n", (chrono::high_resolution_clock::now() - timer).count()); timer = chrono::high_resolution_clock::now();') # def deal_with_assumptions(self, assumption:assumption, out:TableInfo): # gscanner = scan(self, self.group) # val_var = 'val_'+base62uuid(7) @@ -694,7 +696,7 @@ class groupby_c(ast_node): gscanner.add(f'{ce[0]}[{gscanner.it_var}] = ({get_var_names_ex(ex)});\n') gscanner.add(f'GC::scratch_space->release();') - self.context.emitc('printf("ht_construct: %lld\\n", (chrono::high_resolution_clock::now() - timer).count());timer = chrono::high_resolution_clock::now();') + self.context.emitc('printf("ht_initfrom: %lld\\n", (chrono::high_resolution_clock::now() - timer).count());timer = chrono::high_resolution_clock::now();') gscanner.finalize() self.context.emitc(f'GC::scratch_space = nullptr;') diff --git a/engine/expr.py b/engine/expr.py index 8eba317..b735595 100644 --- a/engine/expr.py +++ b/engine/expr.py @@ -171,7 +171,7 @@ class expr(ast_node): special_func = [*self.context.udf_map.keys(), *self.context.module_map.keys(), "maxs", "mins", "avgs", "sums", "deltas", "last", "first", - "stddevs", "vars", "ratios", "pack", "truncate"] + "stddevs", "vars", "ratios", "pack", "truncate", "subvec"] if ( self.context.special_gb diff --git a/msc-plugin/libaquery.vcxproj b/msc-plugin/libaquery.vcxproj index f2a7de2..71d30b9 100644 --- a/msc-plugin/libaquery.vcxproj +++ b/msc-plugin/libaquery.vcxproj @@ -289,6 +289,7 @@ $(ProjectDir)\..\monetdb\msvc $(SolutionDir)..\msc-plugin\pch_msc.pch $(IntDir)vc$(PlatformToolsetVersion).pdb + Fast Console diff --git a/msc-plugin/msc-plugin.vcxproj b/msc-plugin/msc-plugin.vcxproj index 831937a..84e278a 100644 --- a/msc-plugin/msc-plugin.vcxproj +++ b/msc-plugin/msc-plugin.vcxproj @@ -134,7 +134,7 @@ Level3 true - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest Guard @@ -166,7 +166,7 @@ true true false - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest $(ProjectDir)\..\monetdb\msvc @@ -209,7 +209,7 @@ true true false - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest $(ProjectDir)\..\monetdb\msvc @@ -250,7 +250,7 @@ Level3 true - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_ALLOW_RTCc_IN_STL;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_ALLOW_RTCc_IN_STL;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest false @@ -300,12 +300,12 @@ true true false - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest $(ProjectDir)\..\monetdb\msvc stdc17 - Use + NotUsing pch_msc.hpp None Full @@ -325,7 +325,9 @@ false $(IntDir)vc$(PlatformToolsetVersion).pdb - pch_msc.hpp + + + Fast Console @@ -343,7 +345,7 @@ true true false - QUERY_DECLSPEC=__declspec(dllimport);THREADING;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + QUERY_DECLSPEC=__declspec(dllexport);THREADING;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpplatest $(ProjectDir)\..\monetdb\msvc diff --git a/msc-plugin/server.vcxproj b/msc-plugin/server.vcxproj index a9bfa60..24523ce 100644 --- a/msc-plugin/server.vcxproj +++ b/msc-plugin/server.vcxproj @@ -314,6 +314,18 @@ stdc17 $(ProjectDir)\..\monetdb\msvc None + Full + AnySuitable + Speed + true + true + true + false + false + false + AdvancedVectorExtensions2 + Fast + false Console @@ -324,6 +336,7 @@ false /WHOLEARCHIVE:libaquery.lib %(AdditionalOptions) ../monetdb/msvc;../deps + false diff --git a/msc-plugin/testmain.vcxproj b/msc-plugin/testmain.vcxproj index 4c55b62..042d0d8 100644 --- a/msc-plugin/testmain.vcxproj +++ b/msc-plugin/testmain.vcxproj @@ -316,6 +316,18 @@ stdc17 $(ProjectDir)\..\monetdb\msvc None + Full + AnySuitable + Speed + true + true + false + false + false + AdvancedVectorExtensions2 + Fast + false + true Console @@ -327,6 +339,7 @@ /WHOLEARCHIVE:libaquery.lib %(AdditionalOptions) true ../deps/;../monetdb/msvc + false diff --git a/msvs-py/msvs-py.pyproj b/msvs-py/msvs-py.pyproj index eb1fe67..1ccdd88 100644 --- a/msvs-py/msvs-py.pyproj +++ b/msvs-py/msvs-py.pyproj @@ -49,7 +49,6 @@ - diff --git a/server/aggregations.h b/server/aggregations.h index 252c169..de975f8 100644 --- a/server/aggregations.h +++ b/server/aggregations.h @@ -380,14 +380,17 @@ auto corr(const VT& x, const VT2&y) { sxy += x[i] * y[i]; sy2 += y[i] * y[i]; } - return (sxy - FPType(sx*sy)) + return (len*sxy - FPType(sx*sy)) / - (len * sqrt( - (sx2 - FPType(sx*sx)/len) * (sy2 - FPType(sy*sy)/len) + (sqrt( + (len*sx2 - FPType(sx*sx)) * (len*sy2 - FPType(sy*sy)) ) ); } +void pow(auto x, auto y, auto& z) { + z = pow(x, y); +} template class VT> inline types::GetFPType>> stddev(const VT& arr) { diff --git a/server/gc.h b/server/gc.h index 226fefc..e911b44 100644 --- a/server/gc.h +++ b/server/gc.h @@ -85,7 +85,8 @@ public: start_deamon(); GC::gc_handle = this; - this->scratch.init(1); + this->scratch.init(65536); + GC::scratch_space = &this->scratch; } // 256 MB ~GC(){ diff --git a/server/libaquery.h b/server/libaquery.h index 7ae22a4..8d3c2f4 100644 --- a/server/libaquery.h +++ b/server/libaquery.h @@ -300,4 +300,21 @@ inline _This_Type* AQ_DupObject(_This_Type* __val) { void print_monetdb_results(void* _srv, const char* sep, const char* end, uint32_t limit); StoredProcedure get_procedure(Context* cxt, const char* name); + +#define AQTIMER(x) auto __timer##x = std::chrono::high_resolution_clock::now(); + +#define __AQTIMERLAP__IMPL(x, y) \ + printf(#y": %llu\n", (unsigned long long)( \ + std::chrono::high_resolution_clock::now() - __timer##x \ + ).count()); \ + __timer##x = std::chrono::high_resolution_clock::now(); + +#define __AQTIMERLAP_STRIP(X, Y, IMPL, ...) IMPL + +#define AQTIMERLAP(...) \ + __AQTIMERLAP_STRIP(__VA_ARGS__, \ + __AQTIMERLAP__IMPL(__VA_ARGS__), \ + __AQTIMERLAP__IMPL(, __VA_ARGS__), \ + __AQTIMERLAP__IMPL(,) \ + ) #endif diff --git a/server/table.h b/server/table.h index 7539c5d..644a092 100644 --- a/server/table.h +++ b/server/table.h @@ -149,18 +149,18 @@ public: return *this; } - ColView<_Ty> operator [](vector_type& idxs) const { - return ColView<_Ty>(*this, std::move(idxs)); - } - ColView<_Ty> operator [](const vector_type& idxs) const { - return ColView<_Ty>(*this, idxs); - } - //vector_type<_Ty> operator[](vector_type& idxs) const { - // vector_type<_Ty> ret(idxs.size); - // for (uint32_t i = 0; i < idxs.size; ++i) - // ret.container[i] = this->container[idxs[i]]; - // return ret; - //} + // ColView<_Ty> operator [](vector_type& idxs) const { + // return ColView<_Ty>(*this, idxs); + // } + // ColView<_Ty> operator [](const vector_type& idxs) const { + // return ColView<_Ty>(*this, idxs); + // } + vector_type<_Ty> operator[](vector_type& idxs) const { + vector_type<_Ty> ret(idxs.size); + for (uint32_t i = 0; i < idxs.size; ++i) + ret.container[i] = this->container[idxs[i]]; + return ret; + } vector_type<_Ty> operator [](const std::vector& idxs) const { vector_type<_Ty> ret (this->size); uint32_t i = 0; diff --git a/server/vector_type.hpp b/server/vector_type.hpp index 188a135..d7704ad 100644 --- a/server/vector_type.hpp +++ b/server/vector_type.hpp @@ -27,13 +27,19 @@ template class vector_type : public vector_base<_Ty>{ public: typedef vector_type<_Ty> Decayed_t; + void inline _ref(vector_type<_Ty>& vt) { + // make a reference of vt + this->size = vt.size; + this->capacity = 0; + this->container = vt.container; + } void inline _copy(const vector_type<_Ty>& vt) { // quick init while using malloc //if (capacity > 0) free(container); this->size = vt.size; this->capacity = vt.capacity; if (capacity) { - //puts("copy"); + puts("copy"); this->container = (_Ty*)malloc(size * sizeof(_Ty)); memcpy(container, vt.container, sizeof(_Ty) * size); } @@ -63,7 +69,7 @@ public: container = (_Ty*)GC::scratch_space->alloc(size * sizeof(_Ty)); this->capacity = 0; } - else{ + else { container = (_Ty*)malloc(size * sizeof(_Ty)); } // TODO: calloc for objects. @@ -84,7 +90,7 @@ public: _copy(vt); } constexpr vector_type(vector_type<_Ty>& vt) noexcept : capacity(0) { - _move(std::move(vt)); + _ref(vt); } constexpr vector_type(vector_type<_Ty>&& vt) noexcept : capacity(0) { _move(std::move(vt));