diff --git a/.gitignore b/.gitignore index 880d6f1..2894de7 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,5 @@ server/packages/** *.ipynb *.cmake *.stackdump +saves +*.exe diff --git a/a.exe b/a.exe deleted file mode 100644 index 0d9dd35..0000000 Binary files a/a.exe and /dev/null differ diff --git a/a.exp b/a.exp deleted file mode 100644 index 8c63c3b..0000000 Binary files a/a.exp and /dev/null differ diff --git a/a.lib b/a.lib deleted file mode 100644 index 67fa0ac..0000000 Binary files a/a.lib and /dev/null differ diff --git a/aquery_config.py b/aquery_config.py index 1992699..a44366f 100644 --- a/aquery_config.py +++ b/aquery_config.py @@ -30,4 +30,7 @@ elif os.name == 'posix': if os_platform == 'win': os.add_dll_directory('c:/msys64/usr/bin') os.add_dll_directory(os.path.abspath('./msc-plugin')) - print("adding path") \ No newline at end of file + print("adding path") +else: + import readline + \ No newline at end of file diff --git a/engine/expr.py b/engine/expr.py index 4efeff6..8cddb05 100644 --- a/engine/expr.py +++ b/engine/expr.py @@ -131,4 +131,5 @@ class expr(ast_node): def consume(self, _): self.cexpr = expr.toCExpr(self._expr) def __str__(self): - return self.cexpr \ No newline at end of file + return self.cexpr + \ No newline at end of file diff --git a/engine/types.py b/engine/types.py index 4bb71cf..699bbeb 100644 --- a/engine/types.py +++ b/engine/types.py @@ -74,6 +74,7 @@ type_table = dict() AnyT = Types(0) LazyT = Types(240, name = 'Lazy', cname = '', sqlname = '', ctype_name = '') DoubleT = Types(17, name = 'double', cname='double', sqlname = 'DOUBLE', is_fp = True) +LDoubleT = Types(18, name = 'long double', cname='long double', sqlname = 'LDOUBLE', is_fp = True) FloatT = Types(16, name = 'float', cname = 'float', sqlname = 'REAL', long_type = DoubleT, is_fp = True) HgeT = Types(9, name = 'int128',cname='__int128_t', sqlname = 'HUGEINT', fp_type = DoubleT) @@ -87,13 +88,33 @@ UIntT = Types(7, name = 'uint32', sqlname = 'UINT32', long_type=ULongT, fp_type= UShortT = Types(6, name = 'uint16', sqlname = 'UINT16', long_type=ULongT, fp_type=FloatT) UByteT = Types(5, name = 'uint8', sqlname = 'UINT8', long_type=ULongT, fp_type=FloatT) StrT = Types(200, name = 'str', cname = 'const char*', sqlname='VARCHAR', ctype_name = 'types::STRING') + + def _ty_make_dict(fn : str, *ty : Types): return {eval(fn):t for t in ty} int_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', LongT, ByteT, ShortT, IntT) +uint_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', ULongT, UByteT, UShortT, UIntT) fp_types : Dict[str, Types] = _ty_make_dict('t.sqlname.lower()', FloatT, DoubleT) builtin_types : Dict[str, Types] = {**_ty_make_dict('t.sqlname.lower()', AnyT, StrT), **int_types, **fp_types} +def get_int128_support(): + for t in int_types.values(): + t.long_type = HgeT + for t in uint_types.values(): + t.long_type = UHgeT + int_types['int128'] = HgeT + uint_types['uint128'] = UHgeT + +def revert_int128_support(): + for t in int_types.values(): + t.long_type = LongT + for t in uint_types.values(): + t.long_type = ULongT + int_types.pop('int128', None) + uint_types.pop('uint128', None) + + type_bylength : Dict[int, TypeCollection] = {} type_bylength[1] = TypeCollection(1, ByteT) type_bylength[2] = TypeCollection(2, ShortT) diff --git a/index.html b/index.html deleted file mode 100644 index a6260f2..0000000 --- a/index.html +++ /dev/null @@ -1,19 +0,0 @@ -Google
搜索 图片 地图 Play 新闻 云端硬盘 日历 翻译 更多 »
网络历史记录 | 设置 | 登录



 

高级搜索

© 2022 - 隐私权 - 条款

\ No newline at end of file diff --git a/moving_avg.a b/moving_avg.a index 49fcae9..1659cb7 100644 --- a/moving_avg.a +++ b/moving_avg.a @@ -10,5 +10,5 @@ FROM sale INTO OUTFILE "moving_avg_output.csv" FIELDS TERMINATED BY ";" -select Mont, mins(2,sales) from sale assuming desc Mont group by sales -into outfile "flatten.csv" \ No newline at end of file +-- select Mont, mins(2,sales) from sale assuming desc Mont group by sales +-- into outfile "flatten.csv" \ No newline at end of file diff --git a/out.cpp b/out.cpp index afc14ae..28eb44b 100644 --- a/out.cpp +++ b/out.cpp @@ -1,17 +1,19 @@ -#include "./udf.hpp" -#include "./server/monetdb_conn.h" -#include "./server/aggregations.h" #include "./server/libaquery.h" +#include "./server/monetdb_conn.h" extern "C" int __DLLEXPORT__ dllmain(Context* cxt) { using namespace std; using namespace types; auto server = static_cast(cxt->alt_server); - auto len_6SzLPm = server->cnt; -auto sales_5fe = ColRef(len_6SzLPm, server->getCol(0)); -auto a_yJz = ColRef(len_6SzLPm, server->getCol(1)); -auto out_4UoFb5 = new TableInfo>>("out_4UoFb5"); -out_4UoFb5->get_col<0>() = (sd(a_yJz) + sales_5fe); -print(*out_4UoFb5); + auto len_5sGusn = server->cnt; +auto sumc_5IN = ColRef<__int128_t>(len_5sGusn, server->getCol(0)); +auto b_79y = ColRef(len_5sGusn, server->getCol(1)); +auto d_4yS = ColRef(len_5sGusn, server->getCol(2)); +auto out_kio0QJ = new TableInfo<__int128_t,int,int>("out_kio0QJ"); +out_kio0QJ->get_col<0>().initfrom(sumc_5IN); +out_kio0QJ->get_col<1>().initfrom(b_79y); +out_kio0QJ->get_col<2>().initfrom(d_4yS); +print(*out_kio0QJ); +puts("done."); return 0; } \ No newline at end of file diff --git a/prompt.py b/prompt.py index 9b60527..e17c71f 100644 --- a/prompt.py +++ b/prompt.py @@ -74,7 +74,6 @@ def init_ipc(): global shm, server, basecmd, mm shm = base62uuid() if sys.platform != 'win32': - import readline shm += '.shm' basecmd = ['bash', '-c', 'rlwrap k'] mm = None @@ -166,6 +165,9 @@ def init_threaded(): global cfg, th, send send = server_so['receive_args'] aquery_config.have_hge = server_so['have_hge']() + if aquery_config.have_hge: + from engine.types import get_int128_support + get_int128_support() th = threading.Thread(target=server_so['main'], args=(-1, ctypes.POINTER(ctypes.c_char_p)(cfg.c)), daemon=True) th.start() @@ -258,6 +260,7 @@ while test_parser: except BaseException as e: # don't care about anything happened in interactive console print(e) + continue elif q.startswith('log'): qs = re.split(r'[ \t]', q) if len(qs) > 1: @@ -270,6 +273,28 @@ while test_parser: continue elif q == 'print': cxt.print(stmts) + continue + elif q.startswith('save'): + savecmd = re.split(r'[ \t]', q) + if len(savecmd) > 1: + fname = savecmd[1] + else: + tm = time.gmtime() + fname = f'{tm.tm_year}{tm.tm_mon}_{tm.tm_mday}_{tm.tm_hour}:{tm.tm_min}:{tm.tm_sec}' + if cxt: + def savefile(attr:str, desc:str): + if hasattr(cxt, attr): + attr : str = getattr(cxt, attr) + if attr: + ext = '.' + desc + name = fname if fname.endswith(ext) else fname + ext + with open('saves/' + name, 'wb') as cfile: + cfile.write(attr.encode('utf-8')) + print(f'saved {desc} code as {name}') + savefile('ccode', 'cpp') + savefile('udf', 'udf') + savefile('sql', 'sql') + continue elif q == 'keep': keep = not keep @@ -285,7 +310,7 @@ while test_parser: elif q == 'rr': # run set_ready() continue - elif q.startswith('save'): + elif q.startswith('save2'): filename = re.split(r'[ \t]', q) if (len(filename) > 1): filename = filename[1] diff --git a/reconstruct/ast.py b/reconstruct/ast.py index fb9c3b8..5c04d14 100644 --- a/reconstruct/ast.py +++ b/reconstruct/ast.py @@ -15,6 +15,7 @@ class ast_node: self.context = parent.context if context is None else context self.parent = parent self.sql = '' + self.ccode = '' if hasattr(parent, 'datasource'): self.datasource = parent.datasource else: @@ -28,7 +29,9 @@ class ast_node: self.context.emit(code) def add(self, code): self.sql += code + ' ' - + def addc(self, code): + self.ccode += code + '\n' + name = 'null' def init(self, _): @@ -64,7 +67,8 @@ class projection(ast_node): self.datasource = join(self, from_clause) if 'assumptions' in from_clause: self.assumptions = enlist(from_clause['assumptions']) - + else: + self.assumptions = [] if self.datasource is not None: self.datasource_changed = True self.prev_datasource = self.context.datasource @@ -98,6 +102,7 @@ class projection(ast_node): this_type = proj_expr.type name = proj_expr.sql compound = True # compound column + proj_expr.cols_mentioned = self.datasource.rec if not proj_expr.is_special: y = lambda x:x name = eval('f\'' + name + '\'') @@ -110,7 +115,6 @@ class projection(ast_node): if self.datasource.rec is not None: self.col_ext = self.col_ext.union(self.datasource.rec) proj_map[i] = [this_type, proj_expr.sql, proj_expr] - if 'name' in proj: # renaming column by AS keyword name += ' AS ' + proj['name'] if not proj_expr.is_special: @@ -147,12 +151,25 @@ class projection(ast_node): self.add('FROM') finialize(self.datasource) finialize(self.where) - finialize(self.group_node) + if self.group_node and not self.group_node.use_sp_gb: + self.add(self.group_node.sql) + + if self.col_ext or self.group_node and self.group_node.use_sp_gb: + self.use_postproc = True + o = self.assumptions if 'orderby' in node: - self.add(orderby(self, node['orderby']).sql) + o.extend(enlist(node['orderby'])) + if o: + self.add(orderby(self, o).sql) + if 'outfile' in node: - self.sql = outfile(self, node['outfile'], sql = self.sql).sql + self.outfile = outfile(self, node['outfile'], sql = self.sql) + if not self.use_postproc: + self.sql += self.outfile.sql + else: + self.outfile = None + if self.parent is None: self.emit(self.sql+';\n') else: @@ -175,7 +192,7 @@ class projection(ast_node): self.context.emitc(f'auto {vname} = ColRef<{typenames[idx].cname}>({length_name}, server->getCol({idx}));') vid2cname[idx] = vname # Create table into context - outtable_name = 'out_' + base62uuid(6) + self.outtable_name = 'out_' + base62uuid(6) out_typenames = [None] * len(proj_map) for key, val in proj_map.items(): @@ -186,18 +203,23 @@ class projection(ast_node): if callable(val[1]): val[1] = val[1](True) decltypestring = val[1] - + if val[0] == LazyT: decltypestring = f'value_type>' - if type(val[2].udf) is udf and val[2].udf.return_pattern == udf.ReturnPattern.elemental_return: - out_typenames[key] = f'ColRef<{decltypestring}>' - else: - out_typenames[key] = decltypestring + out_typenames[key] = decltypestring else: out_typenames[key] = val[0].cname + if (type(val[2].udf_called) is udf and + val[2].udf_called.return_pattern == udf.ReturnPattern.elemental_return + or + self.group_node and self.group_node.use_sp_gb and + val[2].cols_mentioned.intersection( + self.datasource.all_cols.difference(self.group_node.refs)) + ): + out_typenames[key] = f'ColRef<{out_typenames[key]}>' # out_typenames = [v[0].cname for v in proj_map.values()] - self.context.emitc(f'auto {outtable_name} = new TableInfo<{",".join(out_typenames)}>("{outtable_name}");') + self.context.emitc(f'auto {self.outtable_name} = new TableInfo<{",".join(out_typenames)}>("{self.outtable_name}");') # TODO: Inject custom group by code here and flag them in proj_map # Type of UDFs? Complex UDFs, ones with static vars? if self.group_node is not None and self.group_node.use_sp_gb: @@ -206,19 +228,23 @@ class projection(ast_node): for key, val in proj_map.items(): col_name = 'col_' + base62uuid(6) - self.context.emitc(f'decltype(auto) {col_name} = {outtable_name}->get_col<{key}>();') + self.context.emitc(f'decltype(auto) {col_name} = {self.outtable_name}->get_col<{key}>();') gb_cexprs.append((col_name, val[2])) self.group_node.finalize(gb_cexprs, gb_vartable) else: for key, val in proj_map.items(): if type(val[1]) is int: - self.context.emitc(f'{outtable_name}->get_col<{key}>().initfrom({vid2cname[val[1]]});') + self.context.emitc(f'{self.outtable_name}->get_col<{key}>().initfrom({vid2cname[val[1]]});') else: # for funcs evaluate f_i(x, ...) - self.context.emitc(f'{outtable_name}->get_col<{key}>() = {val[1]};') + self.context.emitc(f'{self.outtable_name}->get_col<{key}>() = {val[1]};') # print out col_is - self.context.emitc(f'print(*{outtable_name});') - + self.context.emitc(f'print(*{self.outtable_name});') + + if self.outfile: + self.outfile.finalize() + self.context.emitc(f'puts("done.");') + class orderby(ast_node): name = 'order by' def produce(self, node): @@ -357,11 +383,11 @@ class groupby_c(ast_node): for ce in cexprs: ex = ce[1] materialize_builtin = {} - if type(ex.udf) is udf: - if '_builtin_len' in ex.udf.builtin_used: + if type(ex.udf_called) is udf: + if '_builtin_len' in ex.udf_called.builtin_used: define_len_var() materialize_builtin['_builtin_len'] = len_var - if '_builtin_ret' in ex.udf.builtin_used: + if '_builtin_ret' in ex.udf_called.builtin_used: define_len_var() gscanner.add(f'{ce[0]}.emplace_back({{{len_var}}});\n') materialize_builtin['_builtin_ret'] = f'{ce[0]}.back()' @@ -382,6 +408,7 @@ class groupby(ast_node): node = enlist(node) o_list = [] + self.refs = set() self.dedicated_glist : List[Tuple[expr, Set[ColRef]]] = [] self.use_sp_gb = False for g in node: @@ -392,7 +419,7 @@ class groupby(ast_node): if self.parent.col_ext: this_sp_ref = refs.difference(self.parent.col_ext) self.use_sp_gb = self.use_sp_gb or len(this_sp_ref) > 0 - + self.refs.update(refs) self.dedicated_glist.append((g_expr, refs)) g_str = g_expr.eval(c_code = False) if 'sort' in g and f'{g["sort"]}'.lower() == 'desc': @@ -418,7 +445,7 @@ class join(ast_node): name = 'join' def init(self, _): self.joins:list = [] - self.tables = [] + self.tables : List[TableInfo] = [] self.tables_dir = dict() self.rec = None # self.tmp_name = 'join_' + base62uuid(4) @@ -496,7 +523,9 @@ class join(ast_node): raise ValueError(f'Table name/alias not defined{parsedColExpr[0]}') else: return datasource.parse_col_names(parsedColExpr[1]) - + @property + def all_cols(self): + return set([c for t in self.tables for c in t.columns]) def consume(self, _): self.sql = ', '.join(self.joins) return super().consume(_) @@ -581,13 +610,19 @@ class load(ast_node): class outfile(ast_node): name="_outfile" def __init__(self, parent, node, context = None, *, sql = None): + self.node = node super().__init__(parent, node, context) - self.sql = sql - if self.context.dialect == 'MonetDB': - self.produce = self.produce_monetdb - else: - self.produce = self.produce_aq - + self.sql = sql if sql else '' + + def init(self, _): + assert(type(self.parent) is projection) + if not self.parent.use_postproc: + if self.context.dialect == 'MonetDB': + self.produce = self.produce_monetdb + else: + self.produce = self.produce_aq + + return super().init(_) def produce_aq(self, node): filename = node['loc']['literal'] if 'loc' in node else node['literal'] self.sql += f'INTO OUTFILE "{filename}"' @@ -605,6 +640,15 @@ class outfile(ast_node): d = node['term']['literal'] self.sql += f' delimiters \'{d}\', \'{e}\'' + def finalize(self): + filename = self.node['loc']['literal'] if 'loc' in self.node else self.node['literal'] + sep = ',' if 'term' not in self.node else self.node['term']['literal'] + file_pointer = 'fp_' + base62uuid(6) + self.addc(f'FILE* {file_pointer} = fopen("{filename}", "w");') + self.addc(f'{self.parent.outtable_name}->printall("{sep}", "\\n", nullptr, {file_pointer});') + self.addc(f'fclose({file_pointer});') + self.context.ccode += self.ccode + class udf(ast_node): name = 'udf' first_order = name @@ -863,6 +907,7 @@ class udf(ast_node): else: return udf.ReturnPattern.bulk_return + def include(objs): import inspect for _, cls in inspect.getmembers(objs): diff --git a/reconstruct/expr.py b/reconstruct/expr.py index 6c9fcd7..14d7478 100644 --- a/reconstruct/expr.py +++ b/reconstruct/expr.py @@ -57,6 +57,8 @@ class expr(ast_node): if type(c_code) is bool: self.c_code = c_code + self.udf_called = None + self.cols_mentioned : Optional[set[ColRef]] = None ast_node.__init__(self, parent, node, None) def init(self, _): @@ -97,20 +99,25 @@ class expr(ast_node): if key in special_func and not self.is_special: self.is_special = True if key in self.context.udf_map: - self.root.udf = self.context.udf_map[key] - if key == self.root.udf.name: + self.root.udf_called = self.context.udf_map[key] + if self.is_udfexpr and key == self.root.udf.name: self.root.is_recursive_call_inudf = True + # TODO: make udf_called a set! + p = self.parent + while type(p) is expr and not p.udf_called: + p.udf_called = self.udf_called + p = p.parent p = self.parent while type(p) is expr and not p.is_special: p.is_special = True p = p.parent - + need_decltypestr = any([e.need_decltypestr for e in exp_vals]) - if need_decltypestr or (self.udf and type(op) is udf): + if need_decltypestr or (self.udf_called and type(op) is udf): decltypestr_vals = [e.udf_decltypecall for e in exp_vals] self.udf_decltypecall = op(self.c_code, *decltypestr_vals) - if self.udf and type(op) is udf: + if self.udf_called and type(op) is udf: self.udf_decltypecall = op.decltypecall(self.c_code, *decltypestr_vals) elif self.is_udfexpr: @@ -230,10 +237,10 @@ class expr(ast_node): assert(self.is_root) def call(decltypestr = False) -> str: nonlocal c_code, y, materialize_builtin - if self.udf is not None: + if self.udf_called is not None: loc = locals() - builtin_vars = self.udf.builtin_used - for b in self.udf.builtin_var.all: + builtin_vars = self.udf_called.builtin_used + for b in self.udf_called.builtin_var.all: exec(f'loc["{b}"] = lambda: "{{{b}()}}"') if builtin_vars: if type(materialize_builtin) is dict: diff --git a/reconstruct/storage.py b/reconstruct/storage.py index 963a9fa..dde8773 100644 --- a/reconstruct/storage.py +++ b/reconstruct/storage.py @@ -101,7 +101,8 @@ class Context: self.has_dll = False self.dialect = 'MonetDB' self.have_hge = False - + self.Info = lambda *_: None + self.Info = lambda *_: None self.new() diff --git a/Untitled-1.json b/sample_ast.json similarity index 100% rename from Untitled-1.json rename to sample_ast.json diff --git a/sdk/aquery.h b/sdk/aquery.h index 40865e2..5a45a24 100644 --- a/sdk/aquery.h +++ b/sdk/aquery.h @@ -5,8 +5,15 @@ extern void* Aalloc(size_t sz); extern int Afree(void * mem); template -size_t register_memory(T* ptr){ +size_t register_memory(T* ptr, void(dealloc)(void*)){ [](void* m){ auto _m = static_cast(m); delete _m; }; } - +struct Session{ + struct Statistic{ + size_t total_active; + size_t cnt_object; + size_t total_alloc; + }; + void* memory_map; +}; #define EXPORT __DLLEXPORT__ \ No newline at end of file diff --git a/sdk/aquery_mem.cpp b/sdk/aquery_mem.cpp index fd37545..1788b5b 100644 --- a/sdk/aquery_mem.cpp +++ b/sdk/aquery_mem.cpp @@ -2,4 +2,18 @@ #include #include +#include +Session* session; +void* Aalloc(size_t sz){ + void mem = malloc(sz); + auto memmap = (std::unordered_set*) session->memory_map; + memmap->insert(mem); + return mem; +} + +int Afree(void* mem){ + auto memmap = (std::unordered_set*) session->memory_map; + memmap->erase(mem); + return free(mem); +} diff --git a/server/server.cpp b/server/server.cpp index 9a624aa..8071c86 100644 --- a/server/server.cpp +++ b/server/server.cpp @@ -62,7 +62,7 @@ extern "C" int __DLLEXPORT__ binary_info() { } __AQEXPORT__(bool) have_hge(){ -#if defined(_MONETDBE_LIB_) and defined(HAVE_HGE) +#if defined(_MONETDBE_LIB_) and defined(HAVE_HGE) return HAVE_HGE; #else return false; diff --git a/server/table.h b/server/table.h index 8d4fd7d..8c3a929 100644 --- a/server/table.h +++ b/server/table.h @@ -320,7 +320,6 @@ struct TableInfo { std::string printf_string = generate_printf_string::type ...>(sep, end); - std::string header_string = std::string(); constexpr static int a_cols[] = { cols... }; for(int i = 0; i < sizeof...(cols); ++i) diff --git a/server/types.h b/server/types.h index 88656dd..5c55030 100644 --- a/server/types.h +++ b/server/types.h @@ -24,7 +24,7 @@ namespace types { AINT32, AFLOAT, ASTR, ADOUBLE, ALDOUBLE, AINT64, AINT128, AINT16, ADATE, ATIME, AINT8, AUINT32, AUINT64, AUINT128, AUINT16, AUINT8, VECTOR, NONE, ERROR }; - static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%llf", "%ld", "%s", "%hi", "%s", "%s", "%c", + static constexpr const char* printf_str[] = { "%d", "%f", "%s", "%lf", "%Lf", "%ld", "%s", "%hi", "%s", "%s", "%c", "%u", "%lu", "%s", "%hu", "%hhu", "Vector<%s>", "NULL", "ERROR" }; // TODO: deal with data/time <=> str/uint conversion struct date_t { @@ -96,13 +96,13 @@ namespace types { #define __Eq(x) (sizeof(T) == sizeof(x)) template struct GetFPTypeImpl { - using type = Cond(__Eq(float), float, Cond(__Eq(double), double, long double)); + using type = Cond(__Eq(float), float, Cond(__Eq(double), double, double)); }; template using GetFPType = typename GetFPTypeImpl::type>::type; template struct GetLongTypeImpl { - using type = Cond(__U(T), ULL_Type, Cond(Fp(T), long double, LL_Type)); + using type = Cond(__U(T), ULL_Type, Cond(Fp(T), double, LL_Type)); }; template using GetLongType = typename GetLongTypeImpl::type>::type; diff --git a/stock.a b/stock.a index 72170d6..27ced26 100644 --- a/stock.a +++ b/stock.a @@ -21,12 +21,12 @@ INSERT INTO stocks VALUES(16,5) -- SELECT max(price-min(timestamp)) FROM stocks /* "q2" */ -SELECT max(price-mins(price)) FROM stocks +-- SELECT max(price-mins(price)) FROM stocks /* "q3"*/ SELECT price, timestamp FROM stocks where price - timestamp > 1 and not (price*timestamp<100) /* "q4"*/ -SELECT max(price-mins(price)) -FROM stocks - ASSUMING DESC timestamp +-- SELECT max(price-mins(price)) +-- FROM stocks +-- ASSUMING DESC timestamp diff --git a/udf.hpp.gch b/udf.hpp.gch deleted file mode 100644 index d469f43..0000000 Binary files a/udf.hpp.gch and /dev/null differ