From 49a3fc0a788a1734e1c6cb9f50dbc5b2faa66804 Mon Sep 17 00:00:00 2001 From: Bill Sun Date: Sun, 16 Jan 2022 06:34:05 -0500 Subject: [PATCH] bug fixes & code cleanups on parser, initial code-gen --- .gitignore | 1 + README.md | 8 +- {mo_sql_parsing => aquery_parser}/__init__.py | 9 +- {mo_sql_parsing => aquery_parser}/keywords.py | 2 +- .../sql_parser.py | 23 +- {mo_sql_parsing => aquery_parser}/types.py | 6 +- {mo_sql_parsing => aquery_parser}/utils.py | 0 {mo_sql_parsing => aquery_parser}/windows.py | 4 +- engine/__init__.py | 13 + engine/ast.py | 65 ++ engine/ddl.py | 22 + mo_sql_parsing/formatting.py | 602 ------------------ moving_avg.a | 11 + moving_avg.csv | 6 + run.py | 14 +- stock.a | 22 + 16 files changed, 174 insertions(+), 634 deletions(-) rename {mo_sql_parsing => aquery_parser}/__init__.py (87%) rename {mo_sql_parsing => aquery_parser}/keywords.py (95%) rename {mo_sql_parsing => aquery_parser}/sql_parser.py (94%) rename {mo_sql_parsing => aquery_parser}/types.py (95%) rename {mo_sql_parsing => aquery_parser}/utils.py (100%) rename {mo_sql_parsing => aquery_parser}/windows.py (94%) create mode 100644 engine/__init__.py create mode 100644 engine/ast.py create mode 100644 engine/ddl.py delete mode 100644 mo_sql_parsing/formatting.py create mode 100644 moving_avg.a create mode 100644 moving_avg.csv create mode 100644 stock.a diff --git a/.gitignore b/.gitignore index 9cc2d1d..36eb050 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ vendor/ ._* .DS_Store .eggs +.vscode diff --git a/README.md b/README.md index c8a59ce..3dd55ed 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ November 2021 - There are [over 800 tests](https://app.travis-ci.com/github/klah ## Parsing SQL - >>> from mo_sql_parsing import parse + >>> from aquery_parser import parse >>> parse("select count(1) from jobs") {'select': {'value': {'count': 1}}, 'from': 'jobs'} @@ -65,7 +65,7 @@ SQLServer uses square brackets to delimit identifiers.
For example which conflicts with BigQuery array constructor (eg `[1, 2, 3, 4]`). You may use the SqlServer flavour with - from mo_sql_parsing import parse_sqlserver as parse + from aquery_parser import parse_sqlserver as parse #### NULL is None @@ -83,7 +83,7 @@ The default behaviour of the parser is to output function calls in `simple_op` f You can have the parser emit function calls in `normal_op` format - >>> from mo_sql_parsing import parse, normal_op + >>> from aquery_parser import parse, normal_op >>> parse("select trim(' ' from b+c)", calls=normal_op) which produces calls in a normalized format @@ -111,7 +111,7 @@ MySQL uses both double quotes and single quotes to declare literal strings. Thi You may also generate SQL from the a given JSON document. This is done by the formatter, which is in Alpha state (Oct2021). - >>> from mo_sql_parsing import format + >>> from aquery_parser import format >>> format({"from":"test", "select":["a.b", "c"]}) 'SELECT a.b, c FROM test' diff --git a/mo_sql_parsing/__init__.py b/aquery_parser/__init__.py similarity index 87% rename from mo_sql_parsing/__init__.py rename to aquery_parser/__init__.py index eb8cd34..ea4401c 100644 --- a/mo_sql_parsing/__init__.py +++ b/aquery_parser/__init__.py @@ -12,8 +12,8 @@ from __future__ import absolute_import, division, unicode_literals import json from threading import Lock -from mo_sql_parsing.sql_parser import scrub -from mo_sql_parsing.utils import ansi_string, simple_op, normal_op +from aquery_parser.sql_parser import scrub +from aquery_parser.utils import ansi_string, simple_op, normal_op parse_locker = Lock() # ENSURE ONLY ONE PARSING AT A TIME common_parser = None @@ -82,11 +82,6 @@ def _parse(parser, sql, null, calls): return output -def format(json, **kwargs): - from mo_sql_parsing.formatting import Formatter - - return Formatter(**kwargs).dispatch(json) - _ = json.dumps diff --git a/mo_sql_parsing/keywords.py b/aquery_parser/keywords.py similarity index 95% rename from 
mo_sql_parsing/keywords.py rename to aquery_parser/keywords.py index fd0c3dc..75f3198 100644 --- a/mo_sql_parsing/keywords.py +++ b/aquery_parser/keywords.py @@ -10,7 +10,7 @@ # SQL CONSTANTS from mo_parsing import * -from mo_sql_parsing.utils import SQL_NULL, keyword +from aquery_parser.utils import SQL_NULL, keyword NULL = keyword("null") / (lambda: SQL_NULL) TRUE = keyword("true") / (lambda: True) diff --git a/mo_sql_parsing/sql_parser.py b/aquery_parser/sql_parser.py similarity index 94% rename from mo_sql_parsing/sql_parser.py rename to aquery_parser/sql_parser.py index b7e22e3..e24479b 100644 --- a/mo_sql_parsing/sql_parser.py +++ b/aquery_parser/sql_parser.py @@ -7,17 +7,14 @@ # Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from operator import add -from textwrap import indent -from mo_parsing import whitespaces from mo_parsing.helpers import restOfLine from mo_parsing.infix import delimited_list from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace -from mo_sql_parsing.keywords import * -from mo_sql_parsing.types import get_column_type, time_functions -from mo_sql_parsing.utils import * -from mo_sql_parsing.windows import window +from aquery_parser.keywords import * +from aquery_parser.types import get_column_type, time_functions +from aquery_parser.utils import * +from aquery_parser.windows import window def no_dashes(tokens, start, string): @@ -29,14 +26,14 @@ def no_dashes(tokens, start, string): string, """Ambiguity: Use backticks (``) around identifiers with dashes, or add space around subtraction operator.""", ) - - + + digit = Char("0123456789") simple_ident = ( Char(FIRST_IDENT_CHAR) - + (Regex("(?<=[^ 0-9])\\-(?=[^ 0-9])") | Char(IDENT_CHAR))[...] + + Char(IDENT_CHAR)[...] # let's not support dashes in var_names. 
) -simple_ident = Regex(simple_ident.__regex__()[1]) / no_dashes +simple_ident = Regex(simple_ident.__regex__()[1]) def common_parser(): @@ -536,7 +533,7 @@ def parser(literal_string, ident, sqlserver=False): | assign("default charset", EQ + var_name) ) + Optional(AS.suppress() + infix_notation(query, [])("query")) - )("create table") + )("create_table") create_view = ( keyword("create") @@ -547,7 +544,7 @@ def parser(literal_string, ident, sqlserver=False): + var_name("name") + AS + query("query") - )("create view") + )("create_view") # CREATE INDEX a ON u USING btree (e); create_index = ( diff --git a/mo_sql_parsing/types.py b/aquery_parser/types.py similarity index 95% rename from mo_sql_parsing/types.py rename to aquery_parser/types.py index 06c0bb7..acdd428 100644 --- a/mo_sql_parsing/types.py +++ b/aquery_parser/types.py @@ -12,7 +12,7 @@ from mo_parsing import Forward, Group, Optional, MatchFirst, Literal, ZeroOrMore, export from mo_parsing.infix import delimited_list, RIGHT_ASSOC, LEFT_ASSOC -from mo_sql_parsing.keywords import ( +from aquery_parser.keywords import ( RB, LB, NEG, @@ -24,7 +24,7 @@ from mo_sql_parsing.keywords import ( LT, GT, ) -from mo_sql_parsing.utils import ( +from aquery_parser.utils import ( keyword, to_json_call, int_num, @@ -220,4 +220,4 @@ def get_column_type(expr, var_name, literal_string): return column_type, column_definition, column_def_references -export("mo_sql_parsing.utils", unary_ops) +export("aquery_parser.utils", unary_ops) diff --git a/mo_sql_parsing/utils.py b/aquery_parser/utils.py similarity index 100% rename from mo_sql_parsing/utils.py rename to aquery_parser/utils.py diff --git a/mo_sql_parsing/windows.py b/aquery_parser/windows.py similarity index 94% rename from mo_sql_parsing/windows.py rename to aquery_parser/windows.py index 9bf818a..defc2c4 100644 --- a/mo_sql_parsing/windows.py +++ b/aquery_parser/windows.py @@ -11,8 +11,8 @@ from __future__ import absolute_import, division, unicode_literals from 
mo_parsing.infix import delimited_list -from mo_sql_parsing.keywords import * -from mo_sql_parsing.utils import * +from aquery_parser.keywords import * +from aquery_parser.utils import * # https://docs.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver15 diff --git a/engine/__init__.py b/engine/__init__.py new file mode 100644 index 0000000..f89c760 --- /dev/null +++ b/engine/__init__.py @@ -0,0 +1,13 @@ +from engine.ast import Context, ast_node +import engine.ddl + +def initialize(): + return Context() + +def generate(ast, cxt): + for k in ast.keys(): + if k in ast_node.types.keys(): + root = ast_node.types[k](None, ast, cxt) + + +__all__ = ["generate"] diff --git a/engine/ast.py b/engine/ast.py new file mode 100644 index 0000000..f43af0a --- /dev/null +++ b/engine/ast.py @@ -0,0 +1,65 @@ +from typing import List + + +class TableInfo: + def __init__(self, table_name, cols, cxt:'Context'): + # statics + self.table_name = table_name + self.columns = dict() # column_name, type + for c in cols: + self.columns[c['name']] = ((list(c['type'].keys()))[0], c) + k9name = self.table_name + c['name'] + if k9name in cxt.k9cols_byname: # duplicate names? + root = cxt.k9cols_byname[k9name] + k9name = k9name + root[1] + root[1] += 1 + cxt.k9cols[c] = k9name + cxt.k9cols_byname[k9name] = (c, 1) + # runtime + self.n_cols = 0 # number of cols + self.order = [] # assumptions + + cxt.tables_byname[self.table_name] = self # construct reverse map + + def get_k9colname(self, cxt:'Context', col_name): + return cxt.k9cols[self.columns[col_name][1]] # well, this is gnarly.. 
will change later + +class Context: + def __init__(self): + self.tables:List[TableInfo] = [] + self.tables_byname = dict() + self.k9cols = dict() + self.k9cols_byname = dict() + + self.k9code = '' + + def add_table(self, table_name, cols): + tbl = TableInfo(table_name, cols, self) + self.tables.append(tbl) + return tbl + + def emit(self, codelet): + self.k9code += codelet + '\n' + + def __str__(self): + return self.k9code + +class ast_node: + types = dict() + def __init__(self, parent:"ast_node", node, context:Context = None): + self.context = parent.context if context is None else context + self.produce(node) + self.enumerate(node) + self.consume(node) + + def emit(self, code): + self.context.emit(code) + + name = 'null' + + def produce(self, _): + pass + def enumerate(self, _): + pass + def consume(self, _): + pass diff --git a/engine/ddl.py b/engine/ddl.py new file mode 100644 index 0000000..32a8113 --- /dev/null +++ b/engine/ddl.py @@ -0,0 +1,22 @@ +from engine.ast import TableInfo, ast_node +class create_table(ast_node): + name = 'create_table' + def produce(self, node): + ct = node[self.name] + tbl = self.context.add_table(ct['name'], ct['columns']) + # create tables in k9 + for c in ct['columns']: + self.emit(f"{tbl.get_k9colname((list(c['name'].keys())))[0]}:()") + +class insert_into(ast_node): + name = 'insert' + def produce(self, node): + ct = node[self.name] + table:TableInfo = self.context.tables_byname[ct] + + +import sys, inspect + +for name, cls in inspect.getmembers(sys.modules[__name__]): + if inspect.isclass(cls) and issubclass(cls, ast_node): + ast_node.types[name] = cls \ No newline at end of file diff --git a/mo_sql_parsing/formatting.py b/mo_sql_parsing/formatting.py deleted file mode 100644 index ed0f208..0000000 --- a/mo_sql_parsing/formatting.py +++ /dev/null @@ -1,602 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Beto Dealmeida (beto@dealmeida.net) -# - -from __future__ import absolute_import, division, unicode_literals - -import re - -from mo_dots import split_field -from mo_future import first, is_text, string_types, text -from mo_parsing import listwrap - -from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence -from mo_sql_parsing.utils import binary_ops, is_set_op - -MAX_PRECEDENCE = 100 -VALID = re.compile(r"^[a-zA-Z_]\w*$") - - -def is_keyword(identifier): - try: - RESERVED.parse_string(identifier) - return True - except Exception: - return False - - -def should_quote(identifier): - """ - Return true if a given identifier should be quoted. - - This is usually true when the identifier: - - - is a reserved word - - contain spaces - - does not match the regex `[a-zA-Z_]\\w*` - - """ - return identifier != "*" and (not VALID.match(identifier) or is_keyword(identifier)) - - -def escape(ident, ansi_quotes, should_quote): - """ - Escape identifiers. - - ANSI uses double quotes, but many databases use back quotes. 
- - """ - - def esc(identifier): - if not should_quote(identifier): - return identifier - - quote = '"' if ansi_quotes else "`" - identifier = identifier.replace(quote, 2 * quote) - return "{0}{1}{2}".format(quote, identifier, quote) - - return ".".join(esc(f) for f in split_field(ident)) - - -def Operator(_op): - op_prec = precedence[binary_ops[_op]] - op = " {0} ".format(_op).replace("_", " ").upper() - - def func(self, json, prec): - acc = [] - - if isinstance(json, dict): - # {VARIABLE: VALUE} FORM - k, v = first(json.items()) - json = [k, {"literal": v}] - - for i, v in enumerate(listwrap(json)): - if i == 0: - acc.append(self.dispatch(v, op_prec + 0.25)) - else: - acc.append(self.dispatch(v, op_prec)) - if prec >= op_prec: - return op.join(acc) - else: - return f"({op.join(acc)})" - - return func - - -def isolate(expr, sql, prec): - """ - RETURN sql IN PARENTHESIS IF PREEDENCE > prec - :param expr: expression to isolate - :param sql: sql to return - :param prec: current precedence - """ - if is_text(expr): - return sql - ps = [p for k in expr.keys() for p in [precedence.get(k)] if p is not None] - if not ps: - return sql - elif min(ps) >= prec: - return f"({sql})" - else: - return sql - - -unordered_clauses = [ - "with", - "distinct_on", - "select_distinct", - "select", - "from", - "where", - "groupby", - "having", -] - -ordered_clauses = [ - "orderby", - "limit", - "offset", - "fetch", -] - - -class Formatter: - # infix operators - _concat = Operator("||") - _mul = Operator("*") - _div = Operator("/") - _mod = Operator("%") - _add = Operator("+") - _sub = Operator("-") - _neq = Operator("<>") - _gt = Operator(">") - _lt = Operator("<") - _gte = Operator(">=") - _lte = Operator("<=") - _eq = Operator("=") - _or = Operator("or") - _and = Operator("and") - _binary_and = Operator("&") - _binary_or = Operator("|") - _like = Operator("like") - _not_like = Operator("not like") - _rlike = Operator("rlike") - _not_rlike = Operator("not rlike") - _union = 
Operator("union") - _union_all = Operator("union all") - _intersect = Operator("intersect") - _minus = Operator("minus") - _except = Operator("except") - - def __init__(self, ansi_quotes=True, should_quote=should_quote): - self.ansi_quotes = ansi_quotes - self.should_quote = should_quote - - def format(self, json): - return self.dispatch(json, 50) - - def dispatch(self, json, prec=100): - if isinstance(json, list): - return self.sql_list(json, prec=precedence["list"]) - if isinstance(json, dict): - if len(json) == 0: - return "" - elif "value" in json: - return self.value(json, prec) - elif "join" in json: - return self._join_on(json) - elif "insert" in json: - return self.insert(json) - elif json.keys() & set(ordered_clauses): - return self.ordered_query(json, prec) - elif json.keys() & set(unordered_clauses): - return self.unordered_query(json, prec) - elif "null" in json: - return "NULL" - elif "trim" in json: - return self._trim(json, prec) - elif "extract" in json: - return self._extract(json, prec) - else: - return self.op(json, prec) - if isinstance(json, string_types): - return escape(json, self.ansi_quotes, self.should_quote) - if json == None: - return "NULL" - - return text(json) - - def sql_list(self, json, prec=precedence["from"] - 1): - sql = ", ".join(self.dispatch(element, prec=MAX_PRECEDENCE) for element in json) - if prec >= precedence["from"]: - return sql - else: - return f"({sql})" - - def value(self, json, prec=precedence["from"]): - parts = [self.dispatch(json["value"], prec)] - if "over" in json: - over = json["over"] - parts.append("OVER") - window = [] - if "partitionby" in over: - window.append("PARTITION BY") - window.append(self.dispatch(over["partitionby"])) - if "orderby" in over: - window.append(self.orderby(over, precedence["window"])) - if "range" in over: - - def wordy(v): - if v < 0: - return [text(abs(v)), "PRECEDING"] - elif v > 0: - return [text(v), "FOLLOWING"] - - window.append("ROWS") - range = over["range"] - min = 
range.get("min") - max = range.get("max") - - if min is None: - if max is None: - window.pop() # not expected, but deal - elif max == 0: - window.append("UNBOUNDED PRECEDING") - else: - window.append("BETWEEN") - window.append("UNBOUNDED PRECEDING") - window.append("AND") - window.extend(wordy(max)) - elif min == 0: - if max is None: - window.append("UNBOUNDED FOLLOWING") - elif max == 0: - window.append("CURRENT ROW") - else: - window.extend(wordy(max)) - else: - if max is None: - window.append("BETWEEN") - window.extend(wordy(min)) - window.append("AND") - window.append("UNBOUNDED FOLLOWING") - elif max == 0: - window.extend(wordy(min)) - else: - window.append("BETWEEN") - window.extend(wordy(min)) - window.append("AND") - window.extend(wordy(max)) - - window = " ".join(window) - parts.append(f"({window})") - if "name" in json: - parts.extend(["AS", self.dispatch(json["name"])]) - - return " ".join(parts) - - def op(self, json, prec): - if len(json) > 1: - raise Exception("Operators should have only one key!") - key, value = list(json.items())[0] - - # check if the attribute exists, and call the corresponding method; - # note that we disallow keys that start with `_` to avoid giving access - # to magic methods - attr = f"_{key}" - if hasattr(self, attr) and not key.startswith("_"): - method = getattr(self, attr) - op_prec = precedence.get(key, MAX_PRECEDENCE) - if prec >= op_prec: - return method(value, op_prec) - else: - return f"({method(value, op_prec)})" - - # treat as regular function call - if isinstance(value, dict) and len(value) == 0: - return ( - key.upper() + "()" - ) # NOT SURE IF AN EMPTY dict SHOULD BE DELT WITH HERE, OR IN self.format() - else: - params = ", ".join(self.dispatch(p) for p in listwrap(value)) - return f"{key.upper()}({params})" - - def _binary_not(self, value, prec): - return "~{0}".format(self.dispatch(value)) - - def _exists(self, value, prec): - return "{0} IS NOT NULL".format(self.dispatch(value, precedence["is"])) - - def 
_missing(self, value, prec): - return "{0} IS NULL".format(self.dispatch(value, precedence["is"])) - - def _collate(self, pair, prec): - return "{0} COLLATE {1}".format( - self.dispatch(pair[0], precedence["collate"]), pair[1] - ) - - def _in(self, json, prec): - member, set = json - if "literal" in set: - set = {"literal": listwrap(set["literal"])} - sql = ( - self.dispatch(member, precedence["in"]) - + " IN " - + self.dispatch(set, precedence["in"]) - ) - if prec < precedence["in"]: - sql = f"({sql})" - return sql - - def _nin(self, json, prec): - member, set = json - if "literal" in set: - set = {"literal": listwrap(set["literal"])} - sql = ( - self.dispatch(member, precedence["in"]) - + " NOT IN " - + self.dispatch(set, precedence["in"]) - ) - if prec < precedence["in"]: - sql = f"({sql})" - return sql - - def _case(self, checks, prec): - parts = ["CASE"] - for check in checks if isinstance(checks, list) else [checks]: - if isinstance(check, dict): - if "when" in check and "then" in check: - parts.extend(["WHEN", self.dispatch(check["when"])]) - parts.extend(["THEN", self.dispatch(check["then"])]) - else: - parts.extend(["ELSE", self.dispatch(check)]) - else: - parts.extend(["ELSE", self.dispatch(check)]) - parts.append("END") - return " ".join(parts) - - def _cast(self, json, prec): - expr, type = json - - type_name, params = first(type.items()) - if not params: - type = type_name.upper() - else: - type = {type_name.upper(): params} - - return f"CAST({self.dispatch(expr)} AS {self.dispatch(type)})" - - def _extract(self, json, prec): - interval, value = json["extract"] - i = self.dispatch(interval).upper() - v = self.dispatch(value) - return f"EXTRACT({i} FROM {v})" - - def _interval(self, json, prec): - amount = self.dispatch(json[0], precedence["and"]) - type = self.dispatch(json[1], precedence["and"]) - return f"INTERVAL {amount} {type.upper()}" - - def _literal(self, json, prec=0): - if isinstance(json, list): - return "({0})".format(", ".join( - 
self._literal(v, precedence["literal"]) for v in json - )) - elif isinstance(json, string_types): - return "'{0}'".format(json.replace("'", "''")) - else: - return str(json) - - def _get(self, json, prec): - v, i = json - v_sql = self.dispatch(v, prec=precedence["literal"]) - i_sql = self.dispatch(i) - return f"{v_sql}[{i_sql}]" - - def _between(self, json, prec): - return "{0} BETWEEN {1} AND {2}".format( - self.dispatch(json[0], precedence["between"]), - self.dispatch(json[1], precedence["between"]), - self.dispatch(json[2], precedence["between"]), - ) - - def _trim(self, json, prec): - c = json.get("characters") - d = json.get("direction") - v = json["trim"] - acc = ["TRIM("] - if d: - acc.append(d.upper()) - acc.append(" ") - if c: - acc.append(self.dispatch(c)) - acc.append(" ") - if c or d: - acc.append("FROM ") - acc.append(self.dispatch(v)) - acc.append(")") - return "".join(acc) - - def _not_between(self, json, prec): - return "{0} NOT BETWEEN {1} AND {2}".format( - self.dispatch(json[0], precedence["between"]), - self.dispatch(json[1], precedence["between"]), - self.dispatch(json[2], precedence["between"]), - ) - - def _distinct(self, json, prec): - return "DISTINCT " + ", ".join( - self.dispatch(v, precedence["select"]) for v in listwrap(json) - ) - - def _select_distinct(self, json, prec): - return "SELECT DISTINCT " + ", ".join(self.dispatch(v) for v in listwrap(json)) - - def _distinct_on(self, json, prec): - return ( - "DISTINCT ON (" + ", ".join(self.dispatch(v) for v in listwrap(json)) + ")" - ) - - def _join_on(self, json, prec): - detected_join = join_keywords & set(json.keys()) - if len(detected_join) == 0: - raise Exception( - 'Fail to detect join type! 
Detected: "{}" Except one of: "{}"'.format( - [on_keyword for on_keyword in json if on_keyword != "on"][0], - '", "'.join(join_keywords), - ) - ) - - join_keyword = detected_join.pop() - - acc = [] - acc.append(join_keyword.upper()) - acc.append(self.dispatch(json[join_keyword], precedence["join"])) - - if json.get("on"): - acc.append("ON") - acc.append(self.dispatch(json["on"])) - if json.get("using"): - acc.append("USING") - acc.append(self.dispatch(json["using"])) - return " ".join(acc) - - def ordered_query(self, json, prec): - if json.keys() & set(unordered_clauses) - {"from"}: - # regular query - acc = [self.unordered_query(json, precedence["order"])] - else: - # set-op expression - acc = [self.dispatch(json["from"], precedence["order"])] - - acc.extend( - part - for clause in ordered_clauses - if clause in json - for part in [getattr(self, clause)(json, precedence["order"])] - if part - ) - sql = " ".join(acc) - if prec >= precedence["order"]: - return sql - else: - return f"({sql})" - - def unordered_query(self, json, prec): - sql = " ".join( - part - for clause in unordered_clauses - if clause in json - for part in [getattr(self, clause)(json, precedence["from"])] - if part - ) - if prec >= precedence["from"]: - return sql - else: - return f"({sql})" - - def with_(self, json, prec): - if "with" in json: - with_ = json["with"] - if not isinstance(with_, list): - with_ = [with_] - parts = ", ".join( - "{0} AS ({1})".format(part["name"], self.dispatch(part["value"])) - for part in with_ - ) - return "WITH {0}".format(parts) - - def select(self, json, prec): - param = ", ".join(self.dispatch(s) for s in listwrap(json["select"])) - if "top" in json: - top = self.dispatch(json["top"]) - return f"SELECT TOP ({top}) {param}" - if "distinct_on" in json: - return param - else: - return f"SELECT {param}" - - def distinct_on(self, json, prec): - param = ", ".join(self.dispatch(s) for s in listwrap(json["distinct_on"])) - return f"SELECT DISTINCT ON ({param})" - - def 
select_distinct(self, json, prec): - param = ", ".join(self.dispatch(s) for s in listwrap(json["select_distinct"])) - return f"SELECT DISTINCT {param}" - - def from_(self, json, prec): - is_join = False - from_ = json["from"] - if isinstance(from_, dict) and is_set_op & from_.keys(): - source = self.op(from_, precedence["from"]) - return f"FROM {source}" - - from_ = listwrap(from_) - parts = [] - for v in from_: - if join_keywords & set(v): - is_join = True - parts.append(self._join_on(v, precedence["from"] - 1)) - else: - parts.append(self.dispatch(v, precedence["from"] - 1)) - joiner = " " if is_join else ", " - rest = joiner.join(parts) - return f"FROM {rest}" - - def where(self, json, prec): - expr = self.dispatch(json["where"]) - return f"WHERE {expr}" - - def groupby(self, json, prec): - param = ", ".join(self.dispatch(s) for s in listwrap(json["groupby"])) - return f"GROUP BY {param}" - - def having(self, json, prec): - return "HAVING {0}".format(self.dispatch(json["having"])) - - def orderby(self, json, prec): - param = ", ".join( - ( - self.dispatch(s["value"], precedence["order"]) - + " " - + s.get("sort", "").upper() - ).strip() - for s in listwrap(json["orderby"]) - ) - return f"ORDER BY {param}" - - def limit(self, json, prec): - num = self.dispatch(json["limit"], precedence["order"]) - return f"LIMIT {num}" - - def offset(self, json, prec): - num = self.dispatch(json["offset"], precedence["order"]) - return f"OFFSET {num}" - - def fetch(self, json, prec): - num = self.dispatch(json["offset"], precedence["order"]) - return f"FETCH {num} ROWS ONLY" - - def insert(self, json, prec=precedence["from"]): - acc = ["INSERT"] - if "overwrite" in json: - acc.append("OVERWRITE") - else: - acc.append("INTO") - acc.append(json["insert"]) - - if "columns" in json: - acc.append(self.sql_list(json)) - if "values" in json: - values = json["values"] - if all(isinstance(row, dict) for row in values): - columns = list(sorted(set(k for row in values for k in row.keys()))) 
- acc.append(self.sql_list(columns)) - if "if exists" in json: - acc.append("IF EXISTS") - acc.append("VALUES") - acc.append(",\n".join( - "(" + ", ".join(self._literal(row[c]) for c in columns) + ")" - for row in values - )) - else: - if "if exists" in json: - acc.append("IF EXISTS") - acc.append("VALUES") - for row in values: - acc.append("(" + ", ".join(self._literal(row)) + ")") - - else: - if json["if exists"]: - acc.append("IF EXISTS") - acc.append(self.dispatch(json["query"])) - return " ".join(acc) - - -setattr(Formatter, "with", Formatter.with_) -setattr(Formatter, "from", Formatter.from_) diff --git a/moving_avg.a b/moving_avg.a new file mode 100644 index 0000000..b38d135 --- /dev/null +++ b/moving_avg.a @@ -0,0 +1,11 @@ +CREATE TABLE sale(Month INT, sales INT) + +LOAD DATA INFILE "moving_avg.csv" +INTO TABLE sale +FIELDS TERMINATED BY "\t" + +SELECT Month,avgs(3,sales) +FROM sale + ASSUMING ASC Month +INTO OUTFILE "moving_avg_output.csv" +FIELDS TERMINATED BY "," diff --git a/moving_avg.csv b/moving_avg.csv new file mode 100644 index 0000000..e304cc2 --- /dev/null +++ b/moving_avg.csv @@ -0,0 +1,6 @@ +Month sales +1 100 +2 120 +4 140 +3 140 +5 130 diff --git a/run.py b/run.py index 67d26cf..cff956d 100644 --- a/run.py +++ b/run.py @@ -1,6 +1,10 @@ import re -import mo_sql_parsing as parser +import aquery_parser as parser +import engine +test_parser = True + +# code to test parser ws = re.compile(r'\s+') q = 'SELECT p.Name, v.Name FROM Production.Product p JOIN Purchasing.ProductVendor pv ON p.ProductID = pv.ProductID JOIN Purchasing.Vendor v ON pv.BusinessEntityID = v.BusinessEntityID WHERE ProductSubcategoryID = 15 ORDER BY v.Name;' @@ -9,9 +13,11 @@ res = parser.parse(q) print(res) -while True: +while test_parser: try: q = input() + if q == 'break': + break trimed = ws.sub(' ', q.lower()).split(' ') if trimed[0] == 'file': fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \ @@ -25,3 +31,7 @@ while True: print(stmts) except Exception as e: 
print(type(e), e) + +cxt = engine.initialize() +for s in stmts['stmts']: + engine.generate(s, cxt) diff --git a/stock.a b/stock.a new file mode 100644 index 0000000..d847d8c --- /dev/null +++ b/stock.a @@ -0,0 +1,22 @@ +CREATE TABLE stocks(timestamp INT, price INT) + +INSERT INTO stocks VALUES(1,15) +INSERT INTO stocks VALUES(2,19) +INSERT INTO stocks VALUES(3,16) +INSERT INTO stocks VALUES(4,17) +INSERT INTO stocks VALUES(5,15) +INSERT INTO stocks VALUES(6,13) +INSERT INTO stocks VALUES(7,5) +INSERT INTO stocks VALUES(8,8) +INSERT INTO stocks VALUES(9,7) +INSERT INTO stocks VALUES(10,13) +INSERT INTO stocks VALUES(11,11) +INSERT INTO stocks VALUES(12,14) +INSERT INTO stocks VALUES(13,10) +INSERT INTO stocks VALUES(14,5) +INSERT INTO stocks VALUES(15,2) +INSERT INTO stocks VALUES(16,5) + +SELECT max(price-mins(price)) +FROM stocks + ASSUMING ASC timestamp