bug fixes & code cleanups on parser

initial code-gen
dev
Bill Sun 3 years ago
parent 90712aff7d
commit 49a3fc0a78

1
.gitignore vendored

@ -13,3 +13,4 @@ vendor/
._* ._*
.DS_Store .DS_Store
.eggs .eggs
.vscode

@ -40,7 +40,7 @@ November 2021 - There are [over 800 tests](https://app.travis-ci.com/github/klah
## Parsing SQL ## Parsing SQL
>>> from mo_sql_parsing import parse >>> from aquery_parser import parse
>>> parse("select count(1) from jobs") >>> parse("select count(1) from jobs")
{'select': {'value': {'count': 1}}, 'from': 'jobs'} {'select': {'value': {'count': 1}}, 'from': 'jobs'}
@ -65,7 +65,7 @@ SQLServer uses square brackets to delimit identifiers. For example
which conflicts with BigQuery array constructor (eg `[1, 2, 3, 4]`). You may use the SqlServer flavour with which conflicts with BigQuery array constructor (eg `[1, 2, 3, 4]`). You may use the SqlServer flavour with
from mo_sql_parsing import parse_sqlserver as parse from aquery_parser import parse_sqlserver as parse
#### NULL is None #### NULL is None
@ -83,7 +83,7 @@ The default behaviour of the parser is to output function calls in `simple_op` f
You can have the parser emit function calls in `normal_op` format You can have the parser emit function calls in `normal_op` format
>>> from mo_sql_parsing import parse, normal_op >>> from aquery_parser import parse, normal_op
>>> parse("select trim(' ' from b+c)", calls=normal_op) >>> parse("select trim(' ' from b+c)", calls=normal_op)
which produces calls in a normalized format which produces calls in a normalized format
@ -111,7 +111,7 @@ MySQL uses both double quotes and single quotes to declare literal strings. Thi
You may also generate SQL from a given JSON document. This is done by the formatter, which is in Alpha state (Oct2021). You may also generate SQL from a given JSON document. This is done by the formatter, which is in Alpha state (Oct2021).
>>> from mo_sql_parsing import format >>> from aquery_parser import format
>>> format({"from":"test", "select":["a.b", "c"]}) >>> format({"from":"test", "select":["a.b", "c"]})
'SELECT a.b, c FROM test' 'SELECT a.b, c FROM test'

@ -12,8 +12,8 @@ from __future__ import absolute_import, division, unicode_literals
import json import json
from threading import Lock from threading import Lock
from mo_sql_parsing.sql_parser import scrub from aquery_parser.sql_parser import scrub
from mo_sql_parsing.utils import ansi_string, simple_op, normal_op from aquery_parser.utils import ansi_string, simple_op, normal_op
parse_locker = Lock() # ENSURE ONLY ONE PARSING AT A TIME parse_locker = Lock() # ENSURE ONLY ONE PARSING AT A TIME
common_parser = None common_parser = None
@ -82,11 +82,6 @@ def _parse(parser, sql, null, calls):
return output return output
def format(json, **kwargs):
from mo_sql_parsing.formatting import Formatter
return Formatter(**kwargs).dispatch(json)
_ = json.dumps _ = json.dumps

@ -10,7 +10,7 @@
# SQL CONSTANTS # SQL CONSTANTS
from mo_parsing import * from mo_parsing import *
from mo_sql_parsing.utils import SQL_NULL, keyword from aquery_parser.utils import SQL_NULL, keyword
NULL = keyword("null") / (lambda: SQL_NULL) NULL = keyword("null") / (lambda: SQL_NULL)
TRUE = keyword("true") / (lambda: True) TRUE = keyword("true") / (lambda: True)

@ -7,17 +7,14 @@
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
# #
from operator import add
from textwrap import indent
from mo_parsing import whitespaces
from mo_parsing.helpers import restOfLine from mo_parsing.helpers import restOfLine
from mo_parsing.infix import delimited_list from mo_parsing.infix import delimited_list
from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace
from mo_sql_parsing.keywords import * from aquery_parser.keywords import *
from mo_sql_parsing.types import get_column_type, time_functions from aquery_parser.types import get_column_type, time_functions
from mo_sql_parsing.utils import * from aquery_parser.utils import *
from mo_sql_parsing.windows import window from aquery_parser.windows import window
def no_dashes(tokens, start, string): def no_dashes(tokens, start, string):
@ -34,9 +31,9 @@ def no_dashes(tokens, start, string):
digit = Char("0123456789") digit = Char("0123456789")
simple_ident = ( simple_ident = (
Char(FIRST_IDENT_CHAR) Char(FIRST_IDENT_CHAR)
+ (Regex("(?<=[^ 0-9])\\-(?=[^ 0-9])") | Char(IDENT_CHAR))[...] + Char(IDENT_CHAR)[...] # let's not support dashes in var_names.
) )
simple_ident = Regex(simple_ident.__regex__()[1]) / no_dashes simple_ident = Regex(simple_ident.__regex__()[1])
def common_parser(): def common_parser():
@ -536,7 +533,7 @@ def parser(literal_string, ident, sqlserver=False):
| assign("default charset", EQ + var_name) | assign("default charset", EQ + var_name)
) )
+ Optional(AS.suppress() + infix_notation(query, [])("query")) + Optional(AS.suppress() + infix_notation(query, [])("query"))
)("create table") )("create_table")
create_view = ( create_view = (
keyword("create") keyword("create")
@ -547,7 +544,7 @@ def parser(literal_string, ident, sqlserver=False):
+ var_name("name") + var_name("name")
+ AS + AS
+ query("query") + query("query")
)("create view") )("create_view")
# CREATE INDEX a ON u USING btree (e); # CREATE INDEX a ON u USING btree (e);
create_index = ( create_index = (

@ -12,7 +12,7 @@
from mo_parsing import Forward, Group, Optional, MatchFirst, Literal, ZeroOrMore, export from mo_parsing import Forward, Group, Optional, MatchFirst, Literal, ZeroOrMore, export
from mo_parsing.infix import delimited_list, RIGHT_ASSOC, LEFT_ASSOC from mo_parsing.infix import delimited_list, RIGHT_ASSOC, LEFT_ASSOC
from mo_sql_parsing.keywords import ( from aquery_parser.keywords import (
RB, RB,
LB, LB,
NEG, NEG,
@ -24,7 +24,7 @@ from mo_sql_parsing.keywords import (
LT, LT,
GT, GT,
) )
from mo_sql_parsing.utils import ( from aquery_parser.utils import (
keyword, keyword,
to_json_call, to_json_call,
int_num, int_num,
@ -220,4 +220,4 @@ def get_column_type(expr, var_name, literal_string):
return column_type, column_definition, column_def_references return column_type, column_definition, column_def_references
export("mo_sql_parsing.utils", unary_ops) export("aquery_parser.utils", unary_ops)

@ -11,8 +11,8 @@ from __future__ import absolute_import, division, unicode_literals
from mo_parsing.infix import delimited_list from mo_parsing.infix import delimited_list
from mo_sql_parsing.keywords import * from aquery_parser.keywords import *
from mo_sql_parsing.utils import * from aquery_parser.utils import *
# https://docs.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver15 # https://docs.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver15

@ -0,0 +1,13 @@
from engine.ast import Context, ast_node
import engine.ddl
def initialize():
    """Create and return a fresh, empty code-generation ``Context``."""
    context = Context()
    return context
def generate(ast, cxt):
    """Run code generation for one parsed statement.

    Each top-level key of ``ast`` that has a handler class registered in
    ``ast_node.types`` gets that handler instantiated with the whole
    statement and the shared context.  Keys without a registered handler
    are ignored.  Nothing is returned.

    :param ast: mapping describing a single parsed statement
    :param cxt: context object handed to every handler
    """
    handlers = ast_node.types
    for key in ast:
        handler = handlers.get(key)
        if handler is not None:
            # Instantiated only for the work done by its constructor; the
            # instance itself is not needed afterwards.
            handler(None, ast, cxt)
# Names exported by ``from engine import *``.  ``initialize`` is not listed,
# so it has to be reached through the module (``engine.initialize()``).
__all__ = ["generate"]

@ -0,0 +1,65 @@
from typing import List
class TableInfo:
    """Static description of one table plus the k9 names of its columns.

    Constructing a ``TableInfo`` registers the table in
    ``cxt.tables_byname`` and allocates a unique k9 variable name for every
    column, recorded in ``cxt.k9cols`` and ``cxt.k9cols_byname``.
    """

    def __init__(self, table_name, cols, cxt: 'Context'):
        """
        :param table_name: name of the table
        :param cols: iterable of column definitions; each must provide
            ``c['name']`` (a string) and a non-empty mapping ``c['type']``
        :param cxt: context whose ``k9cols``, ``k9cols_byname`` and
            ``tables_byname`` dictionaries are updated in place
        """
        # statics
        self.table_name = table_name
        self.columns = dict()  # column_name -> (type name, column definition)
        for c in cols:
            col_name = c['name']
            self.columns[col_name] = ((list(c['type'].keys()))[0], c)

            base = self.table_name + col_name
            k9name = base
            # duplicate names?  Append the counter kept by the first owner
            # of the base name until the result is unused.
            while k9name in cxt.k9cols_byname:
                owner = cxt.k9cols_byname[base]
                k9name = base + str(owner[1])
                owner[1] += 1

            # Column definitions are plain dicts and therefore unhashable,
            # so the forward map is keyed by (table name, column name).
            cxt.k9cols[(self.table_name, col_name)] = k9name
            # A list (not a tuple) so the suffix counter can be advanced.
            cxt.k9cols_byname[k9name] = [c, 1]

        # runtime
        self.n_cols = 0  # number of cols
        self.order = []  # assumptions

        cxt.tables_byname[self.table_name] = self  # construct reverse map

    def get_k9colname(self, cxt: 'Context', col_name):
        """Return the k9 variable name allocated for ``col_name``.

        :raises KeyError: if this table has no column called ``col_name``
        """
        return cxt.k9cols[(self.table_name, col_name)]
class Context:
    """Shared code-generation state: the known tables, the k9 column-name
    maps and the k9 program text emitted so far."""

    def __init__(self):
        self.k9code = ''
        self.k9cols_byname = {}
        self.k9cols = {}
        self.tables_byname = {}
        self.tables:List[TableInfo] = []

    def add_table(self, table_name, cols):
        """Build a ``TableInfo`` for the table, record it and return it."""
        info = TableInfo(table_name, cols, self)
        self.tables.append(info)
        return info

    def emit(self, codelet):
        """Append ``codelet`` followed by a newline to the generated code."""
        self.k9code = "".join((self.k9code, codelet, '\n'))

    def __str__(self):
        """Return all code emitted so far."""
        return self.k9code
class ast_node:
    """Base class for statement handlers.

    Constructing a node runs three hooks in order -- ``produce``,
    ``enumerate`` and ``consume`` -- each receiving the parsed node.  The
    base implementations do nothing; subclasses override what they need.
    """

    # registry of handler classes, filled in by the modules defining them
    types = dict()
    name = 'null'

    def __init__(self, parent:"ast_node", node, context:Context = None):
        # Without an explicit context the parent's one is inherited.
        if context is None:
            context = parent.context
        self.context = context
        for hook in ('produce', 'enumerate', 'consume'):
            getattr(self, hook)(node)

    def emit(self, code):
        """Forward ``code`` to the context's ``emit``."""
        self.context.emit(code)

    def produce(self, _):
        pass

    def enumerate(self, _):
        pass

    def consume(self, _):
        pass

@ -0,0 +1,22 @@
from engine.ast import TableInfo, ast_node
class create_table(ast_node):
    """Handler for statements carrying a ``create_table`` key.

    Registers the table with the context and emits one empty k9 list
    (``<k9 name>:()``) per declared column.
    """
    name = 'create_table'

    def produce(self, node):
        ct = node[self.name]
        tbl = self.context.add_table(ct['name'], ct['columns'])
        # create tables in k9
        for c in ct['columns']:
            # get_k9colname expects (context, column name) and returns the
            # k9 name as a string.
            self.emit(f"{tbl.get_k9colname(self.context, c['name'])}:()")
class insert_into(ast_node):
    """Handler for statements carrying an ``insert`` key.

    Currently only resolves the target table in the context (raising
    ``KeyError`` for an unknown table); no code is emitted yet.
    """
    name = 'insert'

    def produce(self, node):
        target = node[self.name]
        table:TableInfo = self.context.tables_byname[target]
import sys
import inspect

# Register every handler class visible in this module under the statement
# key it handles (its ``name`` attribute).  ``generate`` looks handlers up by
# the keys of the parsed statement, so the Python class name is not a usable
# key (e.g. ``insert_into`` handles the ``insert`` key).  The abstract base
# class itself handles nothing and is skipped.
for _cls_name, cls in inspect.getmembers(sys.modules[__name__], inspect.isclass):
    if issubclass(cls, ast_node) and cls is not ast_node:
        ast_node.types[cls.name] = cls

@ -1,602 +0,0 @@
# encoding: utf-8
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Author: Beto Dealmeida (beto@dealmeida.net)
#
from __future__ import absolute_import, division, unicode_literals
import re
from mo_dots import split_field
from mo_future import first, is_text, string_types, text
from mo_parsing import listwrap
from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence
from mo_sql_parsing.utils import binary_ops, is_set_op
MAX_PRECEDENCE = 100
VALID = re.compile(r"^[a-zA-Z_]\w*$")
def is_keyword(identifier):
    """Report whether ``identifier`` is accepted by ``RESERVED.parse_string``.

    Any exception raised by the parse attempt counts as "not a keyword".
    """
    try:
        RESERVED.parse_string(identifier)
    except Exception:
        return False
    else:
        return True
def should_quote(identifier):
    """
    Decide whether an identifier has to be quoted.

    ``*`` is never quoted.  Anything else is quoted when it does not match
    the ``VALID`` pattern (e.g. it contains spaces) or when it is a
    reserved word.
    """
    if identifier == "*":
        return False
    if not VALID.match(identifier):
        return True
    return is_keyword(identifier)
def escape(ident, ansi_quotes, should_quote):
    """
    Quote the parts of a dotted identifier that need quoting.

    The identifier is split with ``split_field``; each part for which
    ``should_quote`` is true is wrapped in the quote character (double quote
    when ``ansi_quotes`` is true, back quote otherwise) with embedded quote
    characters doubled.  The parts are joined back with ``.``.
    """
    quote = '"' if ansi_quotes else "`"

    def esc(identifier):
        if should_quote(identifier):
            doubled = identifier.replace(quote, 2 * quote)
            return "{0}{1}{2}".format(quote, doubled, quote)
        return identifier

    return ".".join(map(esc, split_field(ident)))
def Operator(_op):
    """Build a formatter method that renders the infix operator ``_op``.

    The returned function dispatches every operand, joins the results with
    the upper-cased operator text and wraps the whole expression in
    parentheses when the caller's precedence is below the operator's own.
    """
    op_prec = precedence[binary_ops[_op]]
    op = " {0} ".format(_op).replace("_", " ").upper()

    def func(self, json, prec):
        operands = json
        if isinstance(operands, dict):
            # {VARIABLE: VALUE} FORM
            variable, literal = first(operands.items())
            operands = [variable, {"literal": literal}]

        # The first operand is dispatched with a slightly higher precedence
        # than the remaining ones.
        rendered = [
            self.dispatch(operand, op_prec if index else op_prec + 0.25)
            for index, operand in enumerate(listwrap(operands))
        ]
        joined = op.join(rendered)
        return joined if prec >= op_prec else f"({joined})"

    return func
def isolate(expr, sql, prec):
    """
    RETURN sql IN PARENTHESES WHEN THE LOWEST KNOWN PRECEDENCE AMONG THE
    KEYS OF expr IS >= prec; OTHERWISE RETURN sql UNCHANGED
    :param expr: expression to isolate (text is never parenthesized)
    :param sql: sql to return
    :param prec: current precedence
    """
    if is_text(expr):
        return sql

    known = []
    for key in expr.keys():
        level = precedence.get(key)
        if level is not None:
            known.append(level)

    if known and min(known) >= prec:
        return f"({sql})"
    return sql
# Clause keys that mark a JSON dict as a plain query.  ``unordered_query``
# renders whichever of them are present, in exactly this order, by calling
# the ``Formatter`` method of the same name.
unordered_clauses = [
    "with",
    "distinct_on",
    "select_distinct",
    "select",
    "from",
    "where",
    "groupby",
    "having",
]
# Trailing clause keys handled by ``ordered_query``; they are appended, in
# this order, after the body of the query.
ordered_clauses = [
    "orderby",
    "limit",
    "offset",
    "fetch",
]
class Formatter:
    """Render a parsed-SQL JSON structure back into SQL text.

    ``dispatch`` is the recursive entry point: it inspects the shape of the
    value (list, dict, string, null, other) and hands it to the matching
    method.  Operator and function calls are single-key dicts handled by
    ``op``, which looks for a method named ``_<key>`` on this class; any new
    attribute starting with ``_`` therefore becomes a dispatchable operator.

    ``prec`` arguments carry the precedence of the surrounding expression
    and decide whether a rendered fragment needs parentheses.
    """

    # infix operators
    _concat = Operator("||")
    _mul = Operator("*")
    _div = Operator("/")
    _mod = Operator("%")
    _add = Operator("+")
    _sub = Operator("-")
    _neq = Operator("<>")
    _gt = Operator(">")
    _lt = Operator("<")
    _gte = Operator(">=")
    _lte = Operator("<=")
    _eq = Operator("=")
    _or = Operator("or")
    _and = Operator("and")
    _binary_and = Operator("&")
    _binary_or = Operator("|")
    _like = Operator("like")
    _not_like = Operator("not like")
    _rlike = Operator("rlike")
    _not_rlike = Operator("not rlike")
    _union = Operator("union")
    _union_all = Operator("union all")
    _intersect = Operator("intersect")
    _minus = Operator("minus")
    _except = Operator("except")

    def __init__(self, ansi_quotes=True, should_quote=should_quote):
        """
        :param ansi_quotes: quote identifiers with double quotes when true,
            with back quotes otherwise
        :param should_quote: predicate deciding whether an identifier part
            must be quoted
        """
        self.ansi_quotes = ansi_quotes
        self.should_quote = should_quote

    def format(self, json):
        """Render ``json`` starting from a precedence of 50."""
        return self.dispatch(json, 50)

    def dispatch(self, json, prec=100):
        """Render any JSON value by routing it to the matching method."""
        if isinstance(json, list):
            return self.sql_list(json, prec=precedence["list"])
        if isinstance(json, dict):
            if len(json) == 0:
                return ""
            elif "value" in json:
                return self.value(json, prec)
            elif "join" in json:
                # _join_on requires a precedence argument
                return self._join_on(json, prec)
            elif "insert" in json:
                return self.insert(json)
            elif json.keys() & set(ordered_clauses):
                return self.ordered_query(json, prec)
            elif json.keys() & set(unordered_clauses):
                return self.unordered_query(json, prec)
            elif "null" in json:
                return "NULL"
            elif "trim" in json:
                return self._trim(json, prec)
            elif "extract" in json:
                return self._extract(json, prec)
            else:
                return self.op(json, prec)
        if isinstance(json, string_types):
            return escape(json, self.ansi_quotes, self.should_quote)
        # NOTE(review): `== None` (not `is None`) is kept on purpose; it may
        # be relied on for null-like objects that compare equal to None.
        if json == None:
            return "NULL"

        return text(json)

    def sql_list(self, json, prec=precedence["from"] - 1):
        """Render a comma-separated list, parenthesized when ``prec`` is
        below the precedence of ``from``."""
        sql = ", ".join(self.dispatch(element, prec=MAX_PRECEDENCE) for element in json)
        if prec >= precedence["from"]:
            return sql
        else:
            return f"({sql})"

    def value(self, json, prec=precedence["from"]):
        """Render a ``{"value": ...}`` item with its optional ``over``
        window specification and ``name`` alias."""
        parts = [self.dispatch(json["value"], prec)]
        if "over" in json:
            over = json["over"]
            parts.append("OVER")
            window = []
            if "partitionby" in over:
                window.append("PARTITION BY")
                window.append(self.dispatch(over["partitionby"]))
            if "orderby" in over:
                window.append(self.orderby(over, precedence["window"]))
            if "range" in over:

                def wordy(v):
                    # only ever called with a non-zero offset
                    if v < 0:
                        return [text(abs(v)), "PRECEDING"]
                    elif v > 0:
                        return [text(v), "FOLLOWING"]

                window.append("ROWS")
                frame = over["range"]
                lower = frame.get("min")
                upper = frame.get("max")

                if lower is None:
                    if upper is None:
                        window.pop()  # not expected, but deal
                    elif upper == 0:
                        window.append("UNBOUNDED PRECEDING")
                    else:
                        window.append("BETWEEN")
                        window.append("UNBOUNDED PRECEDING")
                        window.append("AND")
                        window.extend(wordy(upper))
                elif lower == 0:
                    if upper is None:
                        window.append("UNBOUNDED FOLLOWING")
                    elif upper == 0:
                        window.append("CURRENT ROW")
                    else:
                        window.extend(wordy(upper))
                else:
                    if upper is None:
                        window.append("BETWEEN")
                        window.extend(wordy(lower))
                        window.append("AND")
                        window.append("UNBOUNDED FOLLOWING")
                    elif upper == 0:
                        window.extend(wordy(lower))
                    else:
                        window.append("BETWEEN")
                        window.extend(wordy(lower))
                        window.append("AND")
                        window.extend(wordy(upper))

            window = " ".join(window)
            parts.append(f"({window})")
        if "name" in json:
            parts.extend(["AS", self.dispatch(json["name"])])
        return " ".join(parts)

    def op(self, json, prec):
        """Render a single-key dict as an operator or a function call.

        :raises Exception: when the dict has more than one key
        """
        if len(json) > 1:
            raise Exception("Operators should have only one key!")
        key, value = list(json.items())[0]

        # check if the attribute exists, and call the corresponding method;
        # note that we disallow keys that start with `_` to avoid giving access
        # to magic methods
        attr = f"_{key}"
        if hasattr(self, attr) and not key.startswith("_"):
            method = getattr(self, attr)
            op_prec = precedence.get(key, MAX_PRECEDENCE)
            if prec >= op_prec:
                return method(value, op_prec)
            else:
                return f"({method(value, op_prec)})"

        # treat as regular function call
        if isinstance(value, dict) and len(value) == 0:
            return (
                key.upper() + "()"
            )  # NOT SURE IF AN EMPTY dict SHOULD BE DEALT WITH HERE, OR IN self.format()
        else:
            params = ", ".join(self.dispatch(p) for p in listwrap(value))
            return f"{key.upper()}({params})"

    def _binary_not(self, value, prec):
        return "~{0}".format(self.dispatch(value))

    def _exists(self, value, prec):
        return "{0} IS NOT NULL".format(self.dispatch(value, precedence["is"]))

    def _missing(self, value, prec):
        return "{0} IS NULL".format(self.dispatch(value, precedence["is"]))

    def _collate(self, pair, prec):
        return "{0} COLLATE {1}".format(
            self.dispatch(pair[0], precedence["collate"]), pair[1]
        )

    def _in(self, json, prec):
        """Render ``member IN candidates``."""
        member, candidates = json
        if "literal" in candidates:
            candidates = {"literal": listwrap(candidates["literal"])}

        sql = (
            self.dispatch(member, precedence["in"])
            + " IN "
            + self.dispatch(candidates, precedence["in"])
        )
        if prec < precedence["in"]:
            sql = f"({sql})"
        return sql

    def _nin(self, json, prec):
        """Render ``member NOT IN candidates``."""
        member, candidates = json
        if "literal" in candidates:
            candidates = {"literal": listwrap(candidates["literal"])}

        sql = (
            self.dispatch(member, precedence["in"])
            + " NOT IN "
            + self.dispatch(candidates, precedence["in"])
        )
        if prec < precedence["in"]:
            sql = f"({sql})"
        return sql

    def _case(self, checks, prec):
        """Render a CASE expression; entries without both ``when`` and
        ``then`` are rendered as the ELSE branch."""
        parts = ["CASE"]
        for check in checks if isinstance(checks, list) else [checks]:
            if isinstance(check, dict):
                if "when" in check and "then" in check:
                    parts.extend(["WHEN", self.dispatch(check["when"])])
                    parts.extend(["THEN", self.dispatch(check["then"])])
                else:
                    parts.extend(["ELSE", self.dispatch(check)])
            else:
                parts.extend(["ELSE", self.dispatch(check)])
        parts.append("END")
        return " ".join(parts)

    def _cast(self, json, prec):
        expr, type_spec = json

        type_name, params = first(type_spec.items())
        if not params:
            target = type_name.upper()
        else:
            target = {type_name.upper(): params}

        return f"CAST({self.dispatch(expr)} AS {self.dispatch(target)})"

    def _extract(self, json, prec):
        interval, value = json["extract"]
        i = self.dispatch(interval).upper()
        v = self.dispatch(value)
        return f"EXTRACT({i} FROM {v})"

    def _interval(self, json, prec):
        amount = self.dispatch(json[0], precedence["and"])
        unit = self.dispatch(json[1], precedence["and"])
        return f"INTERVAL {amount} {unit.upper()}"

    def _literal(self, json, prec=0):
        """Render a literal: lists as a parenthesized list, strings in
        single quotes (embedded quotes doubled), anything else via ``str``."""
        if isinstance(json, list):
            return "({0})".format(", ".join(
                self._literal(v, precedence["literal"]) for v in json
            ))
        elif isinstance(json, string_types):
            return "'{0}'".format(json.replace("'", "''"))
        else:
            return str(json)

    def _get(self, json, prec):
        v, i = json
        v_sql = self.dispatch(v, prec=precedence["literal"])
        i_sql = self.dispatch(i)
        return f"{v_sql}[{i_sql}]"

    def _between(self, json, prec):
        return "{0} BETWEEN {1} AND {2}".format(
            self.dispatch(json[0], precedence["between"]),
            self.dispatch(json[1], precedence["between"]),
            self.dispatch(json[2], precedence["between"]),
        )

    def _trim(self, json, prec):
        c = json.get("characters")
        d = json.get("direction")
        v = json["trim"]
        acc = ["TRIM("]
        if d:
            acc.append(d.upper())
            acc.append(" ")
        if c:
            acc.append(self.dispatch(c))
            acc.append(" ")
        if c or d:
            acc.append("FROM ")
        acc.append(self.dispatch(v))
        acc.append(")")
        return "".join(acc)

    def _not_between(self, json, prec):
        return "{0} NOT BETWEEN {1} AND {2}".format(
            self.dispatch(json[0], precedence["between"]),
            self.dispatch(json[1], precedence["between"]),
            self.dispatch(json[2], precedence["between"]),
        )

    def _distinct(self, json, prec):
        return "DISTINCT " + ", ".join(
            self.dispatch(v, precedence["select"]) for v in listwrap(json)
        )

    def _select_distinct(self, json, prec):
        return "SELECT DISTINCT " + ", ".join(self.dispatch(v) for v in listwrap(json))

    def _distinct_on(self, json, prec):
        return (
            "DISTINCT ON (" + ", ".join(self.dispatch(v) for v in listwrap(json)) + ")"
        )

    def _join_on(self, json, prec):
        """Render one join entry with its optional ON / USING parts.

        :raises Exception: when no key of ``json`` is a known join keyword
        """
        detected_join = join_keywords & set(json.keys())
        if len(detected_join) == 0:
            raise Exception(
                'Fail to detect join type! Detected: "{}" Except one of: "{}"'.format(
                    [on_keyword for on_keyword in json if on_keyword != "on"][0],
                    '", "'.join(join_keywords),
                )
            )

        join_keyword = detected_join.pop()

        acc = []
        acc.append(join_keyword.upper())
        acc.append(self.dispatch(json[join_keyword], precedence["join"]))

        if json.get("on"):
            acc.append("ON")
            acc.append(self.dispatch(json["on"]))
        if json.get("using"):
            acc.append("USING")
            acc.append(self.dispatch(json["using"]))
        return " ".join(acc)

    def ordered_query(self, json, prec):
        """Render a query (or set-op expression) followed by its ordering
        clauses (see ``ordered_clauses``)."""
        if json.keys() & set(unordered_clauses) - {"from"}:
            # regular query
            acc = [self.unordered_query(json, precedence["order"])]
        else:
            # set-op expression
            acc = [self.dispatch(json["from"], precedence["order"])]

        acc.extend(
            part
            for clause in ordered_clauses
            if clause in json
            for part in [getattr(self, clause)(json, precedence["order"])]
            if part
        )
        sql = " ".join(acc)
        if prec >= precedence["order"]:
            return sql
        else:
            return f"({sql})"

    def unordered_query(self, json, prec):
        """Render the clauses listed in ``unordered_clauses`` that are
        present in ``json``."""
        sql = " ".join(
            part
            for clause in unordered_clauses
            if clause in json
            for part in [getattr(self, clause)(json, precedence["from"])]
            if part
        )
        if prec >= precedence["from"]:
            return sql
        else:
            return f"({sql})"

    def with_(self, json, prec):
        if "with" in json:
            with_ = json["with"]
            if not isinstance(with_, list):
                with_ = [with_]
            parts = ", ".join(
                "{0} AS ({1})".format(part["name"], self.dispatch(part["value"]))
                for part in with_
            )
            return "WITH {0}".format(parts)

    def select(self, json, prec):
        param = ", ".join(self.dispatch(s) for s in listwrap(json["select"]))
        if "top" in json:
            top = self.dispatch(json["top"])
            return f"SELECT TOP ({top}) {param}"
        if "distinct_on" in json:
            # the SELECT keyword was already produced by distinct_on()
            return param
        else:
            return f"SELECT {param}"

    def distinct_on(self, json, prec):
        param = ", ".join(self.dispatch(s) for s in listwrap(json["distinct_on"]))
        return f"SELECT DISTINCT ON ({param})"

    def select_distinct(self, json, prec):
        param = ", ".join(self.dispatch(s) for s in listwrap(json["select_distinct"]))
        return f"SELECT DISTINCT {param}"

    def from_(self, json, prec):
        is_join = False
        from_ = json["from"]
        if isinstance(from_, dict) and is_set_op & from_.keys():
            source = self.op(from_, precedence["from"])
            return f"FROM {source}"

        from_ = listwrap(from_)
        parts = []
        for v in from_:
            if join_keywords & set(v):
                is_join = True
                parts.append(self._join_on(v, precedence["from"] - 1))
            else:
                parts.append(self.dispatch(v, precedence["from"] - 1))
        # joins are separated by spaces, plain sources by commas
        joiner = " " if is_join else ", "
        rest = joiner.join(parts)
        return f"FROM {rest}"

    def where(self, json, prec):
        expr = self.dispatch(json["where"])
        return f"WHERE {expr}"

    def groupby(self, json, prec):
        param = ", ".join(self.dispatch(s) for s in listwrap(json["groupby"]))
        return f"GROUP BY {param}"

    def having(self, json, prec):
        return "HAVING {0}".format(self.dispatch(json["having"]))

    def orderby(self, json, prec):
        param = ", ".join(
            (
                self.dispatch(s["value"], precedence["order"])
                + " "
                + s.get("sort", "").upper()
            ).strip()
            for s in listwrap(json["orderby"])
        )
        return f"ORDER BY {param}"

    def limit(self, json, prec):
        num = self.dispatch(json["limit"], precedence["order"])
        return f"LIMIT {num}"

    def offset(self, json, prec):
        num = self.dispatch(json["offset"], precedence["order"])
        return f"OFFSET {num}"

    def fetch(self, json, prec):
        # ordered_query only guarantees that the "fetch" key is present
        num = self.dispatch(json["fetch"], precedence["order"])
        return f"FETCH {num} ROWS ONLY"

    def insert(self, json, prec=precedence["from"]):
        """Render an INSERT statement.

        Rows given as dicts supply their own column list (the sorted union
        of their keys); rows given as lists are rendered positionally.
        Without ``values`` the statement is completed from ``json["query"]``.
        """
        acc = ["INSERT"]
        if "overwrite" in json:
            acc.append("OVERWRITE")
        else:
            acc.append("INTO")
        acc.append(json["insert"])

        if "columns" in json:
            # render the column names, not the keys of the whole statement
            acc.append(self.sql_list(listwrap(json["columns"])))
        if "values" in json:
            values = json["values"]
            if all(isinstance(row, dict) for row in values):
                columns = list(sorted(set(k for row in values for k in row.keys())))
                acc.append(self.sql_list(columns))
                if "if exists" in json:
                    acc.append("IF EXISTS")
                acc.append("VALUES")
                acc.append(",\n".join(
                    "(" + ", ".join(self._literal(row[c]) for c in columns) + ")"
                    for row in values
                ))
            else:
                if "if exists" in json:
                    acc.append("IF EXISTS")
                acc.append("VALUES")
                # render each cell separately; joining the characters of an
                # already-rendered row would garble the output
                acc.append(",\n".join(
                    "(" + ", ".join(self._literal(cell) for cell in listwrap(row)) + ")"
                    for row in values
                ))
        else:
            if json.get("if exists"):
                acc.append("IF EXISTS")
            acc.append(self.dispatch(json["query"]))
        return " ".join(acc)


# "with" and "from" are Python keywords, so the clause methods are defined
# with a trailing underscore and aliased here under the clause names that
# ordered_query / unordered_query look up with getattr.
setattr(Formatter, "with", Formatter.with_)
setattr(Formatter, "from", Formatter.from_)

@ -0,0 +1,11 @@
CREATE TABLE sale(Month INT, sales INT)
LOAD DATA INFILE "moving_avg.csv"
INTO TABLE sale
FIELDS TERMINATED BY "\t"
SELECT Month,avgs(3,sales)
FROM sale
ASSUMING ASC Month
INTO OUTFILE "moving_avg_output.csv"
FIELDS TERMINATED BY ","

@ -0,0 +1,6 @@
Month sales
1 100
2 120
4 140
3 140
5 130
1 Month sales
2 1 100
3 2 120
4 4 140
5 3 140
6 5 130

@ -1,6 +1,10 @@
import re import re
import mo_sql_parsing as parser import aquery_parser as parser
import engine
test_parser = True
# code to test parser
ws = re.compile(r'\s+') ws = re.compile(r'\s+')
q = 'SELECT p.Name, v.Name FROM Production.Product p JOIN Purchasing.ProductVendor pv ON p.ProductID = pv.ProductID JOIN Purchasing.Vendor v ON pv.BusinessEntityID = v.BusinessEntityID WHERE ProductSubcategoryID = 15 ORDER BY v.Name;' q = 'SELECT p.Name, v.Name FROM Production.Product p JOIN Purchasing.ProductVendor pv ON p.ProductID = pv.ProductID JOIN Purchasing.Vendor v ON pv.BusinessEntityID = v.BusinessEntityID WHERE ProductSubcategoryID = 15 ORDER BY v.Name;'
@ -9,9 +13,11 @@ res = parser.parse(q)
print(res) print(res)
while True: while test_parser:
try: try:
q = input() q = input()
if q == 'break':
break
trimed = ws.sub(' ', q.lower()).split(' ') trimed = ws.sub(' ', q.lower()).split(' ')
if trimed[0] == 'file': if trimed[0] == 'file':
fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \ fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \
@ -25,3 +31,7 @@ while True:
print(stmts) print(stmts)
except Exception as e: except Exception as e:
print(type(e), e) print(type(e), e)
cxt = engine.initialize()
for s in stmts['stmts']:
engine.generate(s, cxt)

@ -0,0 +1,22 @@
CREATE TABLE stocks(timestamp INT, price INT)
INSERT INTO stocks VALUES(1,15)
INSERT INTO stocks VALUES(2,19)
INSERT INTO stocks VALUES(3,16)
INSERT INTO stocks VALUES(4,17)
INSERT INTO stocks VALUES(5,15)
INSERT INTO stocks VALUES(6,13)
INSERT INTO stocks VALUES(7,5)
INSERT INTO stocks VALUES(8,8)
INSERT INTO stocks VALUES(9,7)
INSERT INTO stocks VALUES(10,13)
INSERT INTO stocks VALUES(11,11)
INSERT INTO stocks VALUES(12,14)
INSERT INTO stocks VALUES(13,10)
INSERT INTO stocks VALUES(14,5)
INSERT INTO stocks VALUES(15,2)
INSERT INTO stocks VALUES(16,5)
SELECT max(price-mins(price))
FROM stocks
ASSUMING ASC timestamp
Loading…
Cancel
Save