From 6b2bd5011b460c233c3d8e773e6360204ac4cfb1 Mon Sep 17 00:00:00 2001 From: Bill Sun Date: Tue, 11 Jan 2022 14:28:57 -0500 Subject: [PATCH] initial support for udf, inline k9 block, assumptions --- mo_sql_parsing/keywords.py | 9 ++++++- mo_sql_parsing/sql_parser.py | 33 ++++++++++++++++++++++---- q.sql | 14 +++++++++++ q1.sql | 46 ++++++++++++++++++++++++++++++++++++ run.py | 18 +++++++++++--- 5 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 q.sql create mode 100644 q1.sql diff --git a/mo_sql_parsing/keywords.py b/mo_sql_parsing/keywords.py index 28b2d28..fd0c3dc 100644 --- a/mo_sql_parsing/keywords.py +++ b/mo_sql_parsing/keywords.py @@ -33,6 +33,7 @@ EXCEPT = keyword("except") FETCH = keyword("fetch").suppress() FROM = keyword("from").suppress() FULL = keyword("full") +FUNCTION = keyword("function").suppress() GROUP = keyword("group").suppress() HAVING = keyword("having").suppress() INNER = keyword("inner") @@ -109,6 +110,7 @@ INDF = ( # https://prestodb.io/docs/current/functions/comparison.html#is-distinct-from-and-is-not-distinct-from keyword("is not distinct from").set_parser_name("ne!") ) +FASSIGN = Literal(":=").set_parser_name("fassign") # Assignment in UDFs NEQ = (Literal("!=") | Literal("<>")).set_parser_name("neq") LAMBDA = Literal("->").set_parser_name("lambda") @@ -181,6 +183,7 @@ RESERVED = MatchFirst([ FOREIGN, FROM, FULL, + FUNCTION, GROUP_BY, GROUP, HAVING, @@ -224,7 +227,10 @@ RESERVED = MatchFirst([ WITH, WITHIN, ]) - +L_INLINE = Literal("<k>").suppress() +R_INLINE = Literal("</k>").suppress() +LBRACE = Literal("{").suppress() +RBRACE = Literal("}").suppress() LB = Literal("(").suppress() RB = Literal(")").suppress() EQ = Char("=").suppress() @@ -282,6 +288,7 @@ precedence = { "lambda": 12, "join": 18, "list": 18, + "function": 30, "select": 30, "from": 30, "window": 35, diff --git a/mo_sql_parsing/sql_parser.py b/mo_sql_parsing/sql_parser.py index 710d6b2..535824d 100644 --- a/mo_sql_parsing/sql_parser.py +++ 
b/mo_sql_parsing/sql_parser.py @@ -7,7 +7,9 @@ # Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # - +from operator import add +from textwrap import indent +from mo_parsing import whitespaces from mo_parsing.helpers import restOfLine from mo_parsing.infix import delimited_list from mo_parsing.whitespaces import NO_WHITESPACE, Whitespace @@ -76,7 +78,8 @@ def parser(literal_string, ident, sqlserver=False): engine.add_ignore(Literal("/*") + SkipTo("*/", include=True)) var_name = ~RESERVED + ident - + + inline_kblock = (L_INLINE + SkipTo(R_INLINE, include=True))("k9") # EXPRESSIONS expr = Forward() column_type, column_definition, column_def_references = get_column_type( @@ -341,6 +344,22 @@ def parser(literal_string, ident, sqlserver=False): + Group(var_name("name") + AS + over_clause("value"))("join") ) ) / to_join_call + + fassign = Group(var_name("var") + Suppress(FASSIGN) + expr("expr") + Suppress(";"))("assignment") + fassigns = fassign + ZeroOrMore(fassign, Whitespace(white=" \t")) + + fbody = (Optional(fassigns) + expr("ret")) + + udf = ( + FUNCTION + + var_name("fname") + + LB + + Optional(delimited_list(var_name)("params")) + + RB + + LBRACE + + fbody + + RBRACE + ) selection = ( (SELECT + DISTINCT + ON + LB) @@ -407,11 +426,13 @@ def parser(literal_string, ident, sqlserver=False): ) + RB, ) - assumption = (ASSUMING + (ASC|DESC)("assumption")) + + assumption = Group((ASC|DESC) ("ord") + var_name("attrib")) + assumptions = (ASSUMING + Group(delimited_list(assumption))("assumptions")) table_source << Group( ((LB + query + RB) | stack | call_function | var_name)("value") - + Optional(assumption) + + Optional(assumptions) + Optional(flag("with ordinality")) + Optional(tablesample) + alias @@ -600,7 +621,9 @@ def parser(literal_string, ident, sqlserver=False): ) / to_json_call return ( - query + inline_kblock + | udf + | query | (insert | update | delete) | (create_table | create_view | create_cache | create_index) | (drop_table | drop_view | drop_index) diff --git 
a/q.sql b/q.sql new file mode 100644 index 0000000..e456f51 --- /dev/null +++ b/q.sql @@ -0,0 +1,14 @@ +FUNCTION +execStrategy ( alloc , mavgday , mavgmonth , px ) { +buySignal := mavgday > mavgmonth ; +f := a + b ; +alloc * prd ( +CASE maxs ( buySignal ) +WHEN TRUE THEN +CASE buySignal +WHEN TRUE THEN 1 / px +ELSE px +END +ELSE 1 +END ) +} \ No newline at end of file diff --git a/q1.sql b/q1.sql new file mode 100644 index 0000000..fb7fcaa --- /dev/null +++ b/q1.sql @@ -0,0 +1,46 @@ +WITH +Target (Id , TradeDate , ClosePrice ) AS +( SELECT +Id , TradeDate , ClosePrice +FROM price +WHERE Id IN stock10 AND +TradeDate >= startYear10 AND +TradeDate <= startYear10 + 365 * 10), +weekly (Id , bucket , name , low , high , mean ) AS +( SELECT +Id , +timeBucket , +" weekly " , +min ( ClosePrice ) , +max ( ClosePrice ) , +avg ( ClosePrice ) +FROM Target +GROUP BY Id , getWeek ( TradeDate ) as +timeBucket ), +monthly ( Id , bucket , name , low , high , mean ) AS +( SELECT +Id , +timeBucket , +" monthly " , +min ( ClosePrice ) , +max ( ClosePrice ) , +avg ( ClosePrice ) +FROM Target +GROUP BY Id , getMonth ( TradeDate ) as +timeBucket ), +yearly (Id , bucket , name , low , high , mean ) AS +( SELECT +Id , +timeBucket , +" yearly " , +min ( ClosePrice ) , +max ( ClosePrice ) , +avg ( ClosePrice ) +FROM Target +GROUP BY Id , getYear ( TradeDate ) as +timeBucket ) +SELECT +Id , bucket , name , low , high , mean +FROM +CONCATENATE ( weekly , monthly , yearly ) +ASSUMING ASC Id , ASC name , ASC bucket diff --git a/run.py b/run.py index 45d8da4..67d26cf 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,8 @@ +import re import mo_sql_parsing as parser + +ws = re.compile(r'\s+') + q = 'SELECT p.Name, v.Name FROM Production.Product p JOIN Purchasing.ProductVendor pv ON p.ProductID = pv.ProductID JOIN Purchasing.Vendor v ON pv.BusinessEntityID = v.BusinessEntityID WHERE ProductSubcategoryID = 15 ORDER BY v.Name;' res = parser.parse(q) @@ -8,8 +12,16 @@ print(res) while True: try: q = input() + 
trimed = ws.sub(' ', q.lower()).split(' ') + if trimed[0] == 'file': + fn = 'q.sql' if len(trimed) <= 1 or len(trimed[1]) == 0 \ + else trimed[1] + + with open(fn, 'r') as file: + contents = file.read() + stmts = parser.parse(contents) + continue stmts = parser.parse(q) - for s in stmts: - print(s) + print(stmts) except Exception as e: - print(e) + print(type(e), e)