import java.util.ArrayList;
import java.util.List;
import java_cup.runtime.*;
import chocopy.common.astnodes.*;

/* The following code section is copied verbatim to the generated
 * parser class. */
parser code {:

    /* The following fields and methods deal with error reporting.
     * Avoid changing these unless you know what you are doing. */

    /** Node that accumulates error messages to be added to the Program
     *  node produced as a result. */
    public final Errors errors = new Errors(new ArrayList<>());

    /** Return the Program node that results from parsing the stream of
     *  tokens produced by lexical analysis.  In the case of syntax errors,
     *  the program may be empty, but will have error messages.
     *
     *  If DEBUG is true, the parse is run through debug_parse() rather
     *  than parse().  Runtime exceptions raised while parsing propagate
     *  unchanged; any other exception is rethrown as an AssertionError
     *  that carries the original exception as its cause. */
    public Program parseProgram(boolean debug) {
        try {
            Symbol result = debug ? debug_parse() : parse();
            if (result == null || !(result.value instanceof Program)) {
                /* No usable parse result: return an empty program that
                 * still carries the accumulated error messages. */
                return new Program(new Location(0, 0), new Location(0, 0),
                                   new ArrayList<>(), new ArrayList<>(),
                                   errors);
            } else {
                return (Program) result.value;
            }
        } catch (RuntimeException excp) {
            throw excp;
        } catch (Exception excp) {
            String msg =
                String.format("Internal parser error detected: %s%n", excp);
            /* Keep EXCP as the cause so its stack trace is not lost. */
            throw new AssertionError(msg, excp);
        }
    }

    /** Return the symbol factory of the lexer that feeds this parser. */
    @Override
    public SymbolFactory getSymbolFactory() {
        return ((ChocoPyLexer) getScanner()).symbolFactory;
    }

    /** Record a syntax error at the location of CUR_TOKEN, reporting the
     *  token's symbol name and the text most recently matched by the
     *  lexer. */
    @Override
    public void syntax_error(Symbol cur_token) {
        String token = symbl_name_from_id(cur_token.sym);
        String text = ((ChocoPyLexer) getScanner()).yytext();
        errors.syntaxError(
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xleft,
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xright,
            "Parse error near token %s: %s", token, text);
    }

    /** Deliberately does nothing, so that an unrecoverable syntax error
     *  does not abort here; the error itself has already been recorded
     *  by syntax_error. */
    @Override
    public void unrecovered_syntax_error(Symbol cur_token) {
        /* Do not die */
    }
:}

/**************************************************************************
 *              FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE
 *
 * The rules provided below parse expressions of the form
 * NUMBER + NUMBER + ...
 * You can re-use these rules or edit them as you wish.
The start rule should return a node of type Program.
 *
 * Tips: Production rules are usually followed by action code that will be
 * copied to the generated parser to be executed immediately after a reduce
 * operation; that is, when a production rule has been matched. You can name
 * a nonterminal or terminal symbol in a production rule using the colon
 * notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
 * expression. In the action code, `e` will be a variable of whatever type
 * has been declared for the corresponding nonterminal, such as `Expr`.
 * Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
 * constructor: `new ExprStmt(exleft, exright, e)`
 *
 * The variables `exleft` and `exright` are automatically generated by CUP
 * and contain Location objects for the start and end of the expression `e`.
 * You can collect start and end location info for AST nodes by taking the
 * location of the left end of the leftmost symbol in a rule and the
 * location of the right end of the rightmost symbol. The auto-generated
 * variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
 * name given to the symbol using the colon notation.
 *
 * When you have nonterminals that are lists of things, e.g. a list of
 * statements or a list of declarations, it is helpful to get the leftmost
 * and rightmost source location from within this list; we have provided
 * some utility functions below to do just that.
 **************************************************************************/

/* The following code section is copied verbatim to the class that performs
 * production-rule actions. */
action code {:

    /** Return a mutable list initially containing the single value ITEM.
     *  If ITEM is null, the returned list is empty. */
    <T> List<T> single(T item) {
        List<T> list = new ArrayList<>();
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** If ITEM is non-null, appends it to the end of LIST.  Then returns
     *  LIST. */
    <T> List<T> combine(List<T> list, T item) {
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** Return a mutable empty list. */
    <T> List<T> empty() {
        return new ArrayList<>();
    }

    /** Return the leftmost non-whitespace location in NODES, or null if NODES
     *  is empty.  Assumes that the nodes of NODES are ordered in increasing
     *  order of location, from left to right.  The result is built from
     *  elements 0 and 1 of the first node's location array. */
    ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        Node first = nodes.get(0);
        return new ComplexSymbolFactory.Location(first.getLocation()[0],
                                                 first.getLocation()[1]);
    }

    /** Return the rightmost non-whitespace location in NODES, or null if
     *  NODES is empty.  Assumes that the nodes of NODES are ordered in
     *  increasing order of location, from left to right.  The result is
     *  built from elements 2 and 3 of the last node's location array. */
    ComplexSymbolFactory.Location getRight(List<? extends Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        Node last = nodes.get(nodes.size() - 1);
        return new ComplexSymbolFactory.Location(last.getLocation()[2],
                                                 last.getLocation()[3]);
    }

:}

/* Terminal symbols (tokens returned by the lexer).  The declaration
 *     terminal <identifier1>, <identifier2>, ...;
 * declares each <identifier> as the denotation of a distinct type of
 * terminal symbol for use in the grammar.  The declaration
 *     terminal <type> <identifier1>, ...;
 * does the same, and in addition indicates that the lexer supplies a
 * semantic value of type <type> for these symbols that may be referenced
 * in actions ( {: ... :} ).
*/
/* Block structure and names.  INDENT and DEDENT carry no semantic value. */
terminal INDENT, DEDENT;
terminal String ID, STRING;

/* Delimiters. */
terminal NEWLINE;
terminal String COLON, COMMA;

/* Literals. */
terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;

/* Keywords. */
terminal String IF, ELSE, ELIF, WHILE, CLASS, DEF, LAMBDA, AS, FOR, GLOBAL,
                IN, NONLOCAL, PASS, RETURN, ASSERT, AWAIT, BREAK, CONTINUE,
                DEL, EXCEPT, FINALLY, FROM, IMPORT, RAISE, TRY, WITH, YIELD;

/* Operators and bracketing symbols. */
terminal String PLUS, MINUS, MUL, DIV, MOD, GT, LT, EQUAL, NEQ, GEQ, LEQ,
                ASSIGN, AND, OR, NOT, DOT, LPAR, RPAR, LBR, RBR, ARROW, IS;

/* Returned by the lexer for erroneous tokens.  Since it does not appear in
 * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED;

/* Nonterminal symbols (defined in production rules below).
 * As for terminal symbols,
 *     non terminal <type> <identifier1>, ..., <identifierN>;
 * defines the listed nonterminal identifier symbols to have semantic values
 * of type <type>.
*/
non terminal Program            program;
non terminal List<Declaration>  program_head, class_body, class_body_defs,
                                fun_body_decs;
non terminal List<Stmt>         stmt_list, opt_stmt_list;
non terminal Stmt               stmt, expr_stmt;
non terminal Expr               expr, binary_expr;
non terminal VarDef             var_def;
non terminal ClassDef           class_def;
non terminal FuncDef            fun_def;
non terminal Literal            literal;
non terminal TypedVar           typed_var;
non terminal TypeAnnotation     type, ret_type;
non terminal Identifier         identifier;
non terminal List<TypedVar>     typed_vars;
non terminal GlobalDecl         global_decl;
non terminal NonLocalDecl       nonlocal_decl;

/* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts. */
precedence left OR;
precedence left AND;
precedence left NOT;
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
precedence left PLUS, MINUS;
precedence left MUL, DIV, MOD;
precedence left DOT, COMMA, LBR, RBR;

/* The start symbol. */
start with program;


/***** GRAMMAR RULES *****/

/* Rules are defined in the order given by the language reference */

/* program: a (possibly empty) run of definitions followed by a (possibly
 * empty) run of statements.  The left location comes from the first
 * definition if there is one, otherwise from the first statement. */
program ::= program_head:d opt_stmt_list:s
        {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
                                sxright, d, s, errors); :}
        ;

/* Definitions at the top of the program.  An erroneous definition is
 * skipped and the definitions collected so far are kept. */
program_head ::= program_head:d var_def:vd   {: RESULT = combine(d, vd); :}
               | program_head:d class_def:cd {: RESULT = combine(d, cd); :}
               | program_head:d fun_def:fd   {: RESULT = combine(d, fd); :}
               | program_head:d error        {: RESULT = d; :}
               | /* empty */                 {: RESULT = empty(); :}
               ;

/* class_def */
class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON
              NEWLINE INDENT class_body:cb DEDENT
        {: /* A `pass` body produces an empty list, for which getRight
            * returns null; in that case use the right end of the body
            * itself so the class still gets an end location. */
           RESULT = new ClassDef(cxleft,
                                 cb.isEmpty() ? cbxright : getRight(cb),
                                 id, parentId, cb); :}
        ;

/* class_body: either `pass` (no members) or one or more definitions. */
class_body ::= PASS NEWLINE          {: RESULT = empty(); :}
             | class_body_defs:defs  {: RESULT = defs; :}
             ;

class_body_defs ::= class_body_defs:defs var_def:vd
                        {: RESULT = combine(defs, vd); :}
                  | class_body_defs:defs fun_def:fd
                        {: RESULT = combine(defs, fd); :}
                  | class_body_defs:defs error
                        {: RESULT = defs; :}
                  | var_def:vd
                        {: RESULT = single(vd); :}
                  | fun_def:fd
                        {: RESULT = single(fd); :}
                  ;

/* fun_def: the right location is that of the last statement in the body. */
fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt
            COLON NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
        {: RESULT = new FuncDef(defxleft, getRight(sl), id, params, rt,
                                fbd, sl); :}
        ;

/* Optional return type annotation; null when omitted. */
ret_type ::= ARROW type:t   {: RESULT = t; :}
           | /* empty */    {: RESULT = null; :}
           ;

/* Comma-separated formal parameters; may be empty. */
typed_vars ::= typed_var:tv
                   {: RESULT = single(tv); :}
             | typed_vars:tvs COMMA typed_var:tv
                   {: RESULT = combine(tvs, tv); :}
             | typed_vars:tvs COMMA error
                   {: RESULT = tvs; :}
             | /* empty */
                   {: RESULT = empty(); :}
             ;

/* fun_body: declarations that precede the statements of a function. */
fun_body_decs ::= fun_body_decs:fbd global_decl:gd
                      {: RESULT = combine(fbd, gd); :}
                | fun_body_decs:fbd nonlocal_decl:nd
                      {: RESULT = combine(fbd, nd); :}
                | fun_body_decs:fbd var_def:vd
                      {: RESULT = combine(fbd, vd); :}
                | fun_body_decs:fbd fun_def:fd
                      {: RESULT = combine(fbd, fd); :}
                | fun_body_decs:fbd error
                      {: RESULT = fbd; :}
                | /* empty */
                      {: RESULT = empty(); :}
                ;

/* typed_var */
typed_var ::= identifier:id COLON type:t
        {: RESULT = new TypedVar(idxleft, txright, id, t); :}
        ;

/* type: a class name (given as an identifier or as a string) or a
 * bracketed list type. */
type ::= identifier:id
             {: RESULT = new ClassType(idxleft, idxright, id.name); :}
       | STRING:str
             {: RESULT = new ClassType(strxleft, strxright, str); :}
       | LBR:lbr type:t RBR:rbr
             {: RESULT = new ListType(lbrxleft, rbrxright, t); :}
       ;

/* global_decl */
global_decl ::= GLOBAL:g identifier:id NEWLINE
        {: RESULT = new GlobalDecl(gxleft, idxright, id); :}
        ;

/* nonlocal_decl */
nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE
        {: RESULT = new NonLocalDecl(nxleft, idxright, id); :}
        ;

/* var_def: the location ends at the literal, not at the NEWLINE. */
var_def ::= typed_var:t ASSIGN literal:l NEWLINE
        {: RESULT = new VarDef(txleft, lxright, t, l); :}
        ;

/* literal */
literal ::= NONE:n    {: RESULT = new NoneLiteral(nxleft, nxright); :}
          | BOOL:b    {: RESULT = new BooleanLiteral(bxleft, bxright, b); :}
          | NUMBER:n  {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
          | STRING:s  {: RESULT = new StringLiteral(sxleft, sxright, s); :}
          ;

opt_stmt_list ::= /* empty */   {: RESULT = empty(); :}
                | stmt_list:s   {: RESULT = s; :}
                ;

stmt_list ::= stmt:s              {: RESULT = single(s); :}
            | stmt_list:l stmt:s  {: RESULT = combine(l, s); :}
            | stmt_list:l error   {: RESULT = l; :}
            /* If there is a syntax error in the source, this says to discard
             * symbols from the parsing stack and perform reductions until
             * there is a stmt_list on top of the stack, and then to discard
             * input symbols until it is possible to shift again, reporting
             * a syntax error. */
            ;

stmt ::= expr_stmt:s NEWLINE  {: RESULT = s; :}
       ;

expr_stmt ::= expr:e  {: RESULT = new ExprStmt(exleft, exright, e); :}
            ;

expr ::= binary_expr:e  {: RESULT = e; :}
       | NUMBER:n       {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
       ;

/* A binary expression, illustrating how to find the left and right
 * source position of a phrase. */
binary_expr ::= expr:e1 PLUS:op expr:e2
        {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, op, e2); :}
        ;

/* Extras - rules below have not been given in language reference, we have
 * them to ease implementation */
identifier ::= ID:idStr
        {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}
        ;