import java.util.ArrayList;
import java.util.List;
import java_cup.runtime.*;
import chocopy.common.astnodes.*;

/* The following code section is copied verbatim to the generated
 * parser class. */
parser code {:

    /* The following fields and methods deal with error reporting
     * Avoid changing these unless you know what you are doing. */

    /** Node that accumulates error messages to be added to the Program
     *  node produced as a result. */
    public final Errors errors = new Errors(new ArrayList<>());

    /** Return the Program node that results from parsing the stream of
     *  tokens produced by lexical analysis.  In the case of syntax errors,
     *  the program may be empty, but will have error messages.
     *
     *  If DEBUG is true, the parse is run through debug_parse() instead of
     *  parse().  Runtime exceptions propagate unchanged; any other
     *  exception is rethrown as an AssertionError that carries the
     *  original exception as its cause. */
    public Program parseProgram(boolean debug) {
        try {
            Symbol result = debug ? debug_parse() : parse();
            if (result == null || !(result.value instanceof Program)) {
                /* No usable parse result: hand back an empty program that
                 * still carries whatever errors were collected. */
                return new Program(new Location(0, 0), new Location(0, 0),
                                   new ArrayList<Declaration>(),
                                   new ArrayList<Stmt>(),
                                   errors);
            } else {
                return (Program) result.value;
            }
        } catch (RuntimeException excp) {
            throw excp;
        } catch (Exception excp) {
            String msg =
                String.format("Internal parser error detected: %s%n", excp);
            /* Keep the original exception as the cause so that its stack
             * trace is not lost. */
            throw new AssertionError(msg, excp);
        }
    }

    /** Return the symbol factory owned by the lexer, so that the parser and
     *  the lexer create symbols through the same factory. */
    @Override
    public SymbolFactory getSymbolFactory() {
        return ((ChocoPyLexer) getScanner()).symbolFactory;
    }

    /** Record a syntax error at CUR_TOKEN in `errors`.  The message names
     *  the token's symbol and the text most recently matched by the
     *  lexer. */
    @Override
    public void syntax_error(Symbol cur_token) {
        String token = symbl_name_from_id(cur_token.sym);
        String text = ((ChocoPyLexer) getScanner()).yytext();
        errors.syntaxError(
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xleft,
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xright,
            "Parse error near token %s: %s", token, text);
    }

    /** Intentionally empty: an unrecoverable syntax error must not abort
     *  the parser.  The error has already been recorded by
     *  syntax_error. */
    @Override
    public void unrecovered_syntax_error(Symbol cur_token) {
        /* Do not die */
    }
:}

/**************************************************************************
 *              FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE
 *
 * The rules provided below parse expressions of the form
 * <number> + <number> + ...
 * You can re-use these rules or edit them as you wish. The start rule
 * should return a node of type Program.
 *
 * Tips: Production rules are usually followed by action code that will be
 * copied to the generated parser to be executed immediately after a reduce
 * operation; that is, when a production rule has been matched. You can name
 * a nonterminal or terminal symbol in a production rule using the colon
 * notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
 * expression. In the action code, `e` will be a variable of whatever type
 * has been declared for the corresponding nonterminal, such as `Expr`.
 * Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
 * constructor: `new ExprStmt(exleft, exright, e)`
 *
 * The variables `exleft` and `exright` are automatically generated by CUP
 * and contain Location objects for the start and end of the expression `e`.
 * You can collect start and line number info for AST nodes by taking the
 * location of the left end of the leftmost symbol in a rule and the
 * location of the right end of the rightmost symbol. The auto-generated
 * variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
 * name given to the symbol using the colon notation.
 *
 * When you have nonterminals that are lists of things, e.g. List<Stmt> or
 * List<Declaration>, it is helpful to get the leftmost and rightmost
 * source location from within this list; we have provided some utility
 * functions below to do just that.
 **************************************************************************/


/* The following code section is copied verbatim to the class that performs
 * production-rule actions. */
action code {:

    /** Return a mutable list initially containing the single value ITEM.
     *  If ITEM is null, the returned list is empty. */
    <T> List<T> single(T item) {
        List<T> list = new ArrayList<>();
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** If ITEM is non-null, appends it to the end of LIST.  Then returns
     *  LIST. */
    <T> List<T> combine(List<T> list, T item) {
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** Return a mutable empty list. */
    <T> List<T> empty() {
        return new ArrayList<T>();
    }

    /** Return the leftmost non-whitespace location in NODES, or null if NODES
     *  is empty.  Assumes that the nodes of NODES are ordered in increasing
     *  order of location, from left to right. */
    ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        Node first = nodes.get(0);
        return new ComplexSymbolFactory.Location(first.getLocation()[0],
                                                 first.getLocation()[1]);
    }

:}

/* Terminal symbols (tokens returned by the lexer).  The declaration
 *     terminal <identifier1>, <identifier2>, ...;
 * declares each <identifieri> as the denotation of a distinct type terminal
 * symbol for use in the grammar.  The declaration
 *     terminal <type> <identifier1>, ...;
 * does the same, and in addition indicates that the lexer supplies a
 * semantic value of type <type> for these symbols that may be referenced
 * in actions ( {: ... :} ).
 */
terminal INDENT;
terminal DEDENT;
terminal String ID;
terminal String STRING;

/* Terminal Delimiters */
terminal NEWLINE;
terminal String COLON;
terminal String COMMA;

/* Terminal Literals */
terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;

/* Terminal Keywords */
terminal String IF;
terminal String ELSE;
terminal String ELIF;
terminal String WHILE;
terminal String CLASS;
terminal String DEF;
terminal String LAMBDA;
terminal String AS;
terminal String FOR;
terminal String GLOBAL;
terminal String IN;
terminal String NONLOCAL;
terminal String PASS;
terminal String RETURN;
terminal String ASSERT;
terminal String AWAIT;
terminal String BREAK;
terminal String CONTINUE;
terminal String DEL;
terminal String EXCEPT;
terminal String FINALLY;
terminal String FROM;
terminal String IMPORT;
terminal String RAISE;
terminal String TRY;
terminal String WITH;
terminal String YIELD;

/* Terminal Operators */
terminal String PLUS;
terminal String MINUS;
terminal String MUL;
terminal String DIV;
terminal String MOD;
terminal String GT;
terminal String LT;
terminal String EQUAL;
terminal String NEQ;
terminal String GEQ;
terminal String LEQ;
terminal String ASSIGN;
terminal String AND;
terminal String OR;
terminal String NOT;
terminal String DOT;
terminal String LPAR;
terminal String RPAR;
terminal String LBR;
terminal String RBR;
terminal String ARROW;
terminal String IS;

/* Returned by the lexer for erroneous tokens.  Since it does not appear in
 * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED;

/* Nonterminal symbols (defined in production rules below).
 * As for terminal symbols,
 *     non terminal <type> <identifier1>, ..., <identifiern>;
 * defines the listed nonterminal identifier symbols to have semantic values
 * of type <type>. */
non terminal Program           program;
non terminal List<Declaration> program_head;
non terminal List<Stmt>        stmt_list, opt_stmt_list;
non terminal Stmt              stmt, expr_stmt;
non terminal Expr              expr, binary_expr;

/* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts. */
precedence left PLUS;

/* The start symbol. */
start with program;


/*****  GRAMMAR RULES *****/

/* A whole program: declarations followed by statements.  The left location
 * comes from the first statement when there are no declarations, and from
 * the first declaration otherwise. */
program ::= program_head:d opt_stmt_list:s
        {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
                                sxright, d, s, errors);
        :}
          ;

/* Initial list of declarations. */
program_head ::= /* not implemented; currently matches empty string */
                {: RESULT = empty(); :}
               ;

/* A possibly empty list of statements. */
opt_stmt_list ::=
                    {: RESULT = empty(); :}
                | stmt_list:s
                    {: RESULT = s; :}
                ;

/* A non-empty list of statements, with error recovery. */
stmt_list ::= stmt:s
                {: RESULT = single(s); :}
            | stmt_list:l stmt:s
                {: RESULT = combine(l, s); :}
            | stmt_list:l error
                {: RESULT = l; :}
            /* If there is a syntax error in the source, this says to discard
             * symbols from the parsing stack and perform reductions until
             * there is a stmt_list on top of the stack, and then to discard
             * input symbols until it is possible to shift again, reporting
             * a syntax error. */
            ;

/* A statement: an expression statement terminated by a NEWLINE. */
stmt ::= expr_stmt:s NEWLINE
            {: RESULT = s; :}
       ;

/* An expression used as a statement. */
expr_stmt ::= expr:e
                {: RESULT = new ExprStmt(exleft, exright, e); :}
            ;

/* An expression: either a binary expression or an integer literal. */
expr ::= binary_expr:e
            {: RESULT = e; :}
       | NUMBER:n
            {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
       ;

/* A binary expression, illustrating how to find the left and right
 * source position of a phrase. */
binary_expr ::= expr:e1 PLUS:op expr:e2
                    {: RESULT = new BinaryExpr(e1xleft, e2xright,
                                               e1, op, e2); :}
              ;