import java.util.ArrayList;
import java.util.List;
import java_cup.runtime.*;

import chocopy.common.astnodes.*;

/* The following code section is copied verbatim to the generated
 * parser class. */
parser code {:

    /* The following fields and methods deal with error reporting.
     * Avoid changing these unless you know what you are doing. */

    /** Node that accumulates error messages to be added to the Program
     *  node produced as a result. */
    public final Errors errors = new Errors(new ArrayList<>());

    /** Return the Program node that results from parsing the stream of
     *  tokens produced by lexical analysis.  In the case of syntax errors,
     *  the program may be empty, but will have error messages.
     *
     *  If DEBUG is true, the parse runs through debug_parse() instead of
     *  parse().  Runtime exceptions propagate unchanged; any other
     *  exception is rethrown as an AssertionError that keeps the original
     *  exception as its cause. */
    public Program parseProgram(boolean debug) {
        try {
            Symbol result = debug ? debug_parse() : parse();
            if (result == null || !(result.value instanceof Program)) {
                return new Program(new Location(0, 0), new Location(0, 0),
                                   new ArrayList<Declaration>(),
                                   new ArrayList<Stmt>(), errors);
            } else {
                return (Program) result.value;
            }
        } catch (RuntimeException excp) {
            throw excp;
        } catch (Exception excp) {
            String msg =
                String.format("Internal parser error detected: %s%n", excp);
            /* Keep the cause so the original stack trace is not lost. */
            throw new AssertionError(msg, excp);
        }
    }

    /** Use the symbol factory owned by the lexer, so that parser and lexer
     *  create symbols the same way. */
    @Override
    public SymbolFactory getSymbolFactory() {
        return ((ChocoPyLexer) getScanner()).symbolFactory;
    }

    /** Record a syntax error at CUR_TOKEN in the accumulated error list. */
    @Override
    public void syntax_error(Symbol cur_token) {
        String token = symbl_name_from_id(cur_token.sym);
        String text = ((ChocoPyLexer) getScanner()).yytext();
        errors.syntaxError(
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xleft,
            ((ComplexSymbolFactory.ComplexSymbol) cur_token).xright,
            "Parse error near token %s: %s", token, text);
    }

    @Override
    public void unrecovered_syntax_error(Symbol cur_token) {
        /* Do not die */
    }
:}


/**************************************************************************
 *              FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE
 *
 * The rules below define the grammar.  The start rule should return a
 * node of type Program.
 *
 * Tips: Production rules are usually followed by action code that will be
 * copied to the generated parser to be executed immediately after a reduce
 * operation; that is, when a production rule has been matched. You can name
 * a nonterminal or terminal symbol in a production rule using the colon
 * notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
 * expression. In the action code, `e` will be a variable of whatever type
 * has been declared for the corresponding nonterminal, such as `Expr`.
 * Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
 * constructor: `new ExprStmt(exleft, exright, e)`
 *
 * The variables `exleft` and `exright` are automatically generated by CUP
 * and contain Location objects for the start and end of the expression `e`.
 * You can collect start and line number info for AST nodes by taking the
 * location of the left end of the leftmost symbol in a rule and the
 * location of the right end of the rightmost symbol. The auto-generated
 * variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
 * name given to the symbol using the colon notation.
 *
 * When you have nonterminals that are lists of things, e.g. List<Stmt> or
 * List<Declaration>, it is helpful to get the leftmost and rightmost
 * source location from within this list; we have provided some utility
 * functions below to do just that.
 **************************************************************************/


/* The following code section is copied verbatim to the class that performs
 * production-rule actions. */
action code {:

    /** Return a mutable list initially containing the single value ITEM.
     *  If ITEM is null, the returned list is empty. */
    <T> List<T> single(T item) {
        List<T> list = new ArrayList<>();
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** If ITEM is non-null, appends it to the end of LIST.  Then returns
     *  LIST. */
    <T> List<T> combine(List<T> list, T item) {
        if (item != null) {
            list.add(item);
        }
        return list;
    }

    /** Return a mutable empty list. */
    <T> List<T> empty() {
        return new ArrayList<T>();
    }

    /** Return the leftmost non-whitespace location in NODES, or null if NODES
     *  is empty.  Assumes that the nodes of NODES are ordered in increasing
     *  order of location, from left to right. */
    ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        Node first = nodes.get(0);
        return new ComplexSymbolFactory.Location(first.getLocation()[0],
                                                 first.getLocation()[1]);
    }

    /** Return the rightmost non-whitespace location in NODES, or null if NODES
     *  is empty.  Assumes that the nodes of NODES are ordered in increasing
     *  order of location, from left to right. */
    ComplexSymbolFactory.Location getRight(List<? extends Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        Node last = nodes.get(nodes.size() - 1);
        return new ComplexSymbolFactory.Location(last.getLocation()[2],
                                                 last.getLocation()[3]);
    }
:}


/* Terminal symbols (tokens returned by the lexer).  The declaration
 *     terminal <identifier1>, <identifier2>, ...;
 * declares each <identifier> as the denotation of a distinct type of
 * terminal symbol for use in the grammar.  The declaration
 *     terminal <type> <identifier1>, ...;
 * does the same, and in addition indicates that the lexer supplies a
 * semantic value of type <type> for these symbols that may be referenced
 * in actions ( {: ... :} ).
 */
terminal INDENT;
terminal DEDENT;
terminal String ID;
terminal String STRING;

/* Terminal Delimiters */
terminal NEWLINE;
terminal String COLON;
terminal String COMMA;

/* Terminal Literals */
terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;

/* Terminal Keywords */
terminal String IF;
terminal String ELSE;
terminal String ELIF;
terminal String WHILE;
terminal String CLASS;
terminal String DEF;
terminal String LAMBDA;
terminal String AS;
terminal String FOR;
terminal String GLOBAL;
terminal String IN;
terminal String NONLOCAL;
terminal String PASS;
terminal String RETURN;
terminal String ASSERT;
terminal String AWAIT;
terminal String BREAK;
terminal String CONTINUE;
terminal String DEL;
terminal String EXCEPT;
terminal String FINALLY;
terminal String FROM;
terminal String IMPORT;
terminal String RAISE;
terminal String TRY;
terminal String WITH;
terminal String YIELD;

/* Terminal Operators */
terminal String PLUS;
terminal String MINUS;
terminal String MUL;
terminal String DIV;
terminal String MOD;
terminal String GT;
terminal String LT;
terminal String EQUAL;
terminal String NEQ;
terminal String GEQ;
terminal String LEQ;
terminal String ASSIGN;
terminal String AND;
terminal String OR;
terminal String NOT;
terminal String DOT;
terminal String LPAR;
terminal String RPAR;
terminal String LBR;
terminal String RBR;
terminal String ARROW;
terminal String IS;

/* Pseudo-terminal that is never produced by the lexer.  It exists only to
 * give the unary-minus production its own precedence level (via %prec). */
terminal UMINUS;

/* Returned by the lexer for erroneous tokens.  Since it does not appear in
 * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED;

/* Nonterminal symbols (defined in production rules below).
 * As for terminal symbols,
 *     non terminal <type> <identifier1>, ..., <identifiern>;
 * defines the listed nonterminal identifier symbols to have semantic values
 * of type <type>.
 */
non terminal Program            program;
non terminal List<Declaration>  program_head, class_body, class_body_defs,
                                fun_body_decs;
non terminal List<Stmt>         stmt_list, opt_stmt_list, block, else_body;
non terminal Stmt               stmt, simple_stmt;
non terminal Expr               expr, binary_expr, pexpr, cexpr, cmp_pexpr;
non terminal VarDef             var_def;
non terminal ClassDef           class_def;
non terminal FuncDef            fun_def;
non terminal Literal            literal;
non terminal TypedVar           typed_var;
non terminal TypeAnnotation     type, ret_type;
non terminal Identifier         identifier;
non terminal List<TypedVar>     typed_vars;
non terminal GlobalDecl         global_decl;
non terminal NonLocalDecl       nonlocal_decl;
non terminal List<Expr>         opt_target, expr_list;
non terminal Expr               target;
non terminal MemberExpr         member_expr;
non terminal IndexExpr          index_expr;

/* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts.
 *
 * CUP gives a production the precedence of its last terminal, so every
 * binary operator below appears literally in its own production; a
 * production that reaches the operator through a nonterminal would have no
 * precedence and these declarations would not apply to it.
 *
 * The conditional expression (IF/ELSE) binds loosest and groups to the
 * right; unary minus (UMINUS) binds tighter than the multiplicative
 * operators but looser than member access and indexing. */
precedence right IF, ELSE;
precedence left OR;
precedence left AND;
precedence left NOT;
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
precedence left PLUS, MINUS;
precedence left MUL, DIV, MOD;
precedence left UMINUS;
precedence left DOT, COMMA, LBR, RBR;

/* The start symbol. */
start with program;


/*****  GRAMMAR RULES  *****/

/* Rules are defined in the order given by the language reference */

/* program */
program ::= program_head:d opt_stmt_list:s
        {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
                                sxright, d, s, errors); :}
        ;

program_head ::= program_head:d var_def:vd   {: RESULT = combine(d, vd); :}
               | program_head:d class_def:cd {: RESULT = combine(d, cd); :}
               | program_head:d fun_def:fd   {: RESULT = combine(d, fd); :}
               | program_head:d error        {: RESULT = d; :}
               |                             {: RESULT = empty(); :}
               ;

opt_stmt_list ::=             {: RESULT = empty(); :}
                | stmt_list:s {: RESULT = s; :}
                ;

/* class_def */
class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON
              NEWLINE INDENT class_body:cb DEDENT
        {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :}
        ;

/* class_body */
class_body ::= PASS NEWLINE         {: RESULT = empty(); :}
             | class_body_defs:defs {: RESULT = defs; :}
             ;

class_body_defs ::= class_body_defs:defs var_def:vd
                        {: RESULT = combine(defs, vd); :}
                  | class_body_defs:defs fun_def:fd
                        {: RESULT = combine(defs, fd); :}
                  | class_body_defs:defs error
                        {: RESULT = defs; :}
                  | var_def:vd
                        {: RESULT = single(vd); :}
                  | fun_def:fd
                        {: RESULT = single(fd); :}
                  ;

/* fun_def */
fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt
            COLON NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
        {: RESULT = new FuncDef(defxleft, getRight(sl), id, params, rt,
                                fbd, sl); :}
        ;

/* An omitted return annotation is represented by a null type. */
ret_type ::= ARROW type:t {: RESULT = t; :}
           |              {: RESULT = null; :}
           ;

typed_vars ::= typed_var:tv
                   {: RESULT = single(tv); :}
             | typed_vars:tvs COMMA typed_var:tv
                   {: RESULT = combine(tvs, tv); :}
             | typed_vars:tvs COMMA error
                   {: RESULT = tvs; :}
             |
                   {: RESULT = empty(); :}
             ;

/* fun_body */
fun_body_decs ::= fun_body_decs:fbd global_decl:gd
                      {: RESULT = combine(fbd, gd); :}
                | fun_body_decs:fbd nonlocal_decl:nd
                      {: RESULT = combine(fbd, nd); :}
                | fun_body_decs:fbd var_def:vd
                      {: RESULT = combine(fbd, vd); :}
                | fun_body_decs:fbd fun_def:fd
                      {: RESULT = combine(fbd, fd); :}
                | fun_body_decs:fbd error
                      {: RESULT = fbd; :}
                |
                      {: RESULT = empty(); :}
                ;

/* typed_var */
typed_var ::= identifier:id COLON type:t
        {: RESULT = new TypedVar(idxleft, txright, id, t); :}
        ;

/* type */
type ::= identifier:id
             {: RESULT = new ClassType(idxleft, idxright, id.name); :}
       | STRING:str
             {: RESULT = new ClassType(strxleft, strxright, str); :}
       | LBR:lbr type:t RBR:rbr
             {: RESULT = new ListType(lbrxleft, rbrxright, t); :}
       ;

/* global_decl */
global_decl ::= GLOBAL:g identifier:id NEWLINE
        {: RESULT = new GlobalDecl(gxleft, idxright, id); :}
        ;

/* nonlocal_decl */
nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE
        {: RESULT = new NonLocalDecl(nxleft, idxright, id); :}
        ;

/* var_def */
var_def ::= typed_var:t ASSIGN literal:l NEWLINE
        {: RESULT = new VarDef(txleft, lxright, t, l); :}
        ;

/* stmt */
stmt ::= simple_stmt:s NEWLINE
             {: RESULT = s; :}
       | IF:i expr:cond COLON block:b else_body:elb
             {: /* With no else/elif part, the statement ends where its
                 * `then` block ends. */
                RESULT = new IfStmt(ixleft,
                                    elb.isEmpty() ? getRight(b)
                                                  : getRight(elb),
                                    cond, b, elb); :}
       | WHILE:wh expr:cond COLON block:b
             {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :}
       | FOR:f identifier:id IN expr:e COLON block:b
             {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :}
       ;

/* An `elif` is represented as an else-body holding one nested IfStmt. */
else_body ::= ELSE COLON block:b
                  {: RESULT = b; :}
            | ELIF:el expr:cond COLON block:b else_body:elb
                  {: RESULT = single(
                         new IfStmt(elxleft,
                                    elb.isEmpty() ? getRight(b)
                                                  : getRight(elb),
                                    cond, b, elb)); :}
            |
                  {: RESULT = empty(); :}
            ;

/* simple_stmt */
simple_stmt ::= PASS
                    {: /* `pass` produces no node; the list helpers skip
                        * null items. */
                       RESULT = null; :}
              | expr:e
                    {: RESULT = new ExprStmt(exleft, exright, e); :}
              | RETURN:r expr:e
                    {: RESULT = new ReturnStmt(rxleft, exright, e); :}
              | RETURN:r
                    {: /* A bare `return` is still a statement; it must not
                        * be dropped from the AST. */
                       RESULT = new ReturnStmt(rxleft, rxright, null); :}
              | opt_target:ot expr:e
                    {: RESULT = new AssignStmt(getLeft(ot), exright,
                                               ot, e); :}
              ;

/* One or more `target =` prefixes of an assignment statement. */
opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :}
             | target:t ASSIGN               {: RESULT = single(t); :}
             ;

/* block */
block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :}
        ;

/* literal */
literal ::= NONE:n
                {: RESULT = new NoneLiteral(nxleft, nxright); :}
          | BOOL:b
                {: RESULT = new BooleanLiteral(bxleft, bxright, b); :}
          | NUMBER:n
                {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
          | STRING:s
                {: RESULT = new StringLiteral(sxleft, sxright, s); :}
          ;

/* expr */
expr ::= pexpr:ce
             {: RESULT = ce; :}
       | NOT:n expr:exp
             {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :}
       | expr:e1 AND:a expr:e2
             {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :}
       | expr:e1 OR:o expr:e2
             {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :}
       | expr:e1 IF expr:e2 ELSE expr:e3
             {: /* Source order is  value-if-true IF condition ELSE
                 * value-if-false;  IfExpr takes the condition first. */
                RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :}
       ;

/* pexpr: operands, calls, member/index access, arithmetic and comparisons.
 * Each binary operator has its own production so that the precedence
 * declarations above apply to it. */
pexpr ::= identifier:id
              {: RESULT = id; :}
        | literal:l
              {: RESULT = l; :}
        | LBR:lbr expr_list:l RBR:rbr
              {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :}
        | LPAR expr:e RPAR
              {: RESULT = e; :}
        | member_expr:m
              {: RESULT = m; :}
        | index_expr:i
              {: RESULT = i; :}
        | member_expr:m LPAR expr_list:l RPAR:rpar
              {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :}
        | identifier:id LPAR expr_list:l RPAR:rpar
              {: RESULT = new CallExpr(idxleft, rparxright, id, l); :}

        /* Arithmetic operators */
        | pexpr:p1 PLUS pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "+", p2); :}
        | pexpr:p1 MINUS pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "-", p2); :}
        | pexpr:p1 MUL pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "*", p2); :}
        | pexpr:p1 DIV pexpr:p2
              {: /* ChocoPy's division operator is floor division, "//".
                  * NOTE(review): assumes the lexer emits DIV for "//" --
                  * confirm against the lexer specification. */
                 RESULT = new BinaryExpr(p1xleft, p2xright, p1, "//", p2); :}
        | pexpr:p1 MOD pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "%", p2); :}
        | MINUS:m pexpr:p
              {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :}
              %prec UMINUS

        /* Comparison operators (non-associative) */
        | pexpr:p1 EQUAL pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "==", p2); :}
        | pexpr:p1 NEQ pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "!=", p2); :}
        | pexpr:p1 LEQ pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "<=", p2); :}
        | pexpr:p1 GEQ pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, ">=", p2); :}
        | pexpr:p1 LT pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "<", p2); :}
        | pexpr:p1 GT pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, ">", p2); :}
        | pexpr:p1 IS pexpr:p2
              {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, "is", p2); :}
        ;

/* A possibly empty, comma-separated list of expressions.  The empty case
 * yields an empty list (never null) so that calls and list displays
 * without elements still carry a usable list. */
expr_list ::= expr:e                    {: RESULT = single(e); :}
            | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :}
            |                           {: RESULT = empty(); :}
            ;

/* member_expr */
member_expr ::= pexpr:p DOT identifier:id
        {: RESULT = new MemberExpr(pxleft, idxright, p, id); :}
        ;

/* index_expr */
index_expr ::= pexpr:p LBR expr:e RBR:rbr
        {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :}
        ;

/* target */
target ::= identifier:id  {: RESULT = id; :}
         | member_expr:m  {: RESULT = m; :}
         | index_expr:i   {: RESULT = i; :}
         ;

/* Extras - rules below have not been given in language reference,
 * we have them to ease implementation */
identifier ::= ID:idStr
        {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}
        ;

stmt_list ::= stmt:s             {: RESULT = single(s); :}
            | stmt_list:l stmt:s {: RESULT = combine(l, s); :}
            | stmt_list:l error  {: RESULT = l; :}
            /* If there is a syntax error in the source, this says to discard
             * symbols from the parsing stack and perform reductions until
             * there is a stmt_list on top of the stack, and then to discard
             * input symbols until it is possible to shift again, reporting
             * a syntax error. */
            ;