ChocoPy/src/main/cup/chocopy/pa1/ChocoPy.cup

import java.util.ArrayList;
import java.util.List;

import java_cup.runtime.*;
import chocopy.common.astnodes.*;

/* The following code section is copied verbatim to the generated
 * parser class. */
parser code {:

    /* The following fields and methods deal with error reporting.
     * Avoid changing these unless you know what you are doing. */

    /** Node that accumulates error messages to be added to the Program
     *  node produced as a result.  The same instance is handed to every
     *  Program constructed in this file (the fallback in parseProgram and
     *  the action of the `program` rule) and is appended to by
     *  syntax_error. */
    public final Errors errors = new Errors(new ArrayList<>());

    /** Return the Program node that results from parsing the stream of
     *  tokens produced by lexical analysis.  In the case of syntax errors,
     *  the program may be empty, but will have error messages.
     *
     *  @param debug if true, run the parser through debug_parse() instead
     *               of parse().
     *  @return the Program produced by the start rule, or, when the parse
     *          yields no Program value, an empty Program located at (0, 0)
     *          that carries the accumulated {@link #errors}.
     *  @throws AssertionError if the parser throws a checked exception; the
     *          original exception is attached as the cause.  Unchecked
     *          exceptions propagate unchanged. */
    public Program parseProgram(boolean debug) {
        try {
            Symbol result = debug ? debug_parse() : parse();
            if (result == null || !(result.value instanceof Program)) {
                /* No usable parse result: hand back an empty program so
                 * that callers still receive the collected errors. */
                return new Program(new Location(0, 0), new Location(0, 0),
                                   new ArrayList<Declaration>(),
                                   new ArrayList<Stmt>(),
                                   errors);
            } else {
                return (Program) result.value;
            }
        } catch (RuntimeException excp) {
            /* Rethrown as-is so that it is not wrapped by the handler
             * below. */
            throw excp;
        } catch (Exception excp) {
            String msg =
                String.format("Internal parser error detected: %s%n", excp);
            /* Keep the original exception as the cause so that its stack
             * trace is not lost. */
            throw new AssertionError(msg, excp);
        }
    }

    /** Return the symbol factory held by the lexer that feeds this parser.
     *  The scanner is cast to ChocoPyLexer, so any other scanner type
     *  causes a ClassCastException. */
    @Override
    public SymbolFactory getSymbolFactory() {
        ChocoPyLexer lexer = (ChocoPyLexer) getScanner();
        return lexer.symbolFactory;
    }

    /** Record a syntax error for CUR_TOKEN in {@link #errors}.  The message
     *  names the token's symbol and includes the lexer's current yytext();
     *  the error is located at the token's xleft/xright positions. */
    @Override
    public void syntax_error(Symbol cur_token) {
        String token = symbl_name_from_id(cur_token.sym);
        String text = ((ChocoPyLexer) getScanner()).yytext();
        /* The location fields live on ComplexSymbol, hence the cast. */
        ComplexSymbolFactory.ComplexSymbol located =
            (ComplexSymbolFactory.ComplexSymbol) cur_token;
        errors.syntaxError(located.xleft, located.xright,
                           "Parse error near token %s: %s", token, text);
    }

    /** Hook for a syntax error that could not be recovered from.  It is
     *  deliberately a no-op.  parseProgram returns an empty Program holding
     *  the accumulated errors whenever the parse produces no Program value.
     *  NOTE(review): presumably the inherited implementation would abort
     *  parsing with an exception -- confirm against the CUP runtime. */
    @Override
    public void unrecovered_syntax_error(Symbol cur_token) {
        /* Do not die */
    }
:}


/**************************************************************************
 *              FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE              
 *
 * The rules provided below parse expressions of the form <INT> + <INT> + ... 
 * You can re-use these rules or edit them as you wish. The start rule
 * should return a node of type Program.
 *
 * Tips: Production rules are usually followed by action code that will be
 * copied to the generated parser to be executed immediately after a reduce
 * operation; that is, when a production rule has been matched. You can name
 * a nonterminal or terminal symbol in a production rule using the colon
 * notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
 * expression. In the action code, `e` will be a variable of whatever type
 * has been declared for the corresponding nonterminal, such as `Expr`.
 * Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
 * constructor: `new ExprStmt(exleft, exright, e)`
 *
 * The variables `exleft` and `exright` are automatically generated by CUP
 * and contain Location objects for the start and end of the expression `e`.
 * You can collect start and end position info for AST nodes by taking the
 * location of the left end of the leftmost symbol in a rule and the
 * location of the right end of the rightmost symbol. The auto-generated
 * variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
 * name given to the symbol using the colon notation.
 *
 * When you have nonterminals that are lists of things, e.g. List<Stmt> or
 * List<Declaration>, it is helpful to get the leftmost and rightmost
 * source location from within this list; the getLeft utility function
 * provided below does this for the leftmost location.
 **************************************************************************/


/* The following code section is copied verbatim to the class that performs
 * production-rule actions. */
action code {:

    /** Return a new mutable list holding ITEM as its only element, or a new
     *  empty mutable list if ITEM is null. */
    <T> List<T> single(T item) {
        List<T> result = new ArrayList<>();
        if (item == null) {
            return result;
        }
        result.add(item);
        return result;
    }

    /** Append ITEM to LIST unless ITEM is null, and return LIST itself (the
     *  same object, modified in place). */
    <T> List<T> combine(List<T> list, T item) {
        if (item == null) {
            return list;
        }
        list.add(item);
        return list;
    }

    /** Return a newly allocated, mutable list with no elements. */
    <T> List<T> empty() {
        List<T> fresh = new ArrayList<>();
        return fresh;
    }

    /** Return the start location of the first node in NODES, or null when
     *  NODES is empty.  The result is built from elements 0 and 1 of that
     *  node's getLocation() array.  NODES is assumed to be ordered from
     *  left to right by source position, so the first node is the
     *  leftmost. */
    ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
        if (!nodes.isEmpty()) {
            Node leftmost = nodes.get(0);
            int line = leftmost.getLocation()[0];
            int column = leftmost.getLocation()[1];
            return new ComplexSymbolFactory.Location(line, column);
        }
        return null;
    }

:}

/* Terminal symbols (tokens returned by the lexer).  The declaration
 *     terminal <identifier1>, <identifier2>, ...;
 * declares each <identifieri> as a distinct terminal symbol for use in
 * the grammar.  The declaration
 *     terminal <type> <identifier1>, ...;
 * does the same, and in addition indicates that the lexer supplies a
 * semantic value of type <type> for these symbols that may be referenced
 * in actions ( {: ... :} ).
 *
 * Of the productions visible in this file, only NUMBER, PLUS and NEWLINE
 * are currently used; the other terminals are declared but not yet
 * referenced by any rule.
 */
/* Indentation tokens (no semantic value), identifiers and string
 * literals. */
terminal INDENT;
terminal DEDENT;
terminal String ID;
terminal String STRING;


/* Terminal Delimiters */
terminal NEWLINE;
terminal String COLON;
terminal String COMMA;

/* Terminal Literals */
terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;

/* Terminal Keywords */
terminal String IF;
terminal String ELSE;
terminal String ELIF;
terminal String WHILE;
terminal String CLASS;
terminal String DEF;
terminal String LAMBDA;
terminal String AS;
terminal String FOR;
terminal String GLOBAL;
terminal String IN;
terminal String NONLOCAL;
terminal String PASS;
terminal String RETURN;
terminal String ASSERT;
terminal String AWAIT;
terminal String BREAK;
terminal String CONTINUE;
terminal String DEL;
terminal String EXCEPT;
terminal String FINALLY;
terminal String FROM;
terminal String IMPORT;
terminal String RAISE;
terminal String TRY;
terminal String WITH;
terminal String YIELD;


/* Terminal Operators */
terminal String PLUS;
terminal String MINUS;
terminal String MUL;
terminal String DIV;
terminal String MOD;
terminal String GT;
terminal String LT;
terminal String EQUAL;
terminal String NEQ;
terminal String GEQ;
terminal String LEQ;
terminal String ASSIGN;
terminal String AND;
terminal String OR;
terminal String NOT;
terminal String DOT;
terminal String LPAR;
terminal String RPAR;
terminal String LBR;
terminal String RBR;
terminal String ARROW;
terminal String IS;

/* Returned by the lexer for erroneous tokens.  Since it does not appear in
 * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED;   

/* Nonterminal symbols (defined in production rules below).
 * As for terminal symbols,
 *     non terminal <type> <identifier1>, ..., <identifiern>;
 * declares the listed nonterminal symbols to have semantic values of type
 * <type>; that is the type of RESULT inside each symbol's actions and of
 * any label bound to the symbol on a right-hand side. */
non terminal Program           program;
non terminal List<Declaration> program_head;
non terminal List<Stmt>        stmt_list, opt_stmt_list;
non terminal Stmt              stmt, expr_stmt;
non terminal Expr              expr, binary_expr;

/* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts.  Terminals listed on
 * the same line share one precedence level and associativity.
 * NOTE(review): PLUS is the only one of these terminals used in a
 * production visible in this file, so the other levels presumably have no
 * effect until rules that use them are added -- confirm when extending the
 * grammar. */
precedence left OR;
precedence left AND;
precedence left NOT;
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
precedence left PLUS, MINUS;
precedence left MUL, DIV, MOD;
precedence left DOT, COMMA, LBR, RBR;

/* The start symbol.  Its semantic value (type Program) is what
 * parseProgram extracts from the Symbol returned by the parser. */
start with program;


/*****  GRAMMAR RULES *****/

/* A program is a list of declarations followed by an optional list of
 * statements.  The node's left location comes from the first declaration
 * if there is one, otherwise from the first statement (getLeft returns
 * null when that list is empty too); its right location is the right end
 * of the statement list.  The parser's shared `errors` node is attached to
 * the result. */
program ::= program_head:d opt_stmt_list:s
        {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
                                sxright, d, s, errors);
        :}
        ;

/* Initial list of declarations.  This placeholder has a single empty
 * production, so it always yields a new empty mutable list. */
program_head ::= /* not implemented; currently matches empty string */
                                     {: RESULT = empty(); :}
                ;

/* Zero or more statements: either nothing (yielding a new empty list) or
 * a stmt_list, whose list is passed through unchanged. */
opt_stmt_list ::=                    {: RESULT = empty(); :}
                | stmt_list:s        {: RESULT = s; :}
                ;

/* One or more statements, built left-recursively: the first alternative
 * starts the list and the second appends to the same list object in place
 * (single and combine both skip null statements).  The third alternative
 * is the error-recovery production; it keeps the statements collected so
 * far and adds nothing for the erroneous input.
 * NOTE(review): recovery needs a stmt_list already on the parse stack, so
 * an error inside the very first statement is presumably not recoverable
 * through this rule -- confirm. */
stmt_list ::= stmt:s                 {: RESULT = single(s); :}
            | stmt_list:l stmt:s     {: RESULT = combine(l, s); :}
            | stmt_list:l error      {: RESULT = l; :}
            /* If there is a syntax error in the source, this says to discard
             * symbols from the parsing stack and perform reductions until
             * there is a stmt_list on top of the stack, and then to discard
             * input symbols until it is possible to shift again, reporting
             * a syntax error. */
            ;

/* A statement is an expression statement terminated by NEWLINE.  The
 * NEWLINE token adds nothing to the resulting node. */
stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :}
              ;

/* Wraps an expression in an ExprStmt node that spans exactly the source
 * range of the expression (exleft to exright). */
expr_stmt ::= expr:e    {: RESULT = new ExprStmt(exleft, exright, e); :}
            ;

/* An expression is either a binary expression (passed through unchanged)
 * or an integer literal built from the Integer value that the lexer
 * attached to the NUMBER token. */
expr ::= binary_expr:e  {: RESULT = e; :}
       | NUMBER:n       {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
       ;


/* A binary expression, illustrating how to find the left and right
 * source position of a phrase: the node starts at the left end of the
 * first operand (e1xleft) and ends at the right end of the second
 * (e2xright).  `op` is the String value the lexer attached to the PLUS
 * token.  The rule is ambiguous on its own; the `precedence left PLUS`
 * declaration resolves it so that a + b + c groups as (a + b) + c. */
binary_expr ::= expr:e1 PLUS:op expr:e2
                        {: RESULT = new BinaryExpr(e1xleft, e2xright,
                                                   e1, op, e2); :}
              ;
Initial commit 4 years ago			`import java.util.ArrayList;`
			`import java.util.List;`

			`import java_cup.runtime.*;`
			`import chocopy.common.astnodes.*;`

			`/* The following code section is copied verbatim to the generated`
			`* parser class. */`
			`parser code {:`

			`/* The following fields and methods deal with error reporting`
			`* Avoid changing these unless you know what you are doing. */`

			`/** Node that accumulates error messages to be added to the Program`
			`* node produced as a result. */`
			`public final Errors errors = new Errors(new ArrayList<>());`

			`/** Return the Program node that results from parsing the stream of`
			`* tokens produced by lexical analysis. In the case of syntax errors,`
			`* the program may be empty, but will have error messages. */`
			`public Program parseProgram(boolean debug) {`
			`try {`
			`Symbol result = debug ? debug_parse() : parse();`
			`if (result == null \|\| !(result.value instanceof Program)) {`
			`return new Program(new Location(0, 0), new Location(0, 0),`
			`new ArrayList<Declaration>(),`
			`new ArrayList<Stmt>(),`
			`errors);`
			`} else {`
			`return (Program) result.value;`
			`}`
			`} catch (RuntimeException excp) {`
			`throw excp;`
			`} catch (Exception excp) {`
			`String msg =`
			`String.format("Internal parser error detected: %s%n", excp);`
			`throw new AssertionError(msg);`
			`}`
			`}`

			`@Override`
			`public SymbolFactory getSymbolFactory() {`
			`return ((ChocoPyLexer) getScanner()).symbolFactory;`
			`}`

			`@Override`
			`public void syntax_error(Symbol cur_token) {`
			`String token = symbl_name_from_id(cur_token.sym);`
			`String text = ((ChocoPyLexer) getScanner()).yytext();`
			`errors.syntaxError(`
			`((ComplexSymbolFactory.ComplexSymbol) cur_token).xleft,`
			`((ComplexSymbolFactory.ComplexSymbol) cur_token).xright,`
			`"Parse error near token %s: %s", token, text);`
			`}`

			`@Override`
			`public void unrecovered_syntax_error(Symbol cur_token) {`
			`/* Do not die */`
			`}`
			`:}`


			`/**************************************************************************`
			`* FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE`
			`*`
			`* The rules provided below parse expressions of the form <INT> + <INT> + ...`
			`* You can re-use these rules or edit them as you wish. The start rule`
			`* should return a node of type Program.`
			`*`
			`* Tips: Production rules are usually followed by action code that will be`
			`* copied to the generated parser to be executed immediately after a reduce`
			`* operation; that is, when a production rule has been matched. You can name`
			`* a nonterminal or terminal symbol in a production rule using the colon`
			`* notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched`
			* expression. In the action code, `e` will be a variable of whatever type
			* has been declared for the corresponding nonterminal, such as `Expr`.
			* Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
			* constructor: `new ExprStmt(exleft, exright, e)`
			`*`
			* The variables `exleft` and `exright` are automatically generated by CUP
			* and contain Location objects for the start and end of the expression `e`.
			`* You can collect start and line number info for AST nodes by taking the`
			`* location of the left end of the leftmost symbol in a rule and the`
			`* location of the right end of the rightmost symbol. The auto-generated`
			* variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
			`* name given to the symbol using the colon notation.`
			`*`
			`* When you have nonterminals that are lists of things, e.g. List<Stmt> or`
			`* List<Declaration>, it is helpful to get the leftmost and rightmost`
			`* source location from within this list; we have provided some utility`
			`* functions below to do just that.`
			`**************************************************************************/`


			`/* The following code section is copied verbatim to the class that performs`
			`* production-rule actions. */`
			`action code {:`

			`/** Return a mutable list initially containing the single value ITEM. */`
			`<T> List<T> single(T item) {`
			`List<T> list = new ArrayList<>();`
			`if (item != null) {`
			`list.add(item);`
			`}`
			`return list;`
			`}`

			`/** If ITEM is non-null, appends it to the end of LIST. Then returns`
			`* LIST. */`
			`<T> List<T> combine(List<T> list, T item) {`
			`if (item != null) {`
			`list.add(item);`
			`}`
			`return list;`
			`}`

			`/** Return a mutable empty list. */`
			`<T> List<T> empty() {`
			`return new ArrayList<T>();`
			`}`

			`/** Return the leftmost non-whitespace location in NODES, or null if NODES`
			`* is empty. Assumes that the nodes of NODES are ordered in increasing`
			`* order of location, from left to right. */`
			`ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {`
			`if (nodes.isEmpty()) {`
			`return null;`
			`}`
			`Node first = nodes.get(0);`
			`return new ComplexSymbolFactory.Location(first.getLocation()[0],`
			`first.getLocation()[1]);`
			`}`

			`:}`

			`/* Terminal symbols (tokens returned by the lexer). The declaration`
			`* terminal <identifier1>, <identifier2>, ...;`
			`* declares each <identifieri> as the denotation of a distinct type terminal`
			`* symbol for use in the grammar. The declaration`
			`* terminal <type> <identifier1>, ...;`
			`* does the same, and in addition indicates that the lexer supplies a`
			`* semantic value of type <type> for these symbols that may be referenced`
			`* in actions ( {: ... :} ).`
			`*/`
Added terminals to parser 4 years ago			`terminal INDENT;`
			`terminal DEDENT;`
			`terminal String ID;`
			`terminal String STRING;`



			`/* Terminal Delimiters */`
Initial commit 4 years ago			`terminal NEWLINE;`
Added terminals to parser 4 years ago			`terminal String COLON;`
			`terminal String COMMA;`

			`/* Terminal Literals */`
			`terminal Integer NUMBER;`
			`terminal Boolean BOOL;`
			`terminal String NONE;`

			`/* Terminal Keywords */`
			`terminal String IF;`
			`terminal String ELSE;`
			`terminal String ELIF;`
			`terminal String WHILE;`
			`terminal String CLASS;`
			`terminal String DEF;`
			`terminal String LAMBDA;`
			`terminal String AS;`
			`terminal String FOR;`
			`terminal String GLOBAL;`
			`terminal String IN;`
			`terminal String NONLOCAL;`
			`terminal String PASS;`
			`terminal String RETURN;`
			`terminal String ASSERT;`
			`terminal String AWAIT;`
			`terminal String BREAK;`
			`terminal String CONTINUE;`
			`terminal String DEL;`
			`terminal String EXCEPT;`
			`terminal String FINALLY;`
			`terminal String FROM;`
			`terminal String IMPORT;`
			`terminal String RAISE;`
			`terminal String TRY;`
			`terminal String WITH;`
			`terminal String YIELD;`


			`/* Terminal Operators */`
			`terminal String PLUS;`
			`terminal String MINUS;`
			`terminal String MUL;`
			`terminal String DIV;`
			`terminal String MOD;`
			`terminal String GT;`
			`terminal String LT;`
			`terminal String EQUAL;`
			`terminal String NEQ;`
			`terminal String GEQ;`
			`terminal String LEQ;`
			`terminal String ASSIGN;`
			`terminal String AND;`
			`terminal String OR;`
			`terminal String NOT;`
			`terminal String DOT;`
			`terminal String LPAR;`
			`terminal String RPAR;`
			`terminal String LBR;`
			`terminal String RBR;`
			`terminal String ARROW;`
			`terminal String IS;`

Initial commit 4 years ago			`/* Returned by the lexer for erroneous tokens. Since it does not appear in`
			`* the grammar, it indicates a syntax error. */`
			`terminal UNRECOGNIZED;`

			`/* Nonterminal symbols (defined in production rules below).`
			`* As for terminal symbols,`
			`* non terminal <type> <identifier1>, ..., <identifiern>;`
			`* defines the listed nonterminal identifier symbols to have semantic values`
			`* of type <type>. */`
			`non terminal Program program;`
			`non terminal List<Declaration> program_head;`
			`non terminal List<Stmt> stmt_list, opt_stmt_list;`
			`non terminal Stmt stmt, expr_stmt;`
			`non terminal Expr expr, binary_expr;`

			`/* Precedences (lowest to highest) for resolving what would otherwise be`
			`* ambiguities in the form of shift/reduce conflicts.. */`
Added precedences 4 years ago			`precedence left OR;`
			`precedence left AND;`
			`precedence left NOT;`
			`precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;`
			`precedence left PLUS, MINUS;`
			`precedence left MUL, DIV, MOD;`
			`precedence left DOT, COMMA, LBR, RBR;`
Initial commit 4 years ago
			`/* The start symbol. */`
			`start with program;`


			`/*** GRAMMAR RULES ***/`

			`program ::= program_head:d opt_stmt_list:s`
			`{: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),`
			`sxright, d, s, errors);`
			`:}`
			`;`

			`/* Initial list of declarations. */`
			`program_head ::= /* not implemented; currently matches empty string */`
			`{: RESULT = empty(); :}`
			`;`

			`opt_stmt_list ::= {: RESULT = empty(); :}`
			`\| stmt_list:s {: RESULT = s; :}`
			`;`

			`stmt_list ::= stmt:s {: RESULT = single(s); :}`
			`\| stmt_list:l stmt:s {: RESULT = combine(l, s); :}`
			`\| stmt_list:l error {: RESULT = l; :}`
			`/* If there is a syntax error in the source, this says to discard`
			`* symbols from the parsing stack and perform reductions until`
			`* there is a stmt_list on top of the stack, and then to discard`
			`* input symbols until it is possible to shift again, reporting`
			`* a syntax error. */`
			`;`

			`stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :}`
			`;`

			`expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :}`
			`;`

			`expr ::= binary_expr:e {: RESULT = e; :}`
			`\| NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}`
			`;`


			`/* A binary expression, illustrating how to find the left and right`
			`* source position of a phrase. */`
			`binary_expr ::= expr:e1 PLUS:op expr:e2`
			`{: RESULT = new BinaryExpr(e1xleft, e2xright,`
			`e1, op, e2); :}`
			`;`