|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
import java_cup.runtime.*;
|
|
|
|
import chocopy.common.astnodes.*;
|
|
|
|
|
|
|
|
/* The following code section is copied verbatim to the generated
|
|
|
|
* parser class. */
|
|
|
|
parser code {:
|
|
|
|
|
|
|
|
/* The following fields and methods deal with error reporting.
|
|
|
|
* Avoid changing these unless you know what you are doing. */
|
|
|
|
|
|
|
|
/** Node that accumulates error messages to be added to the Program
|
|
|
|
* node produced as a result. */
|
|
|
|
public final Errors errors = new Errors(new ArrayList<>());
|
|
|
|
|
|
|
|
/** Return the Program node that results from parsing the stream of
|
|
|
|
* tokens produced by lexical analysis. In the case of syntax errors,
|
|
|
|
* the program may be empty, but will have error messages. */
|
|
|
|
public Program parseProgram(boolean debug) {
|
|
|
|
try {
|
|
|
|
Symbol result = debug ? debug_parse() : parse();
|
|
|
|
if (result == null || !(result.value instanceof Program)) {
|
|
|
|
return new Program(new Location(0, 0), new Location(0, 0),
|
|
|
|
new ArrayList<Declaration>(),
|
|
|
|
new ArrayList<Stmt>(),
|
|
|
|
errors);
|
|
|
|
} else {
|
|
|
|
return (Program) result.value;
|
|
|
|
}
|
|
|
|
} catch (RuntimeException excp) {
|
|
|
|
throw excp;
|
|
|
|
} catch (Exception excp) {
|
|
|
|
String msg =
|
|
|
|
String.format("Internal parser error detected: %s%n", excp);
|
|
|
|
throw new AssertionError(msg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public SymbolFactory getSymbolFactory() {
|
|
|
|
return ((ChocoPyLexer) getScanner()).symbolFactory;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void syntax_error(Symbol cur_token) {
|
|
|
|
String token = symbl_name_from_id(cur_token.sym);
|
|
|
|
String text = ((ChocoPyLexer) getScanner()).yytext();
|
|
|
|
errors.syntaxError(
|
|
|
|
((ComplexSymbolFactory.ComplexSymbol) cur_token).xleft,
|
|
|
|
((ComplexSymbolFactory.ComplexSymbol) cur_token).xright,
|
|
|
|
"Parse error near token %s: %s", token, text);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void unrecovered_syntax_error(Symbol cur_token) {
|
|
|
|
/* Do not die */
|
|
|
|
}
|
|
|
|
:}
|
|
|
|
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
|
|
* FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE
|
|
|
|
*
|
|
|
|
* The rules provided below parse expressions of the form <INT> + <INT> + ...
|
|
|
|
* You can re-use these rules or edit them as you wish. The start rule
|
|
|
|
* should return a node of type Program.
|
|
|
|
*
|
|
|
|
* Tips: Production rules are usually followed by action code that will be
|
|
|
|
* copied to the generated parser to be executed immediately after a reduce
|
|
|
|
* operation; that is, when a production rule has been matched. You can name
|
|
|
|
* a nonterminal or terminal symbol in a production rule using the colon
|
|
|
|
* notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
|
|
|
|
* expression. In the action code, `e` will be a variable of whatever type
|
|
|
|
* has been declared for the corresponding nonterminal, such as `Expr`.
|
|
|
|
* Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
|
|
|
|
* constructor: `new ExprStmt(exleft, exright, e)`
|
|
|
|
*
|
|
|
|
* The variables `exleft` and `exright` are automatically generated by CUP
|
|
|
|
* and contain Location objects for the start and end of the expression `e`.
|
|
|
|
* You can collect start and end location info for AST nodes by taking the
|
|
|
|
* location of the left end of the leftmost symbol in a rule and the
|
|
|
|
* location of the right end of the rightmost symbol. The auto-generated
|
|
|
|
* variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
|
|
|
|
* name given to the symbol using the colon notation.
|
|
|
|
*
|
|
|
|
* When you have nonterminals that are lists of things, e.g. List<Stmt> or
|
|
|
|
* List<Declaration>, it is helpful to get the leftmost and rightmost
|
|
|
|
* source location from within this list; we have provided some utility
|
|
|
|
* functions below to do just that.
|
|
|
|
**************************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
/* The following code section is copied verbatim to the class that performs
|
|
|
|
* production-rule actions. */
|
|
|
|
action code {:
|
|
|
|
|
|
|
|
/** Return a mutable list initially containing the single value ITEM. */
|
|
|
|
<T> List<T> single(T item) {
|
|
|
|
List<T> list = new ArrayList<>();
|
|
|
|
if (item != null) {
|
|
|
|
list.add(item);
|
|
|
|
}
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** If ITEM is non-null, appends it to the end of LIST. Then returns
|
|
|
|
* LIST. */
|
|
|
|
<T> List<T> combine(List<T> list, T item) {
|
|
|
|
if (item != null) {
|
|
|
|
list.add(item);
|
|
|
|
}
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Return a mutable empty list. */
|
|
|
|
<T> List<T> empty() {
|
|
|
|
return new ArrayList<T>();
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Return the leftmost non-whitespace location in NODES, or null if NODES
|
|
|
|
* is empty. Assumes that the nodes of NODES are ordered in increasing
|
|
|
|
* order of location, from left to right. */
|
|
|
|
ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
|
|
|
|
if (nodes.isEmpty()) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
Node first = nodes.get(0);
|
|
|
|
return new ComplexSymbolFactory.Location(first.getLocation()[0],
|
|
|
|
first.getLocation()[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
:}
|
|
|
|
|
|
|
|
/* Terminal symbols (tokens returned by the lexer). The declaration
|
|
|
|
* terminal <identifier1>, <identifier2>, ...;
|
|
|
|
* declares each <identifieri> as the denotation of a distinct type of terminal
|
|
|
|
* symbol for use in the grammar. The declaration
|
|
|
|
* terminal <type> <identifier1>, ...;
|
|
|
|
* does the same, and in addition indicates that the lexer supplies a
|
|
|
|
* semantic value of type <type> for these symbols that may be referenced
|
|
|
|
* in actions ( {: ... :} ).
|
|
|
|
*/
|
|
|
|
/* Tokens without a semantic value. */
terminal INDENT, DEDENT;
/* Tokens whose semantic value is a String. */
terminal String ID, STRING;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Terminal Delimiters */
|
|
|
|
/* NEWLINE carries no semantic value; the others carry a String. */
terminal NEWLINE;
terminal String COLON, COMMA;
|
|
|
|
|
|
|
|
/* Terminal Literals */
|
|
|
|
terminal Integer NUMBER;   /* semantic value: Integer */
terminal Boolean BOOL;     /* semantic value: Boolean */
terminal String  NONE;     /* semantic value: String  */
|
|
|
|
|
|
|
|
/* Terminal Keywords */
|
|
|
|
/* All keyword tokens carry a String semantic value. Declaration order is
 * kept exactly as before. */
terminal String IF, ELSE, ELIF, WHILE, CLASS, DEF, LAMBDA, AS, FOR,
                GLOBAL, IN, NONLOCAL, PASS, RETURN, ASSERT, AWAIT, BREAK,
                CONTINUE, DEL, EXCEPT, FINALLY, FROM, IMPORT, RAISE, TRY,
                WITH, YIELD;
|
|
|
|
|
|
|
|
|
|
|
|
/* Terminal Operators */
|
|
|
|
/* All operator tokens carry a String semantic value. Declaration order
 * is kept exactly as before. */
terminal String PLUS, MINUS, MUL, DIV, MOD,
                GT, LT, EQUAL, NEQ, GEQ, LEQ,
                ASSIGN, AND, OR, NOT, DOT,
                LPAR, RPAR, LBR, RBR, ARROW, IS;
|
|
|
|
|
|
|
|
/* Token the lexer returns for erroneous input. No grammar rule mentions
 * it, so encountering it is always a syntax error. */
terminal UNRECOGNIZED;
|
|
|
|
|
|
|
|
/* Nonterminal symbols (defined in production rules below).
|
|
|
|
* As for terminal symbols,
|
|
|
|
* non terminal <type> <identifier1>, ..., <identifiern>;
|
|
|
|
* defines the listed nonterminal identifier symbols to have semantic values
|
|
|
|
* of type <type>. */
|
|
|
|
/* One nonterminal per line: <semantic value type> <name>. */
non terminal Program           program;
non terminal List<Declaration> program_head;
non terminal List<Stmt>        stmt_list;
non terminal List<Stmt>        opt_stmt_list;
non terminal Stmt              stmt;
non terminal Stmt              expr_stmt;
non terminal Expr              expr;
non terminal Expr              binary_expr;
|
|
|
|
|
|
|
|
/* Precedences (lowest to highest) for resolving what would otherwise be
|
|
|
|
* ambiguities in the form of shift/reduce conflicts. */
|
|
|
|
/* Level 1 (lowest). */
precedence left OR;
/* Level 2. */
precedence left AND;
/* Level 3.
 * NOTE(review): NOT is declared left-associative; if it is used as a
 * prefix operator, confirm this associativity is what is intended. */
precedence left NOT;
/* Level 4: non-associative, so these operators cannot be chained. */
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
/* Level 5. */
precedence left PLUS, MINUS;
/* Level 6. */
precedence left MUL, DIV, MOD;
/* Level 7 (highest).
 * NOTE(review): COMMA and RBR share this level with DOT and LBR;
 * confirm that is intended once rules using them are added. */
precedence left DOT, COMMA, LBR, RBR;
|
|
|
|
|
|
|
|
/* Parsing begins at the `program` nonterminal. */
start with program;
|
|
|
|
|
|
|
|
|
|
|
|
/***** GRAMMAR RULES *****/
|
|
|
|
|
|
|
|
program ::= program_head:decls opt_stmt_list:stmts
        {:
            /* The program starts at its first declaration if there is
             * one, otherwise at its first statement. getLeft returns
             * null when the list it is given is empty. */
            ComplexSymbolFactory.Location start =
                decls.isEmpty() ? getLeft(stmts) : getLeft(decls);
            RESULT = new Program(start, stmtsxright, decls, stmts, errors);
        :}
        ;
|
|
|
|
|
|
|
|
/* Initial list of declarations. */
|
|
|
|
program_head ::=
        /* Declarations are not implemented yet, so this rule matches
         * only the empty string and yields an empty list. */
        {: RESULT = empty(); :}
        ;
|
|
|
|
|
|
|
|
/* Zero or more statements. */
opt_stmt_list ::=
          /* empty */
          {: RESULT = empty(); :}
        | stmt_list:stmts
          {: RESULT = stmts; :}
        ;
|
|
|
|
|
|
|
|
/* One or more statements, collected left to right. */
stmt_list ::=
          stmt:first
          {: RESULT = single(first); :}
        | stmt_list:sofar stmt:next
          {: RESULT = combine(sofar, next); :}
        | stmt_list:sofar error
          {: RESULT = sofar; :}
          /* Error recovery: on a syntax error in the source, symbols are
           * discarded from the parsing stack and reductions performed
           * until a stmt_list is on top of the stack; input symbols are
           * then discarded until it is possible to shift again, and a
           * syntax error is reported. */
        ;
|
|
|
|
|
|
|
|
/* A statement is an expression statement terminated by NEWLINE. */
stmt ::= expr_stmt:es NEWLINE
        {: RESULT = es; :}
        ;
|
|
|
|
|
|
|
|
/* Wrap an expression in an ExprStmt spanning the same source range. */
expr_stmt ::= expr:val
        {: RESULT = new ExprStmt(valxleft, valxright, val); :}
        ;
|
|
|
|
|
|
|
|
/* An expression is either a binary expression or an integer literal. */
expr ::=  binary_expr:bin
          {: RESULT = bin; :}
        | NUMBER:num
          {: RESULT = new IntegerLiteral(numxleft, numxright, num); :}
        ;
|
|
|
|
|
|
|
|
|
|
|
|
/* A binary expression, illustrating how to find the left and right
|
|
|
|
* source position of a phrase. */
|
|
|
|
binary_expr ::= expr:lhs PLUS:op expr:rhs
        {:
            /* The node spans from the left end of the left operand to
             * the right end of the right operand. */
            RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs);
        :}
        ;
|