You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ChocoPy/src/main/cup/chocopy/pa1/ChocoPy.cup

492 lines
21 KiB

4 years ago
import java.util.ArrayList;
import java.util.List;
import java_cup.runtime.*;
import chocopy.common.astnodes.*;
/* The following code section is copied verbatim to the generated
* parser class. */
parser code {:
/* The following fields and methods deal with error reporting.
 * Avoid changing these unless you know what you are doing. */
/** Node that accumulates error messages to be added to the Program
 * node produced as a result. syntax_error() records its reports here,
 * and parseProgram() hands this same object to the empty Program it
 * builds when parsing yields no Program value. */
public final Errors errors = new Errors(new ArrayList<>());
/** Return the Program node that results from parsing the stream of
 *  tokens produced by lexical analysis. In the case of syntax errors,
 *  the program may be empty, but will have error messages.
 *
 *  @param debug if true, parse with debug_parse() instead of parse().
 *  @return the Program produced by the parse, or an empty Program that
 *          carries the accumulated errors when the parse result is
 *          missing or is not a Program.
 *  @throws AssertionError if parsing raises a checked exception; the
 *          original exception is attached as the cause. */
public Program parseProgram(boolean debug) {
    try {
        Symbol result = debug ? debug_parse() : parse();
        if (result != null && result.value instanceof Program) {
            return (Program) result.value;
        }
        // No usable parse result: hand back an empty program that still
        // carries whatever errors were reported along the way.
        return new Program(new Location(0, 0), new Location(0, 0),
                           new ArrayList<Declaration>(),
                           new ArrayList<Stmt>(),
                           errors);
    } catch (RuntimeException excp) {
        // Unchecked exceptions propagate unchanged rather than being
        // wrapped by the handler below.
        throw excp;
    } catch (Exception excp) {
        String msg =
            String.format("Internal parser error detected: %s%n", excp);
        // Attach the original exception so its stack trace is not lost.
        throw new AssertionError(msg, excp);
    }
}
/** Return the symbol factory held by the lexer that feeds this parser. */
@Override
public SymbolFactory getSymbolFactory() {
    ChocoPyLexer lexer = (ChocoPyLexer) getScanner();
    return lexer.symbolFactory;
}
/** Record a syntax error for CUR_TOKEN in the accumulated errors. The
 *  message names the token kind and the lexer's current text, and is
 *  located at the token's left and right positions. */
@Override
public void syntax_error(Symbol cur_token) {
    String token = symbl_name_from_id(cur_token.sym);
    ChocoPyLexer lexer = (ChocoPyLexer) getScanner();
    String text = lexer.yytext();
    ComplexSymbolFactory.ComplexSymbol located =
        (ComplexSymbolFactory.ComplexSymbol) cur_token;
    errors.syntaxError(located.xleft, located.xright,
                       "Parse error near token %s: %s", token, text);
}
/** Hook for a syntax error the parser could not recover from. The body
 *  is intentionally empty: nothing is thrown or reported here, so the
 *  errors already recorded by syntax_error() are what the caller sees.
 *  NOTE(review): presumably this replaces a superclass default that
 *  aborts the parse; confirm against the CUP runtime. */
@Override
public void unrecovered_syntax_error(Symbol cur_token) {
/* Do not die */
}
:}
/**************************************************************************
* FEEL FREE TO MODIFY ANYTHING BELOW THIS LINE
*
* The rules provided below parse expressions of the form <INT> + <INT> + ...
* You can re-use these rules or edit them as you wish. The start rule
* should return a node of type Program.
*
* Tips: Production rules are usually followed by action code that will be
* copied to the generated parser to be executed immediately after a reduce
* operation; that is, when a production rule has been matched. You can name
* a nonterminal or terminal symbol in a production rule using the colon
* notation, e.g. expr_stmt ::= expr:e, to get the AST node for the matched
* expression. In the action code, `e` will be a variable of whatever type
* has been declared for the corresponding nonterminal, such as `Expr`.
* Therefore, you can construct an AST Node of type `ExprStmt` with `e` in the
* constructor: `new ExprStmt(exleft, exright, e)`
*
* The variables `exleft` and `exright` are automatically generated by CUP
* and contain Location objects for the start and end of the expression `e`.
* You can collect start and line number info for AST nodes by taking the
* location of the left end of the leftmost symbol in a rule and the
* location of the right end of the rightmost symbol. The auto-generated
* variables have names `<sym>xleft` and `<sym>xright`, where <sym> is the
* name given to the symbol using the colon notation.
*
* When you have nonterminals that are lists of things, e.g. List<Stmt> or
* List<Declaration>, it is helpful to get the leftmost and rightmost
* source location from within this list; we have provided some utility
* functions below to do just that.
**************************************************************************/
/* The following code section is copied verbatim to the class that performs
* production-rule actions. */
action code {:
/** Return a new mutable list holding just ITEM. If ITEM is null, the
 *  returned list is empty instead. */
<T> List<T> single(T item) {
    List<T> result = new ArrayList<>();
    if (item == null) {
        return result;
    }
    result.add(item);
    return result;
}
/** Append ITEM to the end of LIST unless ITEM is null, and return LIST
 *  itself (the same object, modified in place). */
<T> List<T> combine(List<T> list, T item) {
    if (item == null) {
        return list;
    }
    list.add(item);
    return list;
}
/** Return a fresh list with no elements; the list is mutable. */
<T> List<T> empty() {
    List<T> fresh = new ArrayList<>();
    return fresh;
}
/** Return the start location of the first node of NODES, built from
 *  elements 0 and 1 of that node's getLocation() array, or null if
 *  NODES is empty. Assumes that the nodes of NODES are ordered in
 *  increasing order of location, from left to right. */
ComplexSymbolFactory.Location getLeft(List<? extends Node> nodes) {
    return nodes.isEmpty()
        ? null
        : new ComplexSymbolFactory.Location(
              nodes.get(0).getLocation()[0],
              nodes.get(0).getLocation()[1]);
}
/** Return the end location of the last node of NODES, built from
 *  elements 2 and 3 of that node's getLocation() array, or null if
 *  NODES is empty. Assumes that the nodes of NODES are ordered in
 *  increasing order of location, from left to right. */
ComplexSymbolFactory.Location getRight(List<? extends Node> nodes) {
    if (nodes.isEmpty()) {
        return null;
    }
    final int lastIndex = nodes.size() - 1;
    return new ComplexSymbolFactory.Location(
        nodes.get(lastIndex).getLocation()[2],
        nodes.get(lastIndex).getLocation()[3]);
}
4 years ago
:}
/* Terminal symbols (tokens returned by the lexer). The declaration
 *     terminal <identifier1>, <identifier2>, ...;
 * declares each <identifier_i> to denote a distinct terminal symbol
 * for use in the grammar. The declaration
 *     terminal <type> <identifier1>, ...;
 * does the same, and in addition indicates that the lexer supplies a
 * semantic value of type <type> for these symbols, which may be
 * referenced in actions ( {: ... :} ).
 */
/* Tokens that bracket an indented block; declared without a value type. */
terminal INDENT, DEDENT;
/* Identifiers and string tokens; each carries a String semantic value. */
terminal String ID, STRING;
/* Terminal Delimiters */
4 years ago
terminal NEWLINE;
terminal String COLON, COMMA;
/* Terminal Literals: each carries a value of the declared Java type. */
terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;
/* Terminal Keywords. All carry a String value. Several of them
 * (LAMBDA, AS, ASSERT, AWAIT, ...) are not used by any production
 * visible in this file. */
terminal String IF, ELSE, ELIF, WHILE, CLASS, DEF, LAMBDA,
                AS, FOR, GLOBAL, IN, NONLOCAL, PASS, RETURN,
                ASSERT, AWAIT, BREAK, CONTINUE, DEL, EXCEPT, FINALLY,
                FROM, IMPORT, RAISE, TRY, WITH, YIELD;
/* Terminal Operators and brackets. All carry a String value; the
 * expression rules pass that value to the AST nodes they build. */
terminal String PLUS, MINUS, MUL, DIV, MOD,
                GT, LT, EQUAL, NEQ, GEQ, LEQ,
                ASSIGN, AND, OR, NOT, DOT,
                LPAR, RPAR, LBR, RBR, ARROW, IS;
4 years ago
/* Returned by the lexer for erroneous tokens. Since it does not appear
 * in any production of the grammar, encountering it always results in
 * a syntax error. It is declared without a value type. */
terminal UNRECOGNIZED;
/* Nonterminal symbols (defined in production rules below).
* As for terminal symbols,
* non terminal <type> <identifier1>, ..., <identifiern>;
* defines the listed nonterminal identifier symbols to have semantic values
* of type <type>. */
/* The start symbol's value is the whole Program. */
non terminal Program program;
/* Lists of declarations: top level, class bodies, and the declaration
 * part of a function body. */
non terminal List<Declaration> program_head, class_body, class_body_defs, fun_body_decs;
/* Lists of statements. else_body is also a statement list: it holds the
 * else block, a single nested IfStmt for an elif, or nothing. */
non terminal List<Stmt> stmt_list, opt_stmt_list, block, else_body;
/* A single statement. simple_stmt is null for `pass`. */
non terminal Stmt stmt, simple_stmt;
4 years ago
/* Expression levels, loosest to tightest: expr, cexpr, pexpr. */
non terminal Expr expr, pexpr, cexpr;
non terminal VarDef var_def;
non terminal ClassDef class_def;
non terminal FuncDef fun_def;
non terminal Literal literal;
/* Operators wrapped as StringLiteral nodes; cexpr reads comp_op's value.
 * NOTE(review): bin_op is not referenced by any production visible in
 * this file. */
non terminal StringLiteral bin_op, comp_op;
non terminal TypedVar typed_var;
non terminal TypeAnnotation type, ret_type;
non terminal Identifier identifier;
non terminal List<TypedVar> typed_vars;
non terminal GlobalDecl global_decl;
non terminal NonLocalDecl nonlocal_decl;
/* opt_target is the list of `target =` prefixes of an assignment. */
non terminal List<Expr> opt_target, expr_list;
non terminal Expr target;
non terminal MemberExpr member_expr;
non terminal IndexExpr index_expr;
4 years ago
/* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts.
 *
 * The conditional expression (IF ... ELSE) is listed first because it
 * binds loosest, and it is right-associative. As a result
 *     a and b if c else d        groups as  (a and b) if c else d
 *     not a if c else d          groups as  (not a) if c else d
 *     a if b else c if d else e  groups as  a if b else (c if d else e)
 * Listing IF/ELSE last (highest) would instead attach the conditional
 * to the nearest operand, e.g. a and (b if c else d). */
precedence right IF, ELSE;
precedence left OR;
precedence left AND;
precedence left NOT;
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
precedence left PLUS, MINUS;
precedence left MUL, DIV, MOD;
precedence left DOT, COMMA, LBR, RBR;
4 years ago
/* The start symbol: a successful parse reduces the whole input to
 * `program`. */
start with program;
/***** GRAMMAR RULES *****/
/* Rules are defined in the order given by the language reference */
/* program */
4 years ago
/* A program is a run of declarations followed by an optional run of
 * statements. The node starts at the first declaration, or at the first
 * statement if there are no declarations, and ends at the right end of
 * the statement list. */
program ::= program_head:decls opt_stmt_list:stmts
        {: RESULT = new Program(
               decls.isEmpty() ? getLeft(stmts) : getLeft(decls),
               stmtsxright, decls, stmts, errors);
        :}
    ;
/* Top-level declarations, accumulated left to right. A syntax error
 * inside the declaration section contributes nothing and the list built
 * so far is kept. */
program_head ::=
                                    {: RESULT = empty(); :}
    | program_head:decls var_def:v   {: RESULT = combine(decls, v); :}
    | program_head:decls class_def:c {: RESULT = combine(decls, c); :}
    | program_head:decls fun_def:f   {: RESULT = combine(decls, f); :}
    | program_head:decls error       {: RESULT = decls; :}
    ;
/* Either a statement list or nothing (an empty list). */
opt_stmt_list ::= stmt_list:stmts  {: RESULT = stmts; :}
    |                              {: RESULT = empty(); :}
    ;
/* class_def: `class Name(Parent):` followed by an indented body. The node
 * runs from the CLASS keyword to the end of the last body declaration. */
class_def ::= CLASS:kw identifier:name LPAR identifier:superName RPAR COLON
              NEWLINE INDENT class_body:members DEDENT
        {: RESULT = new ClassDef(kwxleft, getRight(members), name,
                                 superName, members); :}
    ;
/* class_body: either a lone `pass` (no declarations) or one or more
 * member definitions. */
class_body ::= class_body_defs:members  {: RESULT = members; :}
    | PASS NEWLINE                      {: RESULT = empty(); :}
    ;
/* One or more attribute / method definitions of a class. A syntax error
 * after at least one definition is skipped, keeping the list so far. */
class_body_defs ::= var_def:v           {: RESULT = single(v); :}
    | fun_def:f                         {: RESULT = single(f); :}
    | class_body_defs:members var_def:v {: RESULT = combine(members, v); :}
    | class_body_defs:members fun_def:f {: RESULT = combine(members, f); :}
    | class_body_defs:members error     {: RESULT = members; :}
    ;
/* fun_def: `def name(params) [-> type]:` followed by an indented body made
 * of declarations and then at least one statement. The node runs from
 * the DEF keyword to the end of the last body statement. */
fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt
            COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
        {: /* ret_type is null when no `-> type` annotation was written.
            * The return type is then the implicit "<None>" class, placed
            * at the right end of the colon. Testing for null avoids
            * comparing Strings with ==. */
           TypeAnnotation returnType = rt;
           if (returnType == null) {
               returnType = new ClassType(colxright, colxright, "<None>");
           }
           RESULT = new FuncDef(defxleft, getRight(sl), id, params,
                                returnType, fbd, sl); :}
    ;
/* Optional return annotation: the annotated type, or null if absent
 * (fun_def substitutes the implicit "<None>" type). */
ret_type ::= ARROW type:t   {: RESULT = t; :}
    |                       {: RESULT = null; :}
    ;
/* Comma-separated formal parameters, possibly none. A syntax error after
 * a comma is skipped, keeping the parameters parsed so far.
 * NOTE(review): because the empty alternative can begin the list, input
 * with a leading comma such as `(, x: int)` is accepted. */
typed_vars ::=
                                          {: RESULT = empty(); :}
    | typed_var:param                     {: RESULT = single(param); :}
    | typed_vars:params COMMA typed_var:param
                                          {: RESULT = combine(params, param); :}
    | typed_vars:params COMMA error       {: RESULT = params; :}
    ;
/* fun_body: the declaration part of a function body. It may be empty and
 * may mix global/nonlocal declarations, variable definitions and nested
 * functions. A syntax error is skipped, keeping the list so far. */
fun_body_decs ::=
                                            {: RESULT = empty(); :}
    | fun_body_decs:decls global_decl:g     {: RESULT = combine(decls, g); :}
    | fun_body_decs:decls nonlocal_decl:n   {: RESULT = combine(decls, n); :}
    | fun_body_decs:decls var_def:v         {: RESULT = combine(decls, v); :}
    | fun_body_decs:decls fun_def:f         {: RESULT = combine(decls, f); :}
    | fun_body_decs:decls error             {: RESULT = decls; :}
    ;
/* typed_var: `name: type`, spanning from the name to the end of the type. */
typed_var ::= identifier:name COLON type:annotation
        {: RESULT = new TypedVar(namexleft, annotationxright,
                                 name, annotation); :}
    ;
/* type: a class named by an identifier, a class named by a string token,
 * or a bracketed list type wrapping another type. */
type ::= identifier:name
        {: RESULT = new ClassType(namexleft, namexright, name.name); :}
    | STRING:quoted
        {: RESULT = new ClassType(quotedxleft, quotedxright, quoted); :}
    | LBR:open type:element RBR:close
        {: RESULT = new ListType(openxleft, closexright, element); :}
    ;
/* global_decl: `global name`; the node ends at the name, not the newline. */
global_decl ::= GLOBAL:kw identifier:name NEWLINE
        {: RESULT = new GlobalDecl(kwxleft, namexright, name); :}
    ;
/* nonlocal_decl: `nonlocal name`; the node ends at the name, not the
 * newline. */
nonlocal_decl ::= NONLOCAL:kw identifier:name NEWLINE
        {: RESULT = new NonLocalDecl(kwxleft, namexright, name); :}
    ;
/* var_def: `name: type = literal`; spans from the typed variable to the
 * end of the literal. */
var_def ::= typed_var:var ASSIGN literal:init NEWLINE
        {: RESULT = new VarDef(varxleft, initxright, var, init); :}
    ;
/* stmt: a simple statement terminated by a newline, or a compound
 * if / while / for statement. Compound statements span from their
 * keyword to the end of the last statement they contain. */
stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :}
    | IF:i expr:cond COLON block:b else_body:elb
        {: /* With no else/elif part, elb is empty and getRight(elb) would
            * be null; the statement then ends with its `then` block. */
           RESULT = new IfStmt(ixleft,
                               elb.isEmpty() ? getRight(b) : getRight(elb),
                               cond, b, elb); :}
    | WHILE:wh expr:cond COLON block:b
        {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :}
    | FOR:f identifier:id IN expr:e COLON block:b
        {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :}
    ;
/* The part of an if statement after its `then` block: an else block, an
 * elif (represented as a one-element list holding a nested IfStmt that
 * starts at the ELIF keyword), or nothing (an empty list). */
else_body ::= ELSE COLON block:b {: RESULT = b; :}
    | ELIF:el expr:cond COLON block:b else_body:elb
        {: /* When nothing follows this elif, elb is empty and
            * getRight(elb) would be null; end at the elif's own block. */
           RESULT = single(new IfStmt(
               elxleft,
               elb.isEmpty() ? getRight(b) : getRight(elb),
               cond, b, elb)); :}
    | {: RESULT = empty(); :}
    ;
4 years ago
/* simple_stmt: pass, an expression statement, a return with or without a
 * value, or an assignment to one or more targets.
 * `pass` produces null, which single()/combine() leave out of statement
 * lists. A bare `return` must still produce a ReturnStmt node (with a
 * null value), otherwise the statement would vanish from the AST just
 * like `pass`. */
simple_stmt ::= PASS {: RESULT = null; :}
    | expr:e
        {: RESULT = new ExprStmt(exleft, exright, e); :}
    | RETURN:r expr:e
        {: RESULT = new ReturnStmt(rxleft, exright, e); :}
    | RETURN:r
        {: RESULT = new ReturnStmt(rxleft, rxright, (Expr) null); :}
    | opt_target:ot expr:e
        {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :}
    ;
/* One or more `target =` prefixes of an assignment, in source order. */
opt_target ::= target:first ASSIGN
        {: RESULT = single(first); :}
    | opt_target:targets target:next ASSIGN
        {: RESULT = combine(targets, next); :}
    ;
/* block: a newline, then an indented statement list closed by a dedent.
 * Its value is the statement list itself. */
block ::= NEWLINE INDENT stmt_list:body DEDENT
        {: RESULT = body; :}
    ;
/* literal: None, a boolean, an integer, or a string, each wrapped in the
 * corresponding literal node located at the token. */
literal ::= NONE:tok
        {: RESULT = new NoneLiteral(tokxleft, tokxright); :}
    | BOOL:flag
        {: RESULT = new BooleanLiteral(flagxleft, flagxright, flag); :}
    | NUMBER:num
        {: RESULT = new IntegerLiteral(numxleft, numxright, num); :}
    | STRING:text
        {: RESULT = new StringLiteral(textxleft, textxright, text); :}
    ;
/* expr */
4 years ago
/* Loosest expression level: logical not/and/or and the conditional
 * expression `thenValue if condition else elseValue`. IfExpr receives
 * the condition first, then the two branch values. */
expr ::= cexpr:operand
        {: RESULT = operand; :}
    | NOT:op expr:operand
        {: RESULT = new UnaryExpr(opxleft, operandxright, op, operand); :}
    | expr:lhs AND:op expr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | expr:lhs OR:op expr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | expr:thenVal IF expr:cond ELSE expr:elseVal
        {: RESULT = new IfExpr(thenValxleft, elseValxright,
                               cond, thenVal, elseVal); :}
    ;
4 years ago
/* cexpr */
4 years ago
/* Comparison level. Comparison operators are non-associative, so both
 * operands are pexpr: a chain such as `a < b < c` is a syntax error
 * rather than being silently grouped as `a < (b < c)`. Parenthesized
 * comparisons are still accepted as operands through pexpr. */
cexpr ::= pexpr:pe
        {: RESULT = pe; :}
    | pexpr:p1 comp_op:co pexpr:p2
        {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :}
    ;
4 years ago
/* pexpr */
/* Tightest expression level: names, literals, list displays,
 * parenthesized expressions, member/index access, calls, arithmetic and
 * unary minus. A parenthesized expression yields the inner node itself.
 * NOTE(review): the unary-minus alternative takes the precedence of the
 * MINUS terminal, which is declared below MUL/DIV/MOD, so `-a * b`
 * groups as `-(a * b)`. */
pexpr ::= identifier:name
        {: RESULT = name; :}
    | literal:lit
        {: RESULT = lit; :}
    | LBR:open expr_list:items RBR:close
        {: RESULT = new ListExpr(openxleft, closexright, items); :}
    | LPAR expr:inner RPAR
        {: RESULT = inner; :}
    | member_expr:member
        {: RESULT = member; :}
    | index_expr:indexed
        {: RESULT = indexed; :}
    | member_expr:method LPAR expr_list:args RPAR:close
        {: RESULT = new MethodCallExpr(methodxleft, closexright,
                                       method, args); :}
    | identifier:fn LPAR expr_list:args RPAR:close
        {: RESULT = new CallExpr(fnxleft, closexright, fn, args); :}
    | pexpr:lhs PLUS:op pexpr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | pexpr:lhs MINUS:op pexpr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | pexpr:lhs MUL:op pexpr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | pexpr:lhs DIV:op pexpr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | pexpr:lhs MOD:op pexpr:rhs
        {: RESULT = new BinaryExpr(lhsxleft, rhsxright, lhs, op, rhs); :}
    | MINUS:op pexpr:operand
        {: RESULT = new UnaryExpr(opxleft, operandxright, op, operand); :}
    ;
/* Comma-separated expressions, possibly none; used for call arguments
 * and list displays.
 * NOTE(review): because the empty alternative can begin the list, input
 * with a leading comma such as `f(, x)` is accepted. */
expr_list ::=
                                        {: RESULT = empty(); :}
    | expr:item                         {: RESULT = single(item); :}
    | expr_list:items COMMA expr:item   {: RESULT = combine(items, item); :}
    ;
4 years ago
/* bin_op: each arithmetic operator token wrapped as a StringLiteral that
 * holds its spelling. Kept for possible later use; pexpr currently
 * matches the operator tokens directly.
 * Integer division is spelled "//" (Section 2.6.3 of the ChocoPy
 * language reference). */
bin_op ::= PLUS:op   {: RESULT = new StringLiteral(opxleft, opxright, "+"); :}
    | MINUS:op       {: RESULT = new StringLiteral(opxleft, opxright, "-"); :}
    | MUL:op         {: RESULT = new StringLiteral(opxleft, opxright, "*"); :}
    | DIV:op         {: RESULT = new StringLiteral(opxleft, opxright, "//"); :}
    | MOD:op         {: RESULT = new StringLiteral(opxleft, opxright, "%"); :}
    ;
4 years ago
4 years ago
/* comp_op: each comparison operator token wrapped as a StringLiteral that
 * holds its spelling; cexpr reads that spelling through `.value`. */
comp_op ::= EQUAL:op {: RESULT = new StringLiteral(opxleft, opxright, "=="); :}
    | NEQ:op         {: RESULT = new StringLiteral(opxleft, opxright, "!="); :}
    | LEQ:op         {: RESULT = new StringLiteral(opxleft, opxright, "<="); :}
    | GEQ:op         {: RESULT = new StringLiteral(opxleft, opxright, ">="); :}
    | LT:op          {: RESULT = new StringLiteral(opxleft, opxright, "<"); :}
    | GT:op          {: RESULT = new StringLiteral(opxleft, opxright, ">"); :}
    | IS:op          {: RESULT = new StringLiteral(opxleft, opxright, "is"); :}
    ;
4 years ago
/* member_expr: `object.member`, spanning from the object expression to
 * the member name. */
member_expr ::= pexpr:object DOT identifier:member
        {: RESULT = new MemberExpr(objectxleft, memberxright,
                                   object, member); :}
    ;
4 years ago
4 years ago
/* index_expr: `list[index]`, spanning from the indexed expression to the
 * closing bracket. */
index_expr ::= pexpr:base LBR expr:index RBR:close
        {: RESULT = new IndexExpr(basexleft, closexright, base, index); :}
    ;
4 years ago
/* target: what may appear on the left of `=`: a plain name, a member
 * access, or an index access. The node is passed through unchanged. */
target ::= identifier:name     {: RESULT = name; :}
    | member_expr:member       {: RESULT = member; :}
    | index_expr:indexed       {: RESULT = indexed; :}
    ;
/* Extras - the rules below are not given in the language reference; they
 * exist to ease implementation. */
/* identifier: wraps an ID token's text in an Identifier node located at
 * the token. */
identifier ::= ID:text
        {: RESULT = new Identifier(textxleft, textxright, text); :}
    ;
/* stmt_list: one or more statements, accumulated left to right.
 *
 * The `error` alternative handles syntax errors in the source: it says to
 * discard symbols from the parsing stack and perform reductions until
 * there is a stmt_list on top of the stack, and then to discard input
 * symbols until it is possible to shift again, reporting a syntax
 * error. The statements collected so far are kept. */
stmt_list ::= stmt:first            {: RESULT = single(first); :}
    | stmt_list:stmts stmt:next     {: RESULT = combine(stmts, next); :}
    | stmt_list:stmts error         {: RESULT = stmts; :}
    ;