diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..f4e6621 --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +mvn clean package -e diff --git a/full_test.sh b/full_test.sh new file mode 100755 index 0000000..19504c9 --- /dev/null +++ b/full_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +./build.sh +./test.sh diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index 0585cf7..13e324c 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -130,6 +130,18 @@ action code {: return new ComplexSymbolFactory.Location(first.getLocation()[0], first.getLocation()[1]); } + + /** Return the rightmost non-whitespace location in NODES, or null if NODES + * is empty. Assumes that the nodes of NODES are ordered in increasing + * order of location, from left to right. */ + ComplexSymbolFactory.Location getRight(List nodes) { + if (nodes.isEmpty()) { + return null; + } + Node last = nodes.get(nodes.size()-1); + return new ComplexSymbolFactory.Location(last.getLocation()[2], + last.getLocation()[3]); + } :} @@ -142,14 +154,78 @@ action code {: * semantic value of type for these symbols that may be referenced * in actions ( {: ... :} ). */ +terminal INDENT; +terminal DEDENT; +terminal String ID; +terminal String STRING; + + + +/* Terminal Delimiters */ terminal NEWLINE; -terminal String PLUS; +terminal String COLON; +terminal String COMMA; + +/* Terminal Literals */ +terminal Integer NUMBER; +terminal Boolean BOOL; +terminal String NONE; + +/* Terminal Keywords */ +terminal String IF; +terminal String ELSE; +terminal String ELIF; +terminal String WHILE; +terminal String CLASS; +terminal String DEF; +terminal String LAMBDA; +terminal String AS; +terminal String FOR; +terminal String GLOBAL; +terminal String IN; +terminal String NONLOCAL; +terminal String PASS; +terminal String RETURN; +terminal String ASSERT; +terminal String AWAIT; +terminal String BREAK; +terminal String CONTINUE; +terminal String DEL; +terminal String EXCEPT; +terminal String FINALLY; +terminal String FROM; +terminal String IMPORT; +terminal String RAISE; +terminal String TRY; +terminal String WITH; +terminal String YIELD; + + +/* Terminal Operators */ +terminal String PLUS; terminal String MINUS; terminal String MUL; terminal String DIV; -terminal String NAMES; +terminal String MOD; +terminal String GT; +terminal String LT; +terminal String EQUAL; +terminal String NEQ; +terminal String GEQ; +terminal String LEQ; +terminal String ASSIGN; +terminal String AND; +terminal String OR; +terminal String NOT; +terminal String DOT; +terminal String LPAR; +terminal String RPAR; +terminal String LBR; +terminal String RBR; +terminal String ARROW; +terminal String IS; + -terminal Integer NUMBER; /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ terminal UNRECOGNIZED; @@ -160,14 +236,39 @@ terminal UNRECOGNIZED; * defines the listed nonterminal identifier symbols to have semantic values * of type . */ non terminal Program program; -non terminal List program_head; -non terminal List stmt_list, opt_stmt_list; -non terminal Stmt stmt, expr_stmt; -non terminal Expr expr, binary_expr; +non terminal List program_head, class_body, class_body_defs, fun_body_decs; +non terminal List stmt_list, opt_stmt_list, block, else_body; +non terminal Stmt stmt, simple_stmt; +non terminal Expr expr, pexpr, cexpr; +non terminal VarDef var_def; +non terminal ClassDef class_def; +non terminal FuncDef fun_def; +non terminal Literal literal; +non terminal StringLiteral bin_op, comp_op; +non terminal TypedVar typed_var; +non terminal TypeAnnotation type, ret_type; +non terminal Identifier identifier; +non terminal List typed_vars; +non terminal GlobalDecl global_decl; +non terminal NonLocalDecl nonlocal_decl; +non terminal List opt_target, expr_list; +non terminal Expr target; +non terminal MemberExpr member_expr; +non terminal IndexExpr index_expr; + + + /* Precedences (lowest to highest) for resolving what would otherwise be * ambiguities in the form of shift/reduce conflicts.. */ -precedence left PLUS; +precedence left OR; +precedence left AND; +precedence left NOT; +precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS; +precedence left PLUS, MINUS; +precedence left MUL, DIV, MOD; +precedence left DOT, COMMA, LBR, RBR; +precedence left IF, ELSE; /* The start symbol. */ start with program; @@ -175,45 +276,214 @@ start with program; /***** GRAMMAR RULES *****/ +/* Rules are defined in the order given by the language reference */ + +/* program */ program ::= program_head:d opt_stmt_list:s {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d), sxright, d, s, errors); :} ; -/* Initial list of declarations. */ -program_head ::= /* not implemented; currently matches empty string */ - {: RESULT = empty(); :} +program_head ::= program_head:d var_def:vd {: RESULT = combine(d, vd); :} + | program_head:d class_def:cd {: RESULT = combine(d, cd); :} + | program_head:d fun_def:fd {: RESULT = combine(d, fd); :} + | program_head:d error:e {: RESULT = d; :} + | {: RESULT = empty(); :} ; - + opt_stmt_list ::= {: RESULT = empty(); :} | stmt_list:s {: RESULT = s; :} ; -stmt_list ::= stmt:s {: RESULT = single(s); :} - | stmt_list:l stmt:s {: RESULT = combine(l, s); :} - | stmt_list:l error {: RESULT = l; :} - /* If there is a syntax error in the source, this says to discard - * symbols from the parsing stack and perform reductions until - * there is a stmt_list on top of the stack, and then to discard - * input symbols until it is possible to shift again, reporting - * a syntax error. */ + +/* class_def */ +class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :}; + + +/* class_body */ +class_body ::= PASS NEWLINE {: RESULT = empty(); :} + | class_body_defs:defs {: RESULT = defs; :} + ; + +class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(defs, vd); :} + | class_body_defs:defs fun_def:fd {: RESULT = combine(defs, fd); :} + | class_body_defs:defs error {: RESULT = defs; :} + | var_def:vd {: RESULT = single(vd); :} + | fun_def:fd {: RESULT = single(fd); :} + ; + + +/* fun_def */ +fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT + {: RESULT = new FuncDef(defxleft, getRight(sl), id, params, rt, fbd, sl); :} ; -stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :} +ret_type ::= ARROW type:t {: RESULT= t; :} + | {: RESULT= null; :} + ; + +typed_vars ::= typed_var:tv {: RESULT= single(tv); :} + | typed_vars:tvs COMMA typed_var:tv {: RESULT= combine(tvs, tv); :} + | typed_vars:tvs COMMA error {: RESULT= tvs; :} + | {: RESULT= empty(); :} + ; + + +/* fun_body */ +fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} + | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} + | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} + | fun_body_decs:fbd fun_def:fd {: RESULT= combine(fbd, fd); :} + | fun_body_decs:fbd error {: RESULT= fbd; :} + | {: RESULT= empty(); :} + ; + + +/* typed_var */ +typed_var ::= identifier:id COLON type:t {: RESULT = new TypedVar(idxleft, txright, id, t); :}; + + +/* type */ +type ::= identifier:id {: RESULT = new ClassType(idxleft, idxright, id.name); :} + | STRING:str {: RESULT = new ClassType(strxleft, strxright, str); :} + | LBR:lbr type:t RBR:rbr {: RESULT = new ListType(lbrxleft, rbrxright, t); :} + ; + + +/* global_decl */ +global_decl ::= GLOBAL:g identifier:id NEWLINE {: RESULT = new GlobalDecl(gxleft, idxright, id); :}; + + +/* nonlocal_decl */ +nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLocalDecl(nxleft, idxright, id); :}; + + +/* var_def */ +var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :}; + + +/* stmt */ +stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :} + | IF:i expr:cond COLON block:b else_body:elb {: RESULT = new IfStmt(ixleft, getRight(elb), cond, b, elb); :} + | WHILE:wh expr:cond COLON block:b {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :} + | FOR:f identifier:id IN expr:e COLON block:b {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :} + ; + + +else_body ::= ELSE:el COLON block:b {: RESULT = b; :} + | ELIF:el expr:cond COLON block:b else_body:elb {: RESULT = single(new IfStmt(elxleft, getRight(elb), cond, b, elb)); :} + | {: RESULT = empty(); :} + ; + + +/* simple_stmt */ +simple_stmt ::= PASS:p {: RESULT = null; :} + | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} + | RETURN:r expr:e {: RESULT = new ReturnStmt(rxleft, exright, e); :} + | RETURN {: RESULT = null; :} + | opt_target:ot expr:e {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :} + ; + + +opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :} + | target:t ASSIGN {: RESULT = single(t); :} ; + + +/* block */ +block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :}; + + +/* literal */ +literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :} + | BOOL:b {: RESULT = new BooleanLiteral(bxleft, bxright, b); :} + | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} + | STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :} + ; + + +/* expr */ +expr ::= cexpr:ce {: RESULT = ce; :} + | NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :} + | expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :} + | expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :} + | expr:e1 IF expr:e2 ELSE expr:e3 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :} + ; + + +/* cexpr */ +cexpr ::= pexpr:pe {: RESULT = pe; :} + | pexpr:p1 comp_op:co cexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :} + ; + -expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} +/* pexpr */ +pexpr ::= identifier:id {: RESULT = id; :} + | literal:l {: RESULT = l; :} + | LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :} + | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} + | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} + | pexpr:p1 bin_op:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo.value, p2); :} + | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} + ; + +expr_list ::= expr:e {: RESULT = single(e); :} + | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :} + | {: RESULT = null; :} ; -expr ::= binary_expr:e {: RESULT = e; :} - | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} - ; +/* bin_op */ +bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :} + | MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :} + | MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :} + | DIV:a {: RESULT = new StringLiteral(axleft, axright, "/"); :} + | MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :} + ; -/* A binary expression, illustrating how to find the left and right - * source position of a phrase. */ -binary_expr ::= expr:e1 PLUS:op expr:e2 - {: RESULT = new BinaryExpr(e1xleft, e2xright, - e1, op, e2); :} + +/* comp_op */ +comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :} + | NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :} + | LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :} + | GEQ:a {: RESULT = new StringLiteral(axleft, axright, ">="); :} + | LT:a {: RESULT = new StringLiteral(axleft, axright, "<"); :} + | GT:a {: RESULT = new StringLiteral(axleft, axright, ">"); :} + | IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :} + ; + + +/* member_expr */ +member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :} + ; + + +/* index_expr */ +index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :} ; + + +/* target */ +target ::= identifier:id {: RESULT = id; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + ; + + +/* Extras - rules below have not been given in language reference, we have them to ease implementation */ +identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}; + + +stmt_list ::= stmt:s {: RESULT = single(s); :} + | stmt_list:l stmt:s {: RESULT = combine(l, s); :} + | stmt_list:l error {: RESULT = l; :} + /* If there is a syntax error in the source, this says to discard + * symbols from the parsing stack and perform reductions until + * there is a stmt_list on top of the stack, and then to discard + * input symbols until it is possible to shift again, reporting + * a syntax error. */ + ; \ No newline at end of file diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..7060f44 --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +FILENAME=$1 + +if [ -z "$1" ] ; then + echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)" + java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/ + exit 1 +fi + +java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \ + --pass=s --test src/test/data/pa1/sample/${FILENAME}