diff --git a/.gitignore b/.gitignore index 21cc758..987fe16 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,6 @@ tramp Session.vim .netrwhist *~ + +# JFlex +src/main/jflex/chocopy/pa1/ChocoPyLexer.java diff --git a/WORKLOG.md b/WORKLOG.md new file mode 100644 index 0000000..7b7b13f --- /dev/null +++ b/WORKLOG.md @@ -0,0 +1,21 @@ +Compiler Construction PA1 Worklog +
+
Team: +
Apoorva Ranade(ar6496) +
Sanjar Ahmadov(sa5640) +
Yinqi Sun(ys3540) +
+
Acknowledgments: Provide attribution to any collaborations, external resources, or out-side help. +
+
Indentation: Describe your strategy for handling INDENT and DEDENT tokens. Point to the relevant source files and line numbers. +
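The lexer (src/main/jflex/chocopy/pa1/ChocoPy.jflex) maintains a stack of indentation levels. For each line that is not blank or comment-only, it counts the whitespace characters before the first token and compares that count with the level on top of the stack. If the count is larger, it is pushed onto the stack and an INDENT token is emitted. If the count is smaller, levels are popped one at a time, emitting one DEDENT token per pop, until the top of the stack no longer exceeds the count; if the count then matches no enclosing level, an error token is emitted instead. +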
+
Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any. +
Shift/reduce conflicts in the grammar. One way to fix them is to rewrite the grammar; we chose instead to resolve them with precedence declarations, as is already done for expr, by declaring `if` and `else` right-associative. +
Handling errors was another challenge. This required debugging and small changes to program flow. +
Understanding the given code was a small challenge; it took some time before we could start coding. +
+
Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis. +
Added more tests to rigorously check program flow and indentation. +
A function body must contain at least one statement that is not part of a nested function. +
Support for multi-line strings. diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..f4e6621 --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +mvn clean package -e diff --git a/full_test.sh b/full_test.sh new file mode 100755 index 0000000..19504c9 --- /dev/null +++ b/full_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +./build.sh +./test.sh diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index d4ff444..1bcb484 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -1,4 +1,5 @@ import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java_cup.runtime.*; @@ -113,12 +114,26 @@ action code {: } return list; } - + List combine(List list, List item) { + if (item != null) { + Iterator it = item.iterator(); + while(it.hasNext()) + list.add(it.next()); + } + return list; + } /** Return a mutable empty list. */ List empty() { return new ArrayList(); } - + class FuncBody { + public List fbd; + public List sl; + public FuncBody(List fbd, List sl){ + this.fbd = fbd; + this.sl = sl; + } + } /** Return the leftmost non-whitespace location in NODES, or null if NODES * is empty. Assumes that the nodes of NODES are ordered in increasing * order of location, from left to right. */ @@ -130,6 +145,18 @@ action code {: return new ComplexSymbolFactory.Location(first.getLocation()[0], first.getLocation()[1]); } + + /** Return the rightmost non-whitespace location in NODES, or null if NODES + * is empty. Assumes that the nodes of NODES are ordered in increasing + * order of location, from left to right. */ + ComplexSymbolFactory.Location getRight(List nodes) { + if (nodes.isEmpty()) { + return null; + } + Node last = nodes.get(nodes.size()-1); + return new ComplexSymbolFactory.Location(last.getLocation()[2], + last.getLocation()[3]); + } :} @@ -142,12 +169,81 @@ action code {: * semantic value of type for these symbols that may be referenced * in actions ( {: ... 
:} ). */ +terminal INDENT; +terminal DEDENT; +terminal String ID; +terminal String STRING; + + + +/* Terminal Delimiters */ terminal NEWLINE; -terminal String PLUS; -terminal Integer NUMBER; +terminal String COLON; +terminal String COMMA; + +/* Terminal Literals */ +terminal Integer NUMBER; +terminal Boolean BOOL; +terminal String NONE; + +/* Terminal Keywords */ +terminal String IF; +terminal String ELSE; +terminal String ELIF; +terminal String WHILE; +terminal String CLASS; +terminal String DEF; +terminal String LAMBDA; +terminal String AS; +terminal String FOR; +terminal String GLOBAL; +terminal String IN; +terminal String NONLOCAL; +terminal String PASS; +terminal String RETURN; +terminal String ASSERT; +terminal String AWAIT; +terminal String BREAK; +terminal String CONTINUE; +terminal String DEL; +terminal String EXCEPT; +terminal String FINALLY; +terminal String FROM; +terminal String IMPORT; +terminal String RAISE; +terminal String TRY; +terminal String WITH; +terminal String YIELD; + + +/* Terminal Operators */ +terminal String PLUS; +terminal String MINUS; +terminal String MUL; +terminal String DIV; +terminal String MOD; +terminal String GT; +terminal String LT; +terminal String EQUAL; +terminal String NEQ; +terminal String GEQ; +terminal String LEQ; +terminal String ASSIGN; +terminal String AND; +terminal String OR; +terminal String NOT; +terminal String DOT; +terminal String LPAR; +terminal String RPAR; +terminal String LBR; +terminal String RBR; +terminal String ARROW; +terminal String IS; + + /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ -terminal UNRECOGNIZED; +terminal String UNRECOGNIZED; /* Nonterminal symbols (defined in production rules below). * As for terminal symbols, @@ -155,14 +251,39 @@ terminal UNRECOGNIZED; * defines the listed nonterminal identifier symbols to have semantic values * of type . 
*/ non terminal Program program; -non terminal List program_head; -non terminal List stmt_list, opt_stmt_list; -non terminal Stmt stmt, expr_stmt; -non terminal Expr expr, binary_expr; +non terminal List defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs; +non terminal List stmt_list, opt_stmt_list, block, else_body; +non terminal Stmt stmt, simple_stmt; +non terminal Expr expr, pexpr, cexpr; +non terminal VarDef var_def; +non terminal ClassDef class_def; +non terminal FuncDef fun_def; +non terminal Literal literal; +non terminal StringLiteral bin_op, comp_op; +non terminal TypedVar typed_var; +non terminal TypeAnnotation type, ret_type; +non terminal Identifier identifier; +non terminal List typed_vars; +non terminal GlobalDecl global_decl; +non terminal NonLocalDecl nonlocal_decl; +non terminal List opt_target, expr_list; +non terminal Expr target; +non terminal MemberExpr member_expr; +non terminal IndexExpr index_expr; +non terminal FuncBody fun_body; + + /* Precedences (lowest to highest) for resolving what would otherwise be * ambiguities in the form of shift/reduce conflicts.. */ -precedence left PLUS; +precedence right IF, ELSE; /* conditional expression binds loosest: `a and b if c else d` must group as `(a and b) if c else d`, and nested conditionals chain to the right */ +precedence left OR; +precedence left AND; +precedence left NOT; +precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS; +precedence left PLUS, MINUS; +precedence left MUL, DIV, MOD; +precedence left DOT, COMMA, LBR, RBR; /* The start symbol. */ start with program; @@ -170,45 +291,221 @@ start with program; /***** GRAMMAR RULES *****/ +/* Rules are defined in the order given by the language reference */ + +/* program */ program ::= program_head:d opt_stmt_list:s {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d), sxright, d, s, errors); :} ; -/* Initial list of declarations. 
*/ -program_head ::= /* not implemented; currently matches empty string */ - {: RESULT = empty(); :} +program_head ::= program_head:d var_def:vd {: RESULT = combine(d, vd); :} + | program_head:d class_def:cd {: RESULT = combine(d, cd); :} + | program_head:d fun_def:fd {: RESULT = combine(d, fd); :} + | program_head:d error:e {: RESULT = d; :} + | {: RESULT = empty(); :} ; - + opt_stmt_list ::= {: RESULT = empty(); :} | stmt_list:s {: RESULT = s; :} ; -stmt_list ::= stmt:s {: RESULT = single(s); :} - | stmt_list:l stmt:s {: RESULT = combine(l, s); :} - | stmt_list:l error {: RESULT = l; :} - /* If there is a syntax error in the source, this says to discard - * symbols from the parsing stack and perform reductions until - * there is a stmt_list on top of the stack, and then to discard - * input symbols until it is possible to shift again, reporting - * a syntax error. */ - ; -stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :} +/* class_def */ +class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :}; + + +/* class_body */ +class_body ::= PASS NEWLINE {: RESULT = empty(); :} + | class_body_defs:defs {: RESULT = defs; :} ; + +class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(defs, vd); :} + | class_body_defs:defs fun_def:fd {: RESULT = combine(defs, fd); :} + | class_body_defs:defs error {: RESULT = defs; :} + | var_def:vd {: RESULT = single(vd); :} + | fun_def:fd {: RESULT = single(fd); :} + ; + + +/* fun_def */ +fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT + {: TypeAnnotation _rt = rt;if((rt instanceof ClassType) && ((ClassType)rt).className == "") _rt = new ClassType( colxright, colxright, "");RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :} + ; -expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} 
+ret_type ::= ARROW type:t {: RESULT= t; :} + | {: RESULT= new ClassType(null, null,""); :} + ; + +typed_vars ::= typed_var:tv {: RESULT= single(tv); :} + | typed_vars:tvs COMMA typed_var:tv {: RESULT= combine(tvs, tv); :} + | typed_vars:tvs COMMA error {: RESULT= tvs; :} + | {: RESULT= empty(); :} + ; + + +/* fun_body */ +fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:} + | fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList());:} ; -expr ::= binary_expr:e {: RESULT = e; :} - | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} +fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} + | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} + | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} + | fun_body_decs:fbd fun_def:fd {: RESULT= combine(fbd, fd); :} + | fun_body_decs:fbd error {: RESULT= fbd; :} + | {: RESULT= empty(); :} + ; + + +/* typed_var */ +typed_var ::= identifier:id COLON type:t {: RESULT = new TypedVar(idxleft, txright, id, t); :}; + + +/* type */ +type ::= identifier:id {: RESULT = new ClassType(idxleft, idxright, id.name); :} + | STRING:str {: RESULT = new ClassType(strxleft, strxright, str); :} + | LBR:lbr type:t RBR:rbr {: RESULT = new ListType(lbrxleft, rbrxright, t); :} + ; + + +/* global_decl */ +global_decl ::= GLOBAL:g identifier:id NEWLINE {: RESULT = new GlobalDecl(gxleft, idxright, id); :}; + + +/* nonlocal_decl */ +nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLocalDecl(nxleft, idxright, id); :}; + + +/* var_def */ +var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :}; + + +/* stmt */ +stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :} + | IF:i expr:cond COLON block:b else_body:elb {: /* else_body is an empty list when there is no else/elif, and getRight() of an empty list is null; end the statement at the then-block in that case */ RESULT = new IfStmt(ixleft, getRight(elb.isEmpty() ? b : elb), cond, b, elb); :} + | WHILE:wh expr:cond COLON block:b {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :} + | FOR:f 
identifier:id IN expr:e COLON block:b {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :} + ; + + +else_body ::= ELSE:el COLON block:b {: RESULT = b; :} + | ELIF:el expr:cond COLON block:b else_body:elb {: /* same fallback as the IF rule: a trailing elif without else ends at its own block */ RESULT = single(new IfStmt(elxleft, getRight(elb.isEmpty() ? b : elb), cond, b, elb)); :} + | {: RESULT = empty(); :} + ; + + +/* simple_stmt */ +simple_stmt ::= PASS:p {: /* pass yields no AST node; NOTE(review): relies on single()/combine() skipping null items - confirm */ RESULT = null; :} + | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} + | RETURN:r expr:e {: RESULT = new ReturnStmt(rxleft, exright, e); :} + | RETURN:r {: /* a bare return is still a statement: keep it in the AST with a null value instead of dropping it */ RESULT = new ReturnStmt(rxleft, rxright, null); :} + | opt_target:ot expr:e {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :} + ; + + +opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :} + | target:t ASSIGN {: RESULT = single(t); :} + ; + + +/* block */ +block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :}; + + +/* literal */ +literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :} + | BOOL:b {: RESULT = new BooleanLiteral(bxleft, bxright, b); :} + | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} + | STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :} + ; + + +/* expr */ +expr ::= cexpr:ce {: RESULT = ce; :} + | NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :} + | expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :} + | expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :} + | expr:e1 IF expr:e2 ELSE expr:e3 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :} ; -/* A binary expression, illustrating how to find the left and right - * source position of a phrase. 
*/ -binary_expr ::= expr:e1 PLUS:op expr:e2 - {: RESULT = new BinaryExpr(e1xleft, e2xright, - e1, op, e2); :} +/* cexpr */ +cexpr ::= pexpr:pe {: RESULT = pe; :} + | pexpr:p1 comp_op:co cexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :} + ; + + +/* pexpr */ +pexpr ::= identifier:id {: RESULT = id; :} + | literal:l {: RESULT = l; :} + | LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :} + | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} + | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} + | pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MINUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} + ; + +expr_list ::= expr:e {: RESULT = single(e); :} + | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :} + | {: RESULT = new ArrayList(); :} + ; + +/* bin_op */ //We may still be able to use bin_op, so I left it here. 
+bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :} + | MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :} + | MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :} + | DIV:a {: RESULT = new StringLiteral(axleft, axright, "//"); :} //Section 2.6.3 in chocopy language reference + | MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :} + ; + + +/* comp_op */ //this might also need some change in order not to break left associativity +comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :} + | NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :} + | LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :} + | GEQ:a {: RESULT = new StringLiteral(axleft, axright, ">="); :} + | LT:a {: RESULT = new StringLiteral(axleft, axright, "<"); :} + | GT:a {: RESULT = new StringLiteral(axleft, axright, ">"); :} + | IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :} + ; + + +/* member_expr */ +member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :} ; + + +/* index_expr */ +index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :} + ; + + +/* target */ +target ::= identifier:id {: RESULT = id; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + ; + + +/* Extras - rules below have not been given in language reference, we have them to ease implementation */ +identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}; + + +stmt_list ::= stmt:s {: RESULT = single(s); :} + | stmt_list:l stmt:s {: RESULT = combine(l, s); :} + | stmt_list:l error {: RESULT = l; :} + /* If there is a syntax error in the source, this says to discard + * symbols from the parsing stack and perform reductions until + * there is a stmt_list on top of the stack, and then to discard + * input symbols until it is possible to shift again, reporting + * a syntax error. 
*/ + ; \ No newline at end of file diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 9aafe7f..c297fab 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -1,5 +1,7 @@ package chocopy.pa1; import java_cup.runtime.*; +import java.util.ArrayList; +import java.util.Iterator; %% @@ -8,7 +10,7 @@ import java_cup.runtime.*; %unicode %line %column - +%states AFTER, STR %class ChocoPyLexer %public @@ -32,7 +34,12 @@ import java_cup.runtime.*; /** Producer of token-related values for the parser. */ final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); - + private int currIndent = 0; //Current Indentation Level + private String currString = ""; + private int str_l = 0, str_c = 0; //Start location of a string. + /*A stack that keeps track of the spaces in each Indentation Level*/ + private ArrayList stack = new ArrayList(20); + private boolean indentErrorUnchecked = true; /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. 
*/ private Symbol symbol(int type) { @@ -48,6 +55,26 @@ import java_cup.runtime.*; value); } + private void push(int indent){ + stack.add(indent); + } + private int pop(){ + if(stack.isEmpty()) return 0; + return stack.remove(stack.size() - 1); + } + private int top(){ + if(stack.isEmpty()) return 0; + return stack.get(stack.size() - 1); + } + private boolean find(int indent){ + if(indent == 0) return true; + Iterator it = stack.iterator(); + while(it.hasNext()){ + if(it.next() == indent) + return true; + } + return false; + } %} /* Macros (regexes used in rules below) */ @@ -55,28 +82,192 @@ import java_cup.runtime.*; WhiteSpace = [ \t] LineBreak = \r|\n|\r\n -IntegerLiteral = 0 | [1-9][0-9]* +IntegerLiteral = 0|[1-9][0-9]* // According to the manual, 00+ is illegal +StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " +Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])* +Comments = #[^\r\n]* %% +//YYINITIAL state is where we're dealing with indentations. +//We will set the state to YYINITIAL when starting a +//new line unless this line is within a string, e.g.: +/* +"this is \ +a string across \ +multiple lines\ +" +*/ +{ + {WhiteSpace} + { + /* Add indentation. The match is compared with equals(): yytext() builds a new String, so == against a literal is never true and every tab used to count as a single column. */ + if("\t".equals(yytext())) + currIndent += 8 - (currIndent % 8); // a tab advances to the next multiple of 8 columns (8 when it starts the line) + else + currIndent ++; + } +/* +# This python code will test if '\t' is 8 spaces +# It will run and print '1\n2' +# Please tell me if your Python reports an error +# Or you find documentations that says otherwise +if True: + print(1) # \t + print(2) # 8 spaces +*/ + + {LineBreak} + { + /* + If this is a blank line, start over on the next line. + An empty line should just be ignored, therefore we don't + pass a NEWLINE to Cup. 
+ */ + currIndent = 0; + } + {Comments} { /* ignored */ } //Ignore blank lines + + /*If it's not a blank line (Current character isn't a + Whitespace/linebreak/comment), deal with indentation here and + start accepting whatever is on this line in `AFTER' state*/ + [^ \t\r\n#] + { + //rewind the current character. + yypushback(1); + if(top() > currIndent) + { + /* + If the indentation of the line is less than number of + indents current level should have, + keep dedenting until it reaches the level with the same + number of indents. + It's like a loop, because we're not changing the state + and we rewinded the current character. So it will keep + going until top()<= currIndent and it will switch to + AFTER state. + */ + pop(); + if(top() < currIndent) + { + currIndent = top(); + return symbolFactory.newSymbol("", ChocoPyTokens.UNRECOGNIZED, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); + } + return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); + } + /*Otherwise, we will start dealing with the rest + of the line after indentation in AFTER state. */ + yybegin(AFTER); + if(top()< currIndent) + { + /* + If current indentation is more than the number of indents + current level should have, start a new level which will have + `currIndent' indents. + */ - { + push(currIndent); + return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); + } + } +} + { /* Delimiters. 
*/ - {LineBreak} { return symbol(ChocoPyTokens.NEWLINE); } + {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);} + ":" { return symbol(ChocoPyTokens.COLON); } + "," { return symbol(ChocoPyTokens.COMMA); } /* Literals. */ {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext())); } - /* Operators. */ - "+" { return symbol(ChocoPyTokens.PLUS, yytext()); } + "\"" {yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = "";} //Start taking a string when see a " + "False" { return symbol(ChocoPyTokens.BOOL, false); } + "True" { return symbol(ChocoPyTokens.BOOL, true); } + "None" { return symbol(ChocoPyTokens.NONE); } + + /*Keywords*/ + "if" {return symbol(ChocoPyTokens.IF);} + "else" {return symbol(ChocoPyTokens.ELSE);} + "elif" {return symbol(ChocoPyTokens.ELIF);} + "while" {return symbol(ChocoPyTokens.WHILE);} + "class" {return symbol(ChocoPyTokens.CLASS);} + "def" {return symbol(ChocoPyTokens.DEF);} + "lambda" {return symbol(ChocoPyTokens.LAMBDA);} + "as" { return symbol(ChocoPyTokens.AS); } + "for" { return symbol(ChocoPyTokens.FOR); } + "global" { return symbol(ChocoPyTokens.GLOBAL); } + "in" { return symbol(ChocoPyTokens.IN); } + "nonlocal" { return symbol(ChocoPyTokens.NONLOCAL); } + "pass" { return symbol(ChocoPyTokens.PASS); } + "return" { return symbol(ChocoPyTokens.RETURN); } + "assert" { return symbol(ChocoPyTokens.ASSERT); } + "await" { return symbol(ChocoPyTokens.AWAIT); } + "break" { return symbol(ChocoPyTokens.BREAK); } + "continue" { return symbol(ChocoPyTokens.CONTINUE); } + "del" { return symbol(ChocoPyTokens.DEL); } + "except" { return symbol(ChocoPyTokens.EXCEPT); } + "finally" { return symbol(ChocoPyTokens.FINALLY); } + "from" { return symbol(ChocoPyTokens.FROM); } + "import" { return symbol(ChocoPyTokens.IMPORT); } + "raise" { return symbol(ChocoPyTokens.RAISE); } + "try" { return symbol(ChocoPyTokens.TRY); } + "with" { return 
symbol(ChocoPyTokens.WITH); } + "yield" { return symbol(ChocoPyTokens.YIELD); } + + /* Operators. */ + "+" { return symbol(ChocoPyTokens.PLUS); } + "-" { return symbol(ChocoPyTokens.MINUS); } + "*" { return symbol(ChocoPyTokens.MUL); } + "//" { return symbol(ChocoPyTokens.DIV); } + "/" { return symbol(ChocoPyTokens.DIV); } //Accroding to manual, chocopy don't have fp division, '/', '//' should be integr division + "%" { return symbol(ChocoPyTokens.MOD); } + ">" { return symbol(ChocoPyTokens.GT); } + "<" { return symbol(ChocoPyTokens.LT); } + "==" { return symbol(ChocoPyTokens.EQUAL); } + "!=" { return symbol(ChocoPyTokens.NEQ); } + ">=" { return symbol(ChocoPyTokens.GEQ); } + "<=" { return symbol(ChocoPyTokens.LEQ); } + "=" { return symbol(ChocoPyTokens.ASSIGN); } + "and" { return symbol(ChocoPyTokens.AND); } + "or" { return symbol(ChocoPyTokens.OR); } + "not" { return symbol(ChocoPyTokens.NOT); } + "." { return symbol(ChocoPyTokens.DOT); } + "(" { return symbol(ChocoPyTokens.LPAR); } + ")" { return symbol(ChocoPyTokens.RPAR); } + "[" { return symbol(ChocoPyTokens.LBR); } + "]" { return symbol(ChocoPyTokens.RBR); } + "->" { return symbol(ChocoPyTokens.ARROW); } + "is" { return symbol(ChocoPyTokens.IS); } + + + /*Identifiers*/ + {Identifiers} {return symbol(ChocoPyTokens.ID, yytext());} /* Whitespace. */ {WhiteSpace} { /* ignore */ } + /* Comment. */ + {Comments} { /* ignore */ } } - -<> { return symbol(ChocoPyTokens.EOF); } +{ + {StringLiteral} {currString += yytext();} + \\$ { /*'\' at the end of line, do nothing.*/ } + "\"" {yybegin(AFTER); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING, + new ComplexSymbolFactory.Location(str_l, str_c), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currString);} // accepted a ", return to AFTER state +} +<> { if(!stack.isEmpty()){ return symbol(ChocoPyTokens.DEDENT, pop());} return symbol(ChocoPyTokens.EOF);} /* Error fallback. 
*/ [^] { return symbol(ChocoPyTokens.UNRECOGNIZED); } diff --git a/src/test/data/pa1/student_contributed/bad.py b/src/test/data/pa1/student_contributed/bad.py index b85905e..68e3773 100644 --- a/src/test/data/pa1/student_contributed/bad.py +++ b/src/test/data/pa1/student_contributed/bad.py @@ -1 +1,35 @@ 1 2 3 + +def fun5(): + c = 6 + def fun6(): + print("Hello") + c = 4 + 5 + +if True: + if True: + print("Hello") + if True: + print("Maybe") + else: + print("World") +else: + print("Again") +else: + print("And Again") + +class Thor(object): + y:int = 0 + print("Right place?") + +class Stones(object): + y:int = 0 + def fun(x:int): + print("Right place?") + def bar(): + return 2+3 + print("Wrong Place") + +def fun1(): + def fun2(): + print("Hello") diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 8138b36..093076f 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -1 +1,80 @@ -1 + 2 + 3 +class Foo(object): + x:int = 0 + + def __init__(self:"Foo", x:int): + self.x = x + + def bar(y:int): + print("Hello World!",self.x+y) + y = 10 + +def get_stones(name:str)->str: + def map_name(nm:str)->str: + return stones[color.index(nm)] + color=["Red","Blue"] + stones=["Mind","Soul"] + return map_name(name) + +def funa(): + def funb(): + print("Hello") + funb() + +def fund(): + def fune(): + print("Hello") + c = 4 + 5 + +def funf(): + def fung(): + print("Hello") + c = 6 + c = 4 + 5 + + +if True: + if True: + if True: + print("Hello") +print("World") + +if True: + if True: + if True: + print("Hello") + print("World") + +if True: + if True: + if True: + print("Hello") + print("World") + +if True: + if True: + if True: + print("Hello") + else: + print("World") + +if True: + if True: + if True: + print("Hello") +else: + print("World") + + + +f = Foo(1) +print(f.x) +f.bar(4) + +a=[[[1],[2]],[[3],[4]]] +print(a[0][0][1]*a[1][1][0]) + +multiline_string="Hi World, 
+Here I am" + +stone="Blue" +print(get_stones(stone)) \ No newline at end of file diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..7060f44 --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +FILENAME=$1 # optional: one test file inside src/test/data/pa1/sample/; when omitted, the whole folder is run + +if [ -z "$1" ] ; then + echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)" + java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/ + exit $? # report the test run's own status; a hard-coded 1 made every successful run look like a failure +fi + +java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \ + --pass=s --test "src/test/data/pa1/sample/${FILENAME}"