diff --git a/.gitignore b/.gitignore
index 21cc758..987fe16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -147,3 +147,6 @@ tramp
Session.vim
.netrwhist
*~
+
+# JFlex
+src/main/jflex/chocopy/pa1/ChocoPyLexer.java
diff --git a/WORKLOG.md b/WORKLOG.md
new file mode 100644
index 0000000..7b7b13f
--- /dev/null
+++ b/WORKLOG.md
@@ -0,0 +1,21 @@
+Compiler Construction PA1 Worklog
+
+
Team:
+
Apoorva Ranade(ar6496)
+
Sanjar Ahmadov(sa5640)
+
Yinqi Sun(ys3540)
+
+
Acknowledgments: Provide attribution to any collaborations, external resources, or outside help.
+
+
Indentation: Describe your strategy for handling INDENT and DEDENT tokens. Point to the relevant source files and line numbers.
+
A stack is maintained by the lexer to keep track of indentations. A count is accumulated for the number of whitespace characters before the first token. If the count changes from the previous line count, a stack operation is performed. If count increases, another value is added to the stack. If count decreases, the topmost value is popped from the stack.
+
+
Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any.
+
Shift-reduce conflicts were reported when generating the parser from the grammar. One way to fix them is to rewrite the grammar. We chose instead to resolve them with precedence declarations, as is done for expr, by giving IF and ELSE right precedence.
+
Handling errors was another challenge. This required debugging and small changes to program flow.
+
Understanding the given code was a small challenge and took some time before we could start coding.
+
+
Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis.
+
Added more tests to rigorously check program flow and indentation.
+
+A function body must have at least one statement that is not part of a nested function.
+
Support for multi-line strings.
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..f4e6621
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Clean and package the project with Maven; -e makes Maven print execution error messages.
+mvn clean package -e
diff --git a/full_test.sh b/full_test.sh
new file mode 100755
index 0000000..19504c9
--- /dev/null
+++ b/full_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Build, then run the tests only if the build succeeded; arguments are passed on to test.sh.
+./build.sh && ./test.sh "$@"
diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup
index d4ff444..1bcb484 100644
--- a/src/main/cup/chocopy/pa1/ChocoPy.cup
+++ b/src/main/cup/chocopy/pa1/ChocoPy.cup
@@ -1,4 +1,5 @@
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java_cup.runtime.*;
@@ -113,12 +114,26 @@ action code {:
}
return list;
}
-
+    /** Append every element of ITEM to LIST and return LIST; a null ITEM
+     *  leaves LIST unchanged. */
+    List combine(List list, List item) {
+        if (item != null) {
+            list.addAll(item);
+        }
+        return list;
+    }
/** Return a mutable empty list. */
List empty() {
return new ArrayList();
}
-
+ class FuncBody { // Holder pairing the two lists the fun_body rule builds: declarations and statements.
+ public List fbd; // declarations list, stored as passed to the constructor
+ public List sl; // statement list, stored as passed to the constructor
+ public FuncBody(List fbd, List sl){
+ this.fbd = fbd;
+ this.sl = sl;
+ }
+ }
/** Return the leftmost non-whitespace location in NODES, or null if NODES
* is empty. Assumes that the nodes of NODES are ordered in increasing
* order of location, from left to right. */
@@ -130,6 +145,18 @@ action code {:
return new ComplexSymbolFactory.Location(first.getLocation()[0],
first.getLocation()[1]);
}
+
+ /** Return the rightmost non-whitespace location in NODES, or null if NODES
+ * is empty. Assumes that the nodes of NODES are ordered in increasing
+ * order of location, from left to right. */
+ ComplexSymbolFactory.Location getRight(List extends Node> nodes) {
+ if (nodes.isEmpty()) {
+ return null;
+ }
+ Node last = nodes.get(nodes.size()-1);
+ return new ComplexSymbolFactory.Location(last.getLocation()[2],
+ last.getLocation()[3]);
+ }
:}
@@ -142,12 +169,81 @@ action code {:
* semantic value of type for these symbols that may be referenced
* in actions ( {: ... :} ).
*/
+terminal INDENT;
+terminal DEDENT;
+terminal String ID;
+terminal String STRING;
+
+
+
+/* Terminal Delimiters */
terminal NEWLINE;
-terminal String PLUS;
-terminal Integer NUMBER;
+terminal String COLON;
+terminal String COMMA;
+
+/* Terminal Literals */
+terminal Integer NUMBER;
+terminal Boolean BOOL;
+terminal String NONE;
+
+/* Terminal Keywords */
+terminal String IF;
+terminal String ELSE;
+terminal String ELIF;
+terminal String WHILE;
+terminal String CLASS;
+terminal String DEF;
+terminal String LAMBDA;
+terminal String AS;
+terminal String FOR;
+terminal String GLOBAL;
+terminal String IN;
+terminal String NONLOCAL;
+terminal String PASS;
+terminal String RETURN;
+terminal String ASSERT;
+terminal String AWAIT;
+terminal String BREAK;
+terminal String CONTINUE;
+terminal String DEL;
+terminal String EXCEPT;
+terminal String FINALLY;
+terminal String FROM;
+terminal String IMPORT;
+terminal String RAISE;
+terminal String TRY;
+terminal String WITH;
+terminal String YIELD;
+
+
+/* Terminal Operators */
+terminal String PLUS;
+terminal String MINUS;
+terminal String MUL;
+terminal String DIV;
+terminal String MOD;
+terminal String GT;
+terminal String LT;
+terminal String EQUAL;
+terminal String NEQ;
+terminal String GEQ;
+terminal String LEQ;
+terminal String ASSIGN;
+terminal String AND;
+terminal String OR;
+terminal String NOT;
+terminal String DOT;
+terminal String LPAR;
+terminal String RPAR;
+terminal String LBR;
+terminal String RBR;
+terminal String ARROW;
+terminal String IS;
+
+
/* Returned by the lexer for erroneous tokens. Since it does not appear in
* the grammar, it indicates a syntax error. */
-terminal UNRECOGNIZED;
+terminal String UNRECOGNIZED;
/* Nonterminal symbols (defined in production rules below).
* As for terminal symbols,
@@ -155,14 +251,39 @@ terminal UNRECOGNIZED;
* defines the listed nonterminal identifier symbols to have semantic values
* of type . */
non terminal Program program;
-non terminal List program_head;
-non terminal List stmt_list, opt_stmt_list;
-non terminal Stmt stmt, expr_stmt;
-non terminal Expr expr, binary_expr;
+non terminal List defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs;
+non terminal List stmt_list, opt_stmt_list, block, else_body;
+non terminal Stmt stmt, simple_stmt;
+non terminal Expr expr, pexpr, cexpr;
+non terminal VarDef var_def;
+non terminal ClassDef class_def;
+non terminal FuncDef fun_def;
+non terminal Literal literal;
+non terminal StringLiteral bin_op, comp_op;
+non terminal TypedVar typed_var;
+non terminal TypeAnnotation type, ret_type;
+non terminal Identifier identifier;
+non terminal List typed_vars;
+non terminal GlobalDecl global_decl;
+non terminal NonLocalDecl nonlocal_decl;
+non terminal List opt_target, expr_list;
+non terminal Expr target;
+non terminal MemberExpr member_expr;
+non terminal IndexExpr index_expr;
+non terminal FuncBody fun_body;
+
+
/* Precedences (lowest to highest) for resolving what would otherwise be
* ambiguities in the form of shift/reduce conflicts.. */
-precedence left PLUS;
+precedence left OR;
+precedence left AND;
+precedence left NOT;
+precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
+precedence left PLUS, MINUS;
+precedence left MUL, DIV, MOD;
+precedence left DOT, COMMA, LBR, RBR;
+precedence right IF, ELSE;
/* The start symbol. */
start with program;
@@ -170,45 +291,221 @@ start with program;
/***** GRAMMAR RULES *****/
+/* Rules are defined in the order given by the language reference */
+
+/* program */
program ::= program_head:d opt_stmt_list:s
{: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
sxright, d, s, errors);
:}
;
-/* Initial list of declarations. */
-program_head ::= /* not implemented; currently matches empty string */
- {: RESULT = empty(); :}
+program_head ::= program_head:d var_def:vd {: RESULT = combine(d, vd); :}
+ | program_head:d class_def:cd {: RESULT = combine(d, cd); :}
+ | program_head:d fun_def:fd {: RESULT = combine(d, fd); :}
+ | program_head:d error:e {: RESULT = d; :}
+ | {: RESULT = empty(); :}
;
-
+
opt_stmt_list ::= {: RESULT = empty(); :}
| stmt_list:s {: RESULT = s; :}
;
-stmt_list ::= stmt:s {: RESULT = single(s); :}
- | stmt_list:l stmt:s {: RESULT = combine(l, s); :}
- | stmt_list:l error {: RESULT = l; :}
- /* If there is a syntax error in the source, this says to discard
- * symbols from the parsing stack and perform reductions until
- * there is a stmt_list on top of the stack, and then to discard
- * input symbols until it is possible to shift again, reporting
- * a syntax error. */
- ;
-stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :}
+/* class_def */
+class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :};
+
+
+/* class_body */
+class_body ::= PASS NEWLINE {: RESULT = empty(); :}
+ | class_body_defs:defs {: RESULT = defs; :}
;
+
+class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(defs, vd); :}
+ | class_body_defs:defs fun_def:fd {: RESULT = combine(defs, fd); :}
+ | class_body_defs:defs error {: RESULT = defs; :}
+ | var_def:vd {: RESULT = single(vd); :}
+ | fun_def:fd {: RESULT = single(fd); :}
+ ;
+
+
+/* fun_def */
+fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
+          {: /* An omitted return type arrives from ret_type as a ClassType with an empty name and no location; re-create it positioned at the colon. The name is tested with equals(), because == on Strings only compares references. */ TypeAnnotation _rt = rt; if ((rt instanceof ClassType) && "".equals(((ClassType) rt).className)) _rt = new ClassType(colxright, colxright, ""); RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :}
+          ;
-expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :}
+ret_type ::= ARROW type:t {: RESULT= t; :}
+ | {: RESULT= new ClassType(null, null,""); :}
+ ;
+
+typed_vars ::= typed_var:tv {: RESULT= single(tv); :}
+ | typed_vars:tvs COMMA typed_var:tv {: RESULT= combine(tvs, tv); :}
+ | typed_vars:tvs COMMA error {: RESULT= tvs; :}
+ | {: RESULT= empty(); :}
+ ;
+
+
+/* fun_body */
+fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:}
+ | fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList());:}
;
-expr ::= binary_expr:e {: RESULT = e; :}
- | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
+fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :}
+ | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :}
+ | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :}
+ | fun_body_decs:fbd fun_def:fd {: RESULT= combine(fbd, fd); :}
+ | fun_body_decs:fbd error {: RESULT= fbd; :}
+ | {: RESULT= empty(); :}
+ ;
+
+
+/* typed_var */
+typed_var ::= identifier:id COLON type:t {: RESULT = new TypedVar(idxleft, txright, id, t); :};
+
+
+/* type */
+type ::= identifier:id {: RESULT = new ClassType(idxleft, idxright, id.name); :}
+ | STRING:str {: RESULT = new ClassType(strxleft, strxright, str); :}
+ | LBR:lbr type:t RBR:rbr {: RESULT = new ListType(lbrxleft, rbrxright, t); :}
+ ;
+
+
+/* global_decl */
+global_decl ::= GLOBAL:g identifier:id NEWLINE {: RESULT = new GlobalDecl(gxleft, idxright, id); :};
+
+
+/* nonlocal_decl */
+nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLocalDecl(nxleft, idxright, id); :};
+
+
+/* var_def */
+var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :};
+
+
+/* stmt */
+stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :}
+ | IF:i expr:cond COLON block:b else_body:elb {: RESULT = new IfStmt(ixleft, getRight(elb), cond, b, elb); :}
+ | WHILE:wh expr:cond COLON block:b {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :}
+ | FOR:f identifier:id IN expr:e COLON block:b {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :}
+ ;
+
+
+else_body ::= ELSE:el COLON block:b {: RESULT = b; :}
+ | ELIF:el expr:cond COLON block:b else_body:elb {: RESULT = single(new IfStmt(elxleft, getRight(elb), cond, b, elb)); :}
+ | {: RESULT = empty(); :}
+ ;
+
+
+/* simple_stmt: PASS produces a null Stmt; a bare RETURN produces a ReturnStmt whose value is null. */
+simple_stmt ::= PASS:p {: RESULT = null; :}
+              | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :}
+              | RETURN:r expr:e {: RESULT = new ReturnStmt(rxleft, exright, e); :}
+              | RETURN:r {: RESULT = new ReturnStmt(rxleft, rxright, null); :}
+              | opt_target:ot expr:e {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :}
+              ;
+
+
+opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :}
+ | target:t ASSIGN {: RESULT = single(t); :}
+ ;
+
+
+/* block */
+block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :};
+
+
+/* literal */
+literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :}
+ | BOOL:b {: RESULT = new BooleanLiteral(bxleft, bxright, b); :}
+ | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
+ | STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :}
+ ;
+
+
+/* expr */
+expr ::= cexpr:ce {: RESULT = ce; :}
+ | NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :}
+ | expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :}
+ | expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :}
+ | expr:e1 IF expr:e2 ELSE expr:e3 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :}
;
-/* A binary expression, illustrating how to find the left and right
- * source position of a phrase. */
-binary_expr ::= expr:e1 PLUS:op expr:e2
- {: RESULT = new BinaryExpr(e1xleft, e2xright,
- e1, op, e2); :}
+/* cexpr */
+cexpr ::= pexpr:pe {: RESULT = pe; :}
+ | pexpr:p1 comp_op:co cexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :}
+ ;
+
+
+/* pexpr */
+pexpr ::= identifier:id {: RESULT = id; :}
+ | literal:l {: RESULT = l; :}
+ | LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :}
+ | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :}
+ | member_expr:m {: RESULT = m; :}
+ | index_expr:i {: RESULT = i; :}
+ | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :}
+ | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :}
+ | pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+ | pexpr:p1 MINUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+ | pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+ | pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+ | pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+ | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :}
+ ;
+
+expr_list ::= expr:e {: RESULT = single(e); :}
+ | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :}
+ | {: RESULT = new ArrayList(); :}
+ ;
+
+/* bin_op */ //We may still be able to use bin_op, so I left it here.
+bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :}
+ | MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :}
+ | MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :}
+ | DIV:a {: RESULT = new StringLiteral(axleft, axright, "//"); :} //Section 2.6.3 in chocopy language reference
+ | MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :}
+ ;
+
+
+/* comp_op */ //this might also need some change in order not to break left associativity
+comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :}
+ | NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :}
+ | LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :}
+ | GEQ:a {: RESULT = new StringLiteral(axleft, axright, ">="); :}
+ | LT:a {: RESULT = new StringLiteral(axleft, axright, "<"); :}
+ | GT:a {: RESULT = new StringLiteral(axleft, axright, ">"); :}
+ | IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :}
+ ;
+
+
+/* member_expr */
+member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :}
;
+
+
+/* index_expr */
+index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :}
+ ;
+
+
+/* target */
+target ::= identifier:id {: RESULT = id; :}
+ | member_expr:m {: RESULT = m; :}
+ | index_expr:i {: RESULT = i; :}
+ ;
+
+
+/* Extras - rules below have not been given in language reference, we have them to ease implementation */
+identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :};
+
+
+stmt_list ::= stmt:s {: RESULT = single(s); :}
+ | stmt_list:l stmt:s {: RESULT = combine(l, s); :}
+ | stmt_list:l error {: RESULT = l; :}
+ /* If there is a syntax error in the source, this says to discard
+ * symbols from the parsing stack and perform reductions until
+ * there is a stmt_list on top of the stack, and then to discard
+ * input symbols until it is possible to shift again, reporting
+ * a syntax error. */
+ ;
\ No newline at end of file
diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex
index 9aafe7f..c297fab 100644
--- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex
+++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex
@@ -1,5 +1,7 @@
package chocopy.pa1;
import java_cup.runtime.*;
+import java.util.ArrayList;
+import java.util.Iterator;
%%
@@ -8,7 +10,7 @@ import java_cup.runtime.*;
%unicode
%line
%column
-
+%states AFTER, STR
%class ChocoPyLexer
%public
@@ -32,7 +34,12 @@ import java_cup.runtime.*;
/** Producer of token-related values for the parser. */
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
-
+ private int currIndent = 0; //Current Indentation Level
+ private String currString = "";
+ private int str_l = 0, str_c = 0; //Start location of a string.
+ /*A stack that keeps track of the spaces in each Indentation Level*/
+ private ArrayList stack = new ArrayList(20);
+ private boolean indentErrorUnchecked = true;
/** Return a terminal symbol of syntactic category TYPE and no
* semantic value at the current source location. */
private Symbol symbol(int type) {
@@ -48,6 +55,26 @@ import java_cup.runtime.*;
value);
}
+ private void push(int indent){
+ stack.add(indent);
+ }
+ private int pop(){
+ if(stack.isEmpty()) return 0;
+ return stack.remove(stack.size() - 1);
+ }
+ private int top(){
+ if(stack.isEmpty()) return 0;
+ return stack.get(stack.size() - 1);
+ }
+ private boolean find(int indent){
+ if(indent == 0) return true;
+ Iterator it = stack.iterator();
+ while(it.hasNext()){
+ if(it.next() == indent)
+ return true;
+ }
+ return false;
+ }
%}
/* Macros (regexes used in rules below) */
@@ -55,28 +82,192 @@ import java_cup.runtime.*;
WhiteSpace = [ \t]
LineBreak = \r|\n|\r\n
-IntegerLiteral = 0 | [1-9][0-9]*
+IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal
+StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and "
+Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])*
+Comments = #[^\r\n]*
%%
+//YYINITIAL state is where we're dealing with indentations.
+//We will set the state to YYINITIAL when starting a
+//new line unless this line is within a string, e.g.:
+/*
+"this is \
+a string across \
+multiple lines\
+"
+*/
+{
+    {WhiteSpace}
+    {
+        /* Accumulate the indentation width of the current line. */
+        if("\t".equals(yytext())) // equals(): == on Strings compares references, not contents
+            currIndent += 8 - (currIndent % 8); // a tab advances to the next multiple of 8 columns
+        else
+            currIndent ++;
+    }
+/*
+# This python code will test if '\t' is 8 spaces
+# It will run and print '1\n2'
+# Please tell me if your Python reports an error
+# Or you find documentations that says otherwise
+if True:
+ print(1) # \t
+ print(2) # 8 spaces
+*/
+
+ {LineBreak}
+ {
+ /*
+ If this is a blank line, start over on the next line.
+ An empty line should just be ignored, therefore we don't
+ pass a NEWLINE to Cup.
+ */
+ currIndent = 0;
+ }
+ {Comments} { /* ignored */ } //Ignore blank lines
+
+ /*If it's not a blank line (Current character isn't a
+ Whitespace/linebreak/comment), deal with indentation here and
+ start accepting whatever is on this line in `AFTER' state*/
+ [^ \t\r\n#]
+ {
+ //rewind the current character.
+ yypushback(1);
+ if(top() > currIndent)
+ {
+ /*
+ If the indentation of the line is less than number of
+ indents current level should have,
+ keep dedenting until it reaches the level with the same
+ number of indents.
+ It's like a loop, because we're not changing the state
+ and we rewinded the current character. So it will keep
+ going until top()<= currIndent and it will switch to
+ AFTER state.
+ */
+ pop();
+ if(top() < currIndent)
+ {
+ currIndent = top();
+ return symbolFactory.newSymbol("", ChocoPyTokens.UNRECOGNIZED,
+ new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+ new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+ currIndent);
+ }
+ return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT,
+ new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+ new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+ currIndent);
+ }
+ /*Otherwise, we will start dealing with the rest
+ of the line after indentation in AFTER state. */
+ yybegin(AFTER);
+ if(top()< currIndent)
+ {
+ /*
+ If current indentation is more than the number of indents
+ current level should have, start a new level which will have
+ `currIndent' indents.
+ */
- {
+ push(currIndent);
+ return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT,
+ new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+ new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+ currIndent);
+ }
+ }
+}
+ {
/* Delimiters. */
- {LineBreak} { return symbol(ChocoPyTokens.NEWLINE); }
+ {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);}
+ ":" { return symbol(ChocoPyTokens.COLON); }
+ "," { return symbol(ChocoPyTokens.COMMA); }
/* Literals. */
{IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER,
Integer.parseInt(yytext())); }
- /* Operators. */
- "+" { return symbol(ChocoPyTokens.PLUS, yytext()); }
+ "\"" {yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = "";} //Start taking a string when see a "
+ "False" { return symbol(ChocoPyTokens.BOOL, false); }
+ "True" { return symbol(ChocoPyTokens.BOOL, true); }
+ "None" { return symbol(ChocoPyTokens.NONE); }
+
+ /*Keywords*/
+ "if" {return symbol(ChocoPyTokens.IF);}
+ "else" {return symbol(ChocoPyTokens.ELSE);}
+ "elif" {return symbol(ChocoPyTokens.ELIF);}
+ "while" {return symbol(ChocoPyTokens.WHILE);}
+ "class" {return symbol(ChocoPyTokens.CLASS);}
+ "def" {return symbol(ChocoPyTokens.DEF);}
+ "lambda" {return symbol(ChocoPyTokens.LAMBDA);}
+ "as" { return symbol(ChocoPyTokens.AS); }
+ "for" { return symbol(ChocoPyTokens.FOR); }
+ "global" { return symbol(ChocoPyTokens.GLOBAL); }
+ "in" { return symbol(ChocoPyTokens.IN); }
+ "nonlocal" { return symbol(ChocoPyTokens.NONLOCAL); }
+ "pass" { return symbol(ChocoPyTokens.PASS); }
+ "return" { return symbol(ChocoPyTokens.RETURN); }
+ "assert" { return symbol(ChocoPyTokens.ASSERT); }
+ "await" { return symbol(ChocoPyTokens.AWAIT); }
+ "break" { return symbol(ChocoPyTokens.BREAK); }
+ "continue" { return symbol(ChocoPyTokens.CONTINUE); }
+ "del" { return symbol(ChocoPyTokens.DEL); }
+ "except" { return symbol(ChocoPyTokens.EXCEPT); }
+ "finally" { return symbol(ChocoPyTokens.FINALLY); }
+ "from" { return symbol(ChocoPyTokens.FROM); }
+ "import" { return symbol(ChocoPyTokens.IMPORT); }
+ "raise" { return symbol(ChocoPyTokens.RAISE); }
+ "try" { return symbol(ChocoPyTokens.TRY); }
+ "with" { return symbol(ChocoPyTokens.WITH); }
+ "yield" { return symbol(ChocoPyTokens.YIELD); }
+
+ /* Operators. */
+ "+" { return symbol(ChocoPyTokens.PLUS); }
+ "-" { return symbol(ChocoPyTokens.MINUS); }
+ "*" { return symbol(ChocoPyTokens.MUL); }
+ "//" { return symbol(ChocoPyTokens.DIV); }
+ "/" { return symbol(ChocoPyTokens.DIV); } //Accroding to manual, chocopy don't have fp division, '/', '//' should be integr division
+ "%" { return symbol(ChocoPyTokens.MOD); }
+ ">" { return symbol(ChocoPyTokens.GT); }
+ "<" { return symbol(ChocoPyTokens.LT); }
+ "==" { return symbol(ChocoPyTokens.EQUAL); }
+ "!=" { return symbol(ChocoPyTokens.NEQ); }
+ ">=" { return symbol(ChocoPyTokens.GEQ); }
+ "<=" { return symbol(ChocoPyTokens.LEQ); }
+ "=" { return symbol(ChocoPyTokens.ASSIGN); }
+ "and" { return symbol(ChocoPyTokens.AND); }
+ "or" { return symbol(ChocoPyTokens.OR); }
+ "not" { return symbol(ChocoPyTokens.NOT); }
+ "." { return symbol(ChocoPyTokens.DOT); }
+ "(" { return symbol(ChocoPyTokens.LPAR); }
+ ")" { return symbol(ChocoPyTokens.RPAR); }
+ "[" { return symbol(ChocoPyTokens.LBR); }
+ "]" { return symbol(ChocoPyTokens.RBR); }
+ "->" { return symbol(ChocoPyTokens.ARROW); }
+ "is" { return symbol(ChocoPyTokens.IS); }
+
+
+ /*Identifiers*/
+ {Identifiers} {return symbol(ChocoPyTokens.ID, yytext());}
/* Whitespace. */
{WhiteSpace} { /* ignore */ }
+ /* Comment. */
+ {Comments} { /* ignore */ }
}
-
-<> { return symbol(ChocoPyTokens.EOF); }
+{
+ {StringLiteral} {currString += yytext();}
+ \\$ { /*'\' at the end of line, do nothing.*/ }
+ "\"" {yybegin(AFTER); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING,
+ new ComplexSymbolFactory.Location(str_l, str_c),
+ new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+ currString);} // accepted a ", return to AFTER state
+}
+<> { if(!stack.isEmpty()){ return symbol(ChocoPyTokens.DEDENT, pop());} return symbol(ChocoPyTokens.EOF);}
/* Error fallback. */
[^] { return symbol(ChocoPyTokens.UNRECOGNIZED); }
diff --git a/src/test/data/pa1/student_contributed/bad.py b/src/test/data/pa1/student_contributed/bad.py
index b85905e..68e3773 100644
--- a/src/test/data/pa1/student_contributed/bad.py
+++ b/src/test/data/pa1/student_contributed/bad.py
@@ -1 +1,35 @@
1 2 3
+
+def fun5():
+ c = 6
+ def fun6():
+ print("Hello")
+ c = 4 + 5
+
+if True:
+ if True:
+ print("Hello")
+ if True:
+ print("Maybe")
+ else:
+ print("World")
+else:
+ print("Again")
+else:
+ print("And Again")
+
+class Thor(object):
+ y:int = 0
+ print("Right place?")
+
+class Stones(object):
+ y:int = 0
+ def fun(x:int):
+ print("Right place?")
+ def bar():
+ return 2+3
+ print("Wrong Place")
+
+def fun1():
+ def fun2():
+ print("Hello")
diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py
index 8138b36..093076f 100644
--- a/src/test/data/pa1/student_contributed/good.py
+++ b/src/test/data/pa1/student_contributed/good.py
@@ -1 +1,80 @@
-1 + 2 + 3
+class Foo(object):
+ x:int = 0
+
+ def __init__(self:"Foo", x:int):
+ self.x = x
+
+ def bar(y:int):
+ print("Hello World!",self.x+y)
+ y = 10
+
+def get_stones(name:str)->str:
+ def map_name(nm:str)->str:
+ return stones[color.index(nm)]
+ color=["Red","Blue"]
+ stones=["Mind","Soul"]
+ return map_name(name)
+
+def funa():
+ def funb():
+ print("Hello")
+ funb()
+
+def fund():
+ def fune():
+ print("Hello")
+ c = 4 + 5
+
+def funf():
+ def fung():
+ print("Hello")
+ c = 6
+ c = 4 + 5
+
+
+if True:
+ if True:
+ if True:
+ print("Hello")
+print("World")
+
+if True:
+ if True:
+ if True:
+ print("Hello")
+ print("World")
+
+if True:
+ if True:
+ if True:
+ print("Hello")
+ print("World")
+
+if True:
+ if True:
+ if True:
+ print("Hello")
+ else:
+ print("World")
+
+if True:
+ if True:
+ if True:
+ print("Hello")
+else:
+ print("World")
+
+
+
+f = Foo(1)
+print(f.x)
+f.bar(4)
+
+a=[[[1],[2]],[[3],[4]]]
+print(a[0][0][1]*a[1][1][0])
+
+multiline_string="Hi World,
+Here I am"
+
+stone="Blue"
+print(get_stones(stone))
\ No newline at end of file
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..7060f44
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Run the PA1 sample tests: every sample when no argument is given, otherwise the named file.
+FILENAME=$1
+
+if [ -z "$1" ] ; then
+    echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)"
+    java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/
+    exit $?  # report the test runner's own status instead of always signalling failure
+fi
+
+java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \
+    --pass=s --test "src/test/data/pa1/sample/${FILENAME}"