Merge pull request #2 from nyu-compiler-construction/bill/merge-flex-cup

Bill/merge flex cup
5 years ago · b81176b759
parent 42628a34f7 8869a01c09
commit b81176b759
9 changed files with 685 additions and 42 deletions
--- a/.gitignore
+++ b/.gitignore
@ -147,3 +147,6 @@ tramp
 Session.vim
 .netrwhist
 *~
+
+# JFlex
+src/main/jflex/chocopy/pa1/ChocoPyLexer.java
--- a/WORKLOG.md
+++ b/WORKLOG.md
@ -0,0 +1,21 @@
+Compiler Construction PA1 Worklog
+<br>
+<br>Team:
+<br>Apoorva Ranade(ar6496)
+<br>Sanjar Ahmadov(sa5640)
+<br>Yinqi Sun(ys3540)
+<br>
+<br>Acknowledgments: Provide attribution to any collaborations, external resources, or outside help.
+<br>
+<br>Indentation: Describe your strategy for handling INDENT and DEDENT tokens. Point to the relevant source files and line numbers.
+<br>A stack is maintained by the lexer to keep track of indentations. A count is accumulated for the number of whitespace characters before the first token. If the count changes from the previous line count, a stack operation is performed. If count increases, another value is added to the stack. If count decreases, the topmost value is popped from the stack.
+<br>
+<br>Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any.
+<br> Shift-reduce conflicts arose when generating the parser from the grammar. One way to fix them is to rewrite the grammar; we instead resolved them with precedence declarations, as is already done for expr, by declaring IF and ELSE right-associative.
+<br> Handling errors was another challenge. This required debugging and small changes to program flow.
+<br> Understanding the given code was a small challenge and took some time before we could start coding.
+<br>
+<br>Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis.
+<br> Added more tests to rigorously check program flow and indentation.
+<br> Function body must have at least one statement which is not a part of a nested function.
+<br> Support for multi-line strings.
--- a/build.sh
+++ b/build.sh
@ -0,0 +1,3 @@
+#!/bin/bash
+# Clean and rebuild the project with Maven; -e asks Maven for detailed error messages.
+mvn clean package -e
--- a/full_test.sh
+++ b/full_test.sh
@ -0,0 +1,3 @@
+#!/bin/bash
+# Build first; run the tests only if the build succeeded, and propagate failures.
+./build.sh && ./test.sh
--- a/src/main/cup/chocopy/pa1/ChocoPy.cup
+++ b/src/main/cup/chocopy/pa1/ChocoPy.cup
@ -1,4 +1,5 @@
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;

 import java_cup.runtime.*;
@ -113,12 +114,26 @@ action code {:
        }
        return list;
    }
-
+    /** Append every element of ITEM to LIST, in order, and return LIST.
+     *  A null ITEM adds nothing. */
+    <T> List<T> combine(List<T> list, List<T> item) {
+        if (item == null) {
+            return list;
+        }
+        for (T element : item) {
+            list.add(element);
+        }
+        return list;
+    }
    /** Return a mutable empty list. */
    <T> List<T> empty() {
        return new ArrayList<T>();
    }
-
+    /** Holder pairing the declarations of a function body with the
+     *  statements of that body (the semantic value of fun_body). */
+    class FuncBody {
+        /** Declarations of the function body. */
+        public List<Declaration> fbd;
+        /** Statements of the function body. */
+        public List<Stmt> sl;
+
+        public FuncBody(List<Declaration> declarations, List<Stmt> statements) {
+            fbd = declarations;
+            sl = statements;
+        }
+    }
    /** Return the leftmost non-whitespace location in NODES, or null if NODES
     *  is empty.  Assumes that the nodes of NODES are ordered in increasing
     *  order of location, from left to right. */
@ -131,6 +146,18 @@ action code {:
                                                 first.getLocation()[1]);
    }
    
+    /** Return the location at which the last node of NODES ends, or null
+     *  if NODES is empty.  Assumes that the nodes of NODES are ordered in
+     *  increasing order of location, from left to right. */
+    ComplexSymbolFactory.Location getRight(List<? extends Node> nodes) {
+        int count = nodes.size();
+        if (count == 0) {
+            return null;
+        }
+        Node tail = nodes.get(count - 1);
+        /* Entries 2 and 3 of a node's location are passed on as the
+         * position of its right edge. */
+        return new ComplexSymbolFactory.Location(tail.getLocation()[2],
+                                                 tail.getLocation()[3]);
+    }
+
 :}

 /* Terminal symbols (tokens returned by the lexer).  The declaration
@ -142,12 +169,81 @@ action code {:
 * semantic value of type <type> for these symbols that may be referenced
 * in actions ( {: ... :} ).
 */
+terminal INDENT;
+terminal DEDENT;
+terminal String ID;
+terminal String STRING;
+
+
+
+/* Terminal Delimiters */
 terminal NEWLINE;
-terminal String PLUS; 
+terminal String COLON;
+terminal String COMMA;
+
+/* Terminal Literals */
 terminal Integer NUMBER;
+terminal Boolean BOOL;
+terminal String NONE;
+
+/* Terminal Keywords */
+terminal String IF;
+terminal String ELSE;
+terminal String ELIF;
+terminal String WHILE;
+terminal String CLASS;
+terminal String DEF;
+terminal String LAMBDA;
+terminal String AS;
+terminal String FOR;
+terminal String GLOBAL;
+terminal String IN;
+terminal String NONLOCAL;
+terminal String PASS;
+terminal String RETURN;
+terminal String ASSERT;
+terminal String AWAIT;
+terminal String BREAK;
+terminal String CONTINUE;
+terminal String DEL;
+terminal String EXCEPT;
+terminal String FINALLY;
+terminal String FROM;
+terminal String IMPORT;
+terminal String RAISE;
+terminal String TRY;
+terminal String WITH;
+terminal String YIELD;
+
+
+/* Terminal Operators */
+terminal String PLUS;
+terminal String MINUS;
+terminal String MUL;
+terminal String DIV;
+terminal String MOD;
+terminal String GT;
+terminal String LT;
+terminal String EQUAL;
+terminal String NEQ;
+terminal String GEQ;
+terminal String LEQ;
+terminal String ASSIGN;
+terminal String AND;
+terminal String OR;
+terminal String NOT;
+terminal String DOT;
+terminal String LPAR;
+terminal String RPAR;
+terminal String LBR;
+terminal String RBR;
+terminal String ARROW;
+terminal String IS;
+
+
 /* Returned by the lexer for erroneous tokens.  Since it does not appear in
 * the grammar, it indicates a syntax error. */
-terminal UNRECOGNIZED;   
+terminal String UNRECOGNIZED;   

 /* Nonterminal symbols (defined in production rules below).
 * As for terminal symbols, 
@ -155,14 +251,39 @@ terminal UNRECOGNIZED;
 * defines the listed nonterminal identifier symbols to have semantic values
 * of type <type>. */
 non terminal Program           program;
-non terminal List<Declaration> program_head;
-non terminal List<Stmt>        stmt_list, opt_stmt_list;
-non terminal Stmt              stmt, expr_stmt;
-non terminal Expr              expr, binary_expr;
+non terminal List<Declaration> defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs;
+non terminal List<Stmt>        stmt_list, opt_stmt_list, block, else_body;
+non terminal Stmt              stmt, simple_stmt;
+non terminal Expr              expr, pexpr, cexpr;
+non terminal VarDef            var_def;
+non terminal ClassDef          class_def;
+non terminal FuncDef           fun_def;
+non terminal Literal           literal;
+non terminal StringLiteral     bin_op, comp_op;
+non terminal TypedVar          typed_var;
+non terminal TypeAnnotation    type, ret_type;
+non terminal Identifier        identifier;
+non terminal List<TypedVar>    typed_vars;
+non terminal GlobalDecl        global_decl;
+non terminal NonLocalDecl      nonlocal_decl;
+non terminal List<Expr>        opt_target, expr_list;
+non terminal Expr              target;
+non terminal MemberExpr        member_expr;
+non terminal IndexExpr         index_expr;
+non terminal FuncBody          fun_body;
+
+

 /* Precedences (lowest to highest) for resolving what would otherwise be
 * ambiguities in the form of shift/reduce conflicts.. */
-precedence left PLUS;
+/* The conditional expression (e1 if e2 else e3) binds most loosely, so
+ * IF/ELSE are declared first (lowest).  They are right-associative so that
+ * chained conditionals nest to the right:
+ *   a if b else c if d else e  ==  a if b else (c if d else e)
+ * With IF/ELSE lowest, "a or b if c else d" groups as
+ * "(a or b) if c else d" and "a if b else c or d" as
+ * "a if b else (c or d)". */
+precedence right IF, ELSE;
+precedence left OR;
+precedence left AND;
+precedence left NOT;
+precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
+precedence left PLUS, MINUS;
+precedence left MUL, DIV, MOD;
+precedence left DOT, COMMA, LBR, RBR;

 /* The start symbol. */
 start with program;
@ -170,45 +291,221 @@ start with program;

 /*****  GRAMMAR RULES *****/

+/* Rules are defined in the order given by the language reference */
+
+/* program */
 program ::= program_head:d opt_stmt_list:s
        {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
                                sxright, d, s, errors);
        :}
        ;

-/* Initial list of declarations. */
-program_head ::= /* not implemented; currently matches empty string */
-                                     {: RESULT = empty(); :}
+program_head ::= program_head:d var_def:vd              {: RESULT = combine(d, vd); :}
+                | program_head:d class_def:cd           {: RESULT = combine(d, cd); :}
+                | program_head:d fun_def:fd             {: RESULT = combine(d, fd); :}
+                | program_head:d error:e                {: RESULT = d; :}
+                |                                       {: RESULT = empty(); :}
                ;
                
 opt_stmt_list ::=                    {: RESULT = empty(); :}
                | stmt_list:s        {: RESULT = s; :}
                ;

-stmt_list ::= stmt:s                 {: RESULT = single(s); :}
-            | stmt_list:l stmt:s     {: RESULT = combine(l, s); :}
-            | stmt_list:l error      {: RESULT = l; :}
-            /* If there is a syntax error in the source, this says to discard
-             * symbols from the parsing stack and perform reductions until
-             * there is a stmt_list on top of the stack, and then to discard
-             * input symbols until it is possible to shift again, reporting
-             * a syntax error. */
+
+/* class_def */
+/* A class definition: header line followed by an indented class_body.
+ * The node spans from the CLASS keyword to the end of the last body
+ * declaration.
+ * NOTE(review): for a "pass" body class_body is an empty list, so
+ * getRight(cb) returns null and the node gets no right location -- confirm
+ * whether a fallback position is wanted here. */
+class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT   {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :}; 
+
+
+/* class_body */
+class_body ::= PASS NEWLINE                             {: RESULT = empty(); :}
+              | class_body_defs:defs                    {: RESULT = defs; :}
+              ;
+              
+class_body_defs ::= class_body_defs:defs var_def:vd       {: RESULT = combine(defs, vd); :}
+                  | class_body_defs:defs fun_def:fd       {: RESULT = combine(defs, fd); :}
+                  | class_body_defs:defs error            {: RESULT = defs; :}
+                  | var_def:vd                            {: RESULT = single(vd); :}
+                  | fun_def:fd                            {: RESULT = single(fd); :}
+                  ;
+
+
+/* fun_def */
+/* A function definition.  When the "-> type" annotation is omitted,
+ * ret_type yields a "<None>" ClassType built with null locations; it is
+ * replaced here by one positioned at the end of the header's colon.  The
+ * node spans from DEF to the end of the last body statement. */
+fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
+            {:
+                TypeAnnotation _rt = rt;
+                /* Compare string contents with equals(); == only tests
+                 * object identity and works here merely because both
+                 * operands happen to be interned literals. */
+                if ((rt instanceof ClassType)
+                        && "<None>".equals(((ClassType) rt).className)) {
+                    _rt = new ClassType(colxright, colxright, "<None>");
+                }
+                RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl);
+            :}
+            ;

-stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :}
+ret_type ::= ARROW type:t     {: RESULT= t; :}
+          |                   {: RESULT= new ClassType(null, null,"<None>"); :}
+          ;
+
+typed_vars ::= typed_var:tv                                 {: RESULT= single(tv); :}
+              | typed_vars:tvs COMMA typed_var:tv           {: RESULT= combine(tvs, tv); :}
+              | typed_vars:tvs COMMA error                  {: RESULT= tvs; :}
+              |                                             {: RESULT= empty(); :}
              ; 
                  
-expr_stmt ::= expr:e    {: RESULT = new ExprStmt(exleft, exright, e); :}
+             
+/* fun_body */   
+fun_body ::=  fun_body_decs:fbd stmt_list:sl                {: RESULT = new FuncBody(fbd, sl);:}
+            | fun_body_decs:fbd                             {: RESULT = new FuncBody(fbd, new ArrayList<Stmt>());:}
            ;

-expr ::= binary_expr:e  {: RESULT = e; :}
-       | NUMBER:n       {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
+fun_body_decs ::= fun_body_decs:fbd  global_decl:gd         {: RESULT= combine(fbd, gd); :}
+                | fun_body_decs:fbd  nonlocal_decl:nd       {: RESULT= combine(fbd, nd); :} 
+                | fun_body_decs:fbd  var_def:vd             {: RESULT= combine(fbd, vd); :}
+                | fun_body_decs:fbd  fun_def:fd             {: RESULT= combine(fbd, fd); :} 
+                | fun_body_decs:fbd  error                  {: RESULT= fbd; :} 
+                |                                           {: RESULT= empty(); :}
+                ;
+            
+
+/* typed_var */
+typed_var ::= identifier:id COLON type:t                 {: RESULT = new TypedVar(idxleft, txright, id, t); :};
+
+
+/* type */
+type ::= identifier:id                                   {: RESULT = new ClassType(idxleft, idxright, id.name); :}
+      | STRING:str                                       {: RESULT = new ClassType(strxleft, strxright, str); :}
+      | LBR:lbr type:t RBR:rbr                           {: RESULT = new ListType(lbrxleft, rbrxright, t); :}
+      ;
+
+
+/* global_decl */
+global_decl ::= GLOBAL:g identifier:id NEWLINE             {: RESULT = new GlobalDecl(gxleft, idxright, id); :};
+
+
+/* nonlocal_decl */
+nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE        {: RESULT = new NonLocalDecl(nxleft, idxright, id); :};
+
+
+/* var_def */
+var_def ::= typed_var:t ASSIGN literal:l NEWLINE          {: RESULT = new VarDef(txleft, lxright, t, l); :};
+
+
+/* stmt */
+/* A statement: a simple statement terminated by NEWLINE, or a compound
+ * if/while/for statement.  An if-statement without else/elif has an empty
+ * else_body, for which getRight returns null; its right edge therefore
+ * falls back to the end of the "then" block. */
+stmt ::= simple_stmt:s NEWLINE                                            {: RESULT = s; :}
+        | IF:i expr:cond COLON block:b else_body:elb                      {: RESULT = new IfStmt(ixleft, elb.isEmpty() ? getRight(b) : getRight(elb), cond, b, elb); :}
+        | WHILE:wh expr:cond COLON block:b                                {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :}
+        | FOR:f identifier:id IN expr:e COLON block:b                     {: RESULT = new ForStmt(fxleft, getRight(b), id,  e, b); :}
+        ;
+
+
+/* The optional tail of an if-statement.  "else" yields its block; "elif"
+ * yields a one-element list holding a nested IfStmt; no tail yields an
+ * empty list.  A trailing elif (empty nested else_body) ends at the end of
+ * its own block, because getRight of an empty list is null. */
+else_body ::= ELSE:el COLON block:b                                       {: RESULT = b; :}
+            |  ELIF:el expr:cond COLON block:b else_body:elb              {: RESULT = single(new IfStmt(elxleft, elb.isEmpty() ? getRight(b) : getRight(elb), cond, b, elb)); :}
+            |                                                             {: RESULT = empty(); :}
+            ;
+    
+    
+/* simple_stmt */
+/* "pass" produces no AST node (null).  A bare "return" must still produce
+ * a ReturnStmt, with a null value, so that the statement is not silently
+ * dropped from the enclosing body. */
+simple_stmt ::= PASS:p                                        {: RESULT = null; :}
+              | expr:e                                        {: RESULT = new ExprStmt(exleft, exright, e); :}
+              | RETURN:r expr:e                               {: RESULT = new ReturnStmt(rxleft, exright, e); :}
+              | RETURN:r                                      {: RESULT = new ReturnStmt(rxleft, rxright, null); :}
+              | opt_target:ot expr:e                          {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :}
+            ;
+            
+            
+opt_target ::= opt_target:ot target:t ASSIGN                  {: RESULT = combine(ot, t); :}
+              | target:t ASSIGN                               {: RESULT = single(t); :}
+              ;
+        
+        
+/* block */      
+block ::= NEWLINE INDENT stmt_list:sl DEDENT                 {: RESULT = sl; :};
+
+
+/* literal */
+literal ::= NONE:n                                        {: RESULT = new NoneLiteral(nxleft, nxright); :}
+          | BOOL:b                                        {: RESULT = new BooleanLiteral(bxleft, bxright, b); :}
+          | NUMBER:n                                      {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
+          | STRING:s                                      {: RESULT = new StringLiteral(sxleft, sxright, s); :}
+          ;
+  
+  
+/* expr */ 
+expr ::= cexpr:ce                                         {: RESULT = ce; :}
+        | NOT:n expr:exp                                  {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :}
+        | expr:e1 AND:a expr:e2                           {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :}
+        | expr:e1 OR:o expr:e2                            {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2);  :}
+        | expr:e1 IF expr:e2 ELSE expr:e3                 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :}
       ;


-/* A binary expression, illustrating how to find the left and right
- * source position of a phrase. */
-binary_expr ::= expr:e1 PLUS:op expr:e2
-                        {: RESULT = new BinaryExpr(e1xleft, e2xright,
-                                                   e1, op, e2); :}
+/* cexpr */
+cexpr ::= pexpr:pe                                {: RESULT = pe; :}
+          | pexpr:p1 comp_op:co cexpr:p2          {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :}
+        ;
+
+
+/* pexpr */
+pexpr ::= identifier:id                                   {: RESULT = id; :}
+        | literal:l                                       {: RESULT = l; :}
+        | LBR:lbr expr_list:l RBR:rbr                     {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :}
+        | LPAR:lpar expr:e RPAR:rpar                      {: RESULT = e; :}
+        | member_expr:m                                   {: RESULT = m; :}
+        | index_expr:i                                    {: RESULT = i; :}
+        | member_expr:m LPAR expr_list:l RPAR:rpar        {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :}
+        | identifier:id LPAR expr_list:l RPAR:rpar        {: RESULT = new CallExpr(idxleft, rparxright, id, l); :}
+        | pexpr:p1 PLUS:bo pexpr:p2                       {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+        | pexpr:p1 MINUS:bo pexpr:p2                      {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+        | pexpr:p1 MUL:bo pexpr:p2                        {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+        | pexpr:p1 DIV:bo pexpr:p2                        {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+        | pexpr:p1 MOD:bo pexpr:p2                        {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
+        | MINUS:m pexpr:p                                 {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :}
+        ;
+        
+expr_list ::= expr:e                                      {: RESULT = single(e); :}
+            | expr_list:el COMMA expr:e                   {: RESULT = combine(el, e); :}
+            |                                             {: RESULT = new ArrayList<Expr>(); :}
+            ;
+
+/* bin_op */ //We may still be able to use bin_op, so I left it here.
+bin_op ::= PLUS:a                                           {: RESULT = new StringLiteral(axleft, axright, "+"); :}
+          | MINUS:a                                         {: RESULT = new StringLiteral(axleft, axright, "-"); :}
+          | MUL:a                                           {: RESULT = new StringLiteral(axleft, axright, "*"); :}
+          | DIV:a                                           {: RESULT = new StringLiteral(axleft, axright, "//"); :}  //Section 2.6.3 in chocopy language reference
+          | MOD:a                                           {: RESULT = new StringLiteral(axleft, axright, "%"); :} 
+          ;
+
+
+/* comp_op */ //this might also need some change in order not to break left associativity
+comp_op ::= EQUAL:a                                         {: RESULT = new StringLiteral(axleft, axright, "=="); :} 
+          | NEQ:a                                           {: RESULT = new StringLiteral(axleft, axright, "!="); :} 
+          | LEQ:a                                           {: RESULT = new StringLiteral(axleft, axright, "<="); :} 
+          | GEQ:a                                           {: RESULT = new StringLiteral(axleft, axright, ">="); :} 
+          | LT:a                                            {: RESULT = new StringLiteral(axleft, axright, "<"); :} 
+          | GT:a                                            {: RESULT = new StringLiteral(axleft, axright, ">"); :} 
+          | IS:a                                            {: RESULT = new StringLiteral(axleft, axright, "is"); :}
+          ;
+
+
+/* member_expr */
+member_expr ::= pexpr:p DOT identifier:id                   {: RESULT = new MemberExpr(pxleft, idxright, p, id); :}
              ;
+
+
+/* index_expr */
+index_expr ::= pexpr:p LBR expr:e RBR:rbr                   {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :}
+              ;
+    
+    
+/* target */
+target ::= identifier:id                               {: RESULT = id; :}
+          | member_expr:m                              {: RESULT = m; :} 
+          | index_expr:i                               {: RESULT = i; :}
+          ;
+              
+              
+/* Extras - rules below have not been given in language reference, we have them to ease implementation */
+identifier ::= ID:idStr                                 {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :};
+
+
+stmt_list ::= stmt:s                 {: RESULT = single(s); :}
+            | stmt_list:l stmt:s     {: RESULT = combine(l, s); :}
+            | stmt_list:l error      {: RESULT = l; :}
+            /* If there is a syntax error in the source, this says to discard
+             * symbols from the parsing stack and perform reductions until
+             * there is a stmt_list on top of the stack, and then to discard
+             * input symbols until it is possible to shift again, reporting
+             * a syntax error. */
+            ;
--- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex
+++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex
@ -1,5 +1,7 @@
 package chocopy.pa1;
 import java_cup.runtime.*;
+import java.util.ArrayList;
+import java.util.Iterator;

 %%

@ -8,7 +10,7 @@ import java_cup.runtime.*;
 %unicode
 %line
 %column
-
+%states AFTER, STR
 %class ChocoPyLexer
 %public

@ -32,7 +34,12 @@ import java_cup.runtime.*;

    /** Producer of token-related values for the parser. */
    final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
-
+    private int currIndent = 0; //Current Indentation Level
+    private String currString = "";
+    private int str_l = 0, str_c = 0; //Start location of a string.
+    /*A stack that keeps track of the spaces in each Indentation Level*/
+    private ArrayList<Integer> stack = new ArrayList<Integer>(20); 
+    private boolean indentErrorUnchecked = true;
    /** Return a terminal symbol of syntactic category TYPE and no
     *  semantic value at the current source location. */
    private Symbol symbol(int type) {
@ -48,6 +55,26 @@ import java_cup.runtime.*;
            value);
    }

+    /** Push INDENT onto the indentation stack as the newest (top) entry. */
+    private void push(int indent){
+        stack.add(indent);
+    }
+    /** Remove and return the top entry of the indentation stack, or return
+     *  0 (leaving the stack untouched) when it is empty. */
+    private int pop(){
+        int last = stack.size() - 1;
+        return last < 0 ? 0 : stack.remove(last);
+    }
+    /** Return the top entry of the indentation stack without removing it,
+     *  or 0 when the stack is empty. */
+    private int top(){
+        int last = stack.size() - 1;
+        return last < 0 ? 0 : stack.get(last);
+    }
+    /** Return true iff INDENT is 0 or equals some entry currently on the
+     *  indentation stack. */
+    private boolean find(int indent){
+      return indent == 0 || stack.contains(indent);
+    }
 %}

 /* Macros (regexes used in rules below) */
@ -55,28 +82,192 @@ import java_cup.runtime.*;
 WhiteSpace = [ \t]
 LineBreak  = \r|\n|\r\n

-IntegerLiteral = 0 | [1-9][0-9]*

+IntegerLiteral = 0|[1-9][0-9]* // According to the manual, 00+ is illegal
+StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " 
+Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])* 
+Comments = #[^\r\n]*
 %%
+//YYINITIAL state is where we're dealing with indentations.
+//We will set the state to YYINITIAL when starting a 
+//new line unless this line is within a string, e.g.:
+/*
+"this is \
+a string across \
+multiple lines\
+"
+*/
+<YYINITIAL>{
+  {WhiteSpace}
+  {
+      /* Accumulate the width of the line's leading whitespace.
+         String contents must be compared with equals(): == tests object
+         identity, which is never true for the fresh String returned by
+         yytext(), so a tab used to be counted as a single space. */
+      if("\t".equals(yytext()))
+        currIndent += 8 - (currIndent % 8); // a tab advances to the next multiple of 8
+      else
+        currIndent ++;
+  }
+/*
+# This python code will test if '\t' is 8 spaces
+# It will run and print '1\n2'
+# Please tell me if your Python reports an error
+# Or you find documentations that says otherwise
+
+if True:
+    print(1) # \t
+        print(2) # 8 spaces
+*/
  
+  {LineBreak} 
+  {
+      /*
+      If this is a blank line, start over on the next line.
+      An empty line should just be ignored, therefore we don't 
+      pass a NEWLINE to Cup.
+      */
+      currIndent = 0;
+  }
+  {Comments}           { /* ignored */ } //Ignore blank lines

-<YYINITIAL> {
+  /*If it's not a blank line (Current character isn't a 
+    Whitespace/linebreak/comment), deal with indentation here and 
+    start accepting whatever is on this line in `AFTER' state*/
+  [^ \t\r\n#] 
+  {
+      //rewind the current character.
+      yypushback(1);
+      if(top() > currIndent)
+      {   
+          /*
+          If the indentation of the line is less than number of 
+          indents current level should have,
+          keep dedenting until it reaches the level with the same 
+          number of indents.
+          It's like a loop, because we're not changing the state 
+          and we rewinded the current character. So it will keep 
+          going until top()<= currIndent and it will switch to 
+          AFTER state.
+          */ 
+          pop();
+          if(top() < currIndent)
+          {
+            currIndent = top();
+            return symbolFactory.newSymbol("<bad indentation>", ChocoPyTokens.UNRECOGNIZED,
+              new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+              new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+              currIndent);
+          }
+          return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT,
+            new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+            new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+            currIndent);
+      }
+      /*Otherwise, we will start dealing with the rest 
+        of the line after indentation in AFTER state. */
+      yybegin(AFTER);
+      if(top()< currIndent)
+      {   
+          /*
+          If current indentation is more than the number of indents 
+          current level should have, start a new level which will have 
+          `currIndent' indents.
+          */
+
+          push(currIndent);
+          return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT,
+            new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
+            new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+            currIndent);
+      }
+  }
+}
+<AFTER> {

  /* Delimiters. */
-  {LineBreak}                 { return symbol(ChocoPyTokens.NEWLINE); }
+  {LineBreak}                 {  yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);}
+  ":"                         { return symbol(ChocoPyTokens.COLON); }
+  ","                         { return symbol(ChocoPyTokens.COMMA); }

  /* Literals. */
  {IntegerLiteral}            { /* A literal too large for an int makes
                                   Integer.parseInt throw; report it as an
                                   erroneous token instead of crashing the
                                   lexer. */
                                try {
                                    return symbol(ChocoPyTokens.NUMBER,
                                                  Integer.parseInt(yytext()));
                                } catch (NumberFormatException e) {
                                    return symbol(ChocoPyTokens.UNRECOGNIZED);
                                } }

+  "\""                           {yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = "";} //Start taking a string when see a "
+  "False"                        { return symbol(ChocoPyTokens.BOOL, false); }
+  "True"                         { return symbol(ChocoPyTokens.BOOL, true); }
+  "None"                         { return symbol(ChocoPyTokens.NONE); }
+
+  /*Keywords*/
+  "if"                          {return symbol(ChocoPyTokens.IF);}
+  "else"                          {return symbol(ChocoPyTokens.ELSE);}
+  "elif"                          {return symbol(ChocoPyTokens.ELIF);}
+  "while"                          {return symbol(ChocoPyTokens.WHILE);}
+  "class"                          {return symbol(ChocoPyTokens.CLASS);}
+  "def"                          {return symbol(ChocoPyTokens.DEF);}
+  "lambda"                          {return symbol(ChocoPyTokens.LAMBDA);}
+  "as"                           { return symbol(ChocoPyTokens.AS); }
+  "for"                          { return symbol(ChocoPyTokens.FOR); }
+  "global"                       { return symbol(ChocoPyTokens.GLOBAL); }
+  "in"                           { return symbol(ChocoPyTokens.IN); }
+  "nonlocal"                     { return symbol(ChocoPyTokens.NONLOCAL); }
+  "pass"                         { return symbol(ChocoPyTokens.PASS); }
+  "return"                       { return symbol(ChocoPyTokens.RETURN); }
+  "assert"                       { return symbol(ChocoPyTokens.ASSERT); }
+  "await"                        { return symbol(ChocoPyTokens.AWAIT); }
+  "break"                        { return symbol(ChocoPyTokens.BREAK); }
+  "continue"                     { return symbol(ChocoPyTokens.CONTINUE); }
+  "del"                          { return symbol(ChocoPyTokens.DEL); }
+  "except"                       { return symbol(ChocoPyTokens.EXCEPT); }
+  "finally"                      { return symbol(ChocoPyTokens.FINALLY); }
+  "from"                         { return symbol(ChocoPyTokens.FROM); }
+  "import"                       { return symbol(ChocoPyTokens.IMPORT); }
+  "raise"                        { return symbol(ChocoPyTokens.RAISE); }
+  "try"                          { return symbol(ChocoPyTokens.TRY); }
+  "with"                         { return symbol(ChocoPyTokens.WITH); }
+  "yield"                        { return symbol(ChocoPyTokens.YIELD); }
+
+
  /* Operators. */
-  "+"                         { return symbol(ChocoPyTokens.PLUS, yytext()); }
+  "+"                         { return symbol(ChocoPyTokens.PLUS); }
+  "-"                         { return symbol(ChocoPyTokens.MINUS); }
+  "*"                         { return symbol(ChocoPyTokens.MUL); }
+  /* ChocoPy has only integer division, spelled "//".  A lone "/" is not a
+     ChocoPy operator, so it gets no rule here and is reported through the
+     error fallback rule as an unrecognized token. */
+  "//"                         { return symbol(ChocoPyTokens.DIV); }
+  "%"                         { return symbol(ChocoPyTokens.MOD); }  
+  ">"                         { return symbol(ChocoPyTokens.GT); }
+  "<"                         { return symbol(ChocoPyTokens.LT); }
+  "=="                         { return symbol(ChocoPyTokens.EQUAL); }
+  "!="                         { return symbol(ChocoPyTokens.NEQ); }
+  ">="                         { return symbol(ChocoPyTokens.GEQ); }
+  "<="                         { return symbol(ChocoPyTokens.LEQ); }
+  "="                         { return symbol(ChocoPyTokens.ASSIGN); }
+  "and"                         { return symbol(ChocoPyTokens.AND); }
+  "or"                         { return symbol(ChocoPyTokens.OR); }
+  "not"                         { return symbol(ChocoPyTokens.NOT); }
+  "."                         { return symbol(ChocoPyTokens.DOT); }
+  "("                         { return symbol(ChocoPyTokens.LPAR); }
+  ")"                         { return symbol(ChocoPyTokens.RPAR); }
+  "["                         { return symbol(ChocoPyTokens.LBR); }
+  "]"                         { return symbol(ChocoPyTokens.RBR); }
+  "->"                           { return symbol(ChocoPyTokens.ARROW); }
+  "is"                           { return symbol(ChocoPyTokens.IS); }
+  
 
+  /*Identifiers*/
+  {Identifiers}               {return symbol(ChocoPyTokens.ID, yytext());}
  /* Whitespace. */
  {WhiteSpace}                { /* ignore */ }
+  /* Comment. */
+  {Comments}                  { /* ignore */ }
 }
-
-<<EOF>>                       { return symbol(ChocoPyTokens.EOF); }
+<STR>{
+    {StringLiteral}           {currString += yytext();}
+    \\$                       { /*'\' at the end of line, do nothing.*/ }
+    "\""                      {yybegin(AFTER); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING,
+                              new ComplexSymbolFactory.Location(str_l, str_c),
+                              new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
+                              currString);} // accepted a ", return to AFTER state
+}
+/* At end of input: while the indentation stack is non-empty, pop one entry and return a DEDENT for it; once it is empty, return EOF. */
+<<EOF>>                       { if(!stack.isEmpty()){ return symbol(ChocoPyTokens.DEDENT, pop());} return symbol(ChocoPyTokens.EOF);}

 /* Error fallback. */
 [^]                           { return symbol(ChocoPyTokens.UNRECOGNIZED); }
--- a/src/test/data/pa1/student_contributed/bad.py
+++ b/src/test/data/pa1/student_contributed/bad.py
@ -1 +1,35 @@
 1 2 3
+
+def fun5():
+    c = 6
+    def fun6():
+        print("Hello")
+  c = 4 + 5
+
+if True:
+    if True:
+        print("Hello")
+        if True:
+            print("Maybe")
+    else:
+        print("World")
+else:
+    print("Again")
+else:
+    print("And Again")
+
+class Thor(object):
+    y:int = 0
+    print("Right place?")
+
+class Stones(object):
+    y:int = 0
+    def fun(x:int):
+        print("Right place?")
+        def bar():
+            return 2+3
+    print("Wrong Place")
+
+def fun1():
+    def fun2():
+        print("Hello")
--- a/src/test/data/pa1/student_contributed/good.py
+++ b/src/test/data/pa1/student_contributed/good.py
@ -1 +1,80 @@
-1 + 2 + 3
+class Foo(object):
+    x:int = 0
+
+    def __init__(self:"Foo", x:int):
+        self.x = x
+    
+    def bar(y:int):
+        print("Hello World!",self.x+y)
+        y = 10
+
+def get_stones(name:str)->str:
+    def map_name(nm:str)->str:
+        return stones[color.index(nm)]
+    color=["Red","Blue"]
+    stones=["Mind","Soul"]
+    return map_name(name)
+
+def funa():
+    def funb():
+        print("Hello")
+    funb()
+
+def fund():
+    def fune():
+        print("Hello")
+    c = 4 + 5
+
+def funf():
+    def fung():
+        print("Hello")
+    c = 6
+    c = 4 + 5
+
+
+if True:
+    if True:
+        if True:
+            print("Hello")
+print("World")
+
+if True:
+    if True:
+        if True:
+            print("Hello")
+        print("World")
+
+if True:
+    if True:
+        if True:
+            print("Hello")
+    print("World")
+
+if True:
+    if True:
+        if True:
+            print("Hello")
+    else:
+        print("World")
+
+if True:
+    if True:
+        if True:
+            print("Hello")
+else:
+    print("World")
+
+
+
+f = Foo(1)
+print(f.x)
+f.bar(4)
+
+a=[[[1],[2]],[[3],[4]]]
+print(a[0][0][1]*a[1][1][0])
+
+multiline_string="Hi World,
+Here I am"
+
+stone="Blue"
+print(get_stones(stone))
--- a/test.sh
+++ b/test.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+# Run the ChocoPy tests with --pass=s.
+# Usage: test.sh [FILENAME]
+#   FILENAME is relative to src/test/data/pa1/sample/; without it, every
+#   test case in that directory is run.
+
+FILENAME=$1
+
+if [ -z "$FILENAME" ] ; then
+    echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)"
+    java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/
+    # Report the test run's own status instead of an unconditional failure.
+    exit $?
+fi
+
+# Quote the path so a file name containing spaces stays one argument.
+java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \
+  --pass=s --test "src/test/data/pa1/sample/${FILENAME}"