From 609903851d29089cf143810f28bb5e95e0c9fa22 Mon Sep 17 00:00:00 2001 From: bill Date: Mon, 22 Feb 2021 16:42:06 +0800 Subject: [PATCH] Bug fixes: Catching Indentation Errors. A typo on regex of Identifiers. Potential fixes on allowing function body with only definitions. (not applied) TODO: Allowing a program to have interleaving definitions and statements? --- src/main/cup/chocopy/pa1/ChocoPy.cup | 31 +++++++++++++++++++----- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 28 ++++++++++++++++++--- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index 15340a3..20db76b 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -1,4 +1,5 @@ import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java_cup.runtime.*; @@ -113,12 +114,26 @@ action code {: } return list; } - + List combine(List list, List item) { + if (item != null) { + Iterator it = item.iterator(); + while(it.hasNext()) + list.add(it.next()); + } + return list; + } /** Return a mutable empty list. */ List empty() { return new ArrayList(); } - + class FuncBody { + public List fbd; + public List sl; + public FuncBody(List fbd, List sl){ + this.fbd = fbd; + this.sl = sl; + } + } /** Return the leftmost non-whitespace location in NODES, or null if NODES * is empty. Assumes that the nodes of NODES are ordered in increasing * order of location, from left to right. */ @@ -228,7 +243,7 @@ terminal String IS; /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ -terminal UNRECOGNIZED; +terminal String UNRECOGNIZED; /* Nonterminal symbols (defined in production rules below). * As for terminal symbols, @@ -236,7 +251,7 @@ terminal UNRECOGNIZED; * defines the listed nonterminal identifier symbols to have semantic values * of type . 
*/ non terminal Program program; -non terminal List program_head, class_body, class_body_defs, fun_body_decs; +non terminal List defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs; non terminal List stmt_list, opt_stmt_list, block, else_body; non terminal Stmt stmt, simple_stmt; non terminal Expr expr, pexpr, cexpr; @@ -255,7 +270,7 @@ non terminal List opt_target, expr_list; non terminal Expr target; non terminal MemberExpr member_expr; non terminal IndexExpr index_expr; - +non terminal FuncBody fun_body; @@ -330,7 +345,11 @@ typed_vars ::= typed_var:tv {: RESULT= single(tv ; -/* fun_body */ +/* fun_body */ +fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:} + | fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList());:} + ; + fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 55446aa..de6ec6c 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -1,6 +1,7 @@ package chocopy.pa1; import java_cup.runtime.*; import java.util.ArrayList; +import java.util.Iterator; %% /*** Do not change the flags below unless you know what you are doing. ***/ @@ -37,6 +38,7 @@ import java.util.ArrayList; private int str_l = 0, str_c = 0; //Start location of a string. /*A stack that keeps track of the spaces in each Indentation Level*/ private ArrayList stack = new ArrayList(20); + private boolean indentErrorUnchecked = true; /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. 
*/ private Symbol symbol(int type) { @@ -63,6 +65,15 @@ import java.util.ArrayList; if(stack.isEmpty()) return 0; return stack.get(stack.size() - 1); } + private boolean find(int indent){ + if(indent == 0) return true; + Iterator it = stack.iterator(); + while(it.hasNext()){ + if(it.next() == indent) + return true; + } + return false; + } %} /* Macros (regexes used in rules below) */ @@ -72,7 +83,7 @@ LineBreak = \r|\n|\r\n IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " -Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* +Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])* Comments = #[^\r\n]* %% //YYINITIAL state is where we're dealing with indentations. @@ -135,7 +146,18 @@ if True: AFTER state. */ pop(); - return symbol(ChocoPyTokens.DEDENT, currIndent); + if(top() < currIndent) + { + currIndent = top(); + return symbolFactory.newSymbol("", ChocoPyTokens.UNRECOGNIZED, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); + } + return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); } /*Otherwise, we will start dealing with the rest of the line after indentation in AFTER state. */ @@ -159,7 +181,7 @@ if True: { /* Delimiters. */ - {LineBreak} { yybegin(YYINITIAL); currIndent = 0;return symbol(ChocoPyTokens.NEWLINE);} + {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);} ":" { return symbol(ChocoPyTokens.COLON); } "," { return symbol(ChocoPyTokens.COMMA); }