Catching Indentation Errors.
	Fix a typo in the regex for Identifiers.
	Potential fix to allow a function body containing only definitions (not applied).
TODO:
	Allow a program to have interleaved definitions and statements?
master
bill 3 years ago
parent f6091d744f
commit 609903851d

@ -1,4 +1,5 @@
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java_cup.runtime.*; import java_cup.runtime.*;
@ -113,12 +114,26 @@ action code {:
} }
return list; return list;
} }
/** Append every element of ITEM to LIST, in ITEM's iteration order, and
 *  return LIST.  A null ITEM leaves LIST unchanged. */
<T> List<T> combine(List<T> list, List<T> item) {
    // Nothing to append: hand the target list back untouched.
    if (item == null) {
        return list;
    }
    for (T element : item) {
        list.add(element);
    }
    return list;
}
/** Return a mutable empty list. */ /** Return a mutable empty list. */
<T> List<T> empty() { <T> List<T> empty() {
return new ArrayList<T>(); return new ArrayList<T>();
} }
/** Plain holder for the two lists that make up a function body: a list of
 *  declarations and a list of statements. */
class FuncBody {
    /** The declaration list handed to the constructor. */
    public List<Declaration> fbd;

    /** The statement list handed to the constructor. */
    public List<Stmt> sl;

    /** Store FBD and SL as given; neither list is copied. */
    public FuncBody(List<Declaration> fbd, List<Stmt> sl) {
        this.sl = sl;
        this.fbd = fbd;
    }
}
/** Return the leftmost non-whitespace location in NODES, or null if NODES /** Return the leftmost non-whitespace location in NODES, or null if NODES
* is empty. Assumes that the nodes of NODES are ordered in increasing * is empty. Assumes that the nodes of NODES are ordered in increasing
* order of location, from left to right. */ * order of location, from left to right. */
@ -228,7 +243,7 @@ terminal String IS;
/* Returned by the lexer for erroneous tokens. Since it does not appear in /* Returned by the lexer for erroneous tokens. Since it does not appear in
* the grammar, it indicates a syntax error. */ * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED; terminal String UNRECOGNIZED;
/* Nonterminal symbols (defined in production rules below). /* Nonterminal symbols (defined in production rules below).
* As for terminal symbols, * As for terminal symbols,
@ -236,7 +251,7 @@ terminal UNRECOGNIZED;
* defines the listed nonterminal identifier symbols to have semantic values * defines the listed nonterminal identifier symbols to have semantic values
* of type <type>. */ * of type <type>. */
non terminal Program program; non terminal Program program;
non terminal List<Declaration> program_head, class_body, class_body_defs, fun_body_decs; non terminal List<Declaration> defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs;
non terminal List<Stmt> stmt_list, opt_stmt_list, block, else_body; non terminal List<Stmt> stmt_list, opt_stmt_list, block, else_body;
non terminal Stmt stmt, simple_stmt; non terminal Stmt stmt, simple_stmt;
non terminal Expr expr, pexpr, cexpr; non terminal Expr expr, pexpr, cexpr;
@ -255,7 +270,7 @@ non terminal List<Expr> opt_target, expr_list;
non terminal Expr target; non terminal Expr target;
non terminal MemberExpr member_expr; non terminal MemberExpr member_expr;
non terminal IndexExpr index_expr; non terminal IndexExpr index_expr;
non terminal FuncBody fun_body;
@ -330,7 +345,11 @@ typed_vars ::= typed_var:tv {: RESULT= single(tv
; ;
/* fun_body */ /* fun_body */
/* A function body is either declarations followed by a statement list, or
 * declarations alone; in the second case the FuncBody receives a fresh,
 * empty ArrayList<Stmt> as its statement list. */
fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:}
| fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList<Stmt>());:}
;
fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :}
| fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :}
| fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :}

@ -1,6 +1,7 @@
package chocopy.pa1; package chocopy.pa1;
import java_cup.runtime.*; import java_cup.runtime.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
%% %%
/*** Do not change the flags below unless you know what you are doing. ***/ /*** Do not change the flags below unless you know what you are doing. ***/
@ -37,6 +38,7 @@ import java.util.ArrayList;
private int str_l = 0, str_c = 0; //Start location of a string. private int str_l = 0, str_c = 0; //Start location of a string.
/*A stack that keeps track of the spaces in each Indentation Level*/ /*A stack that keeps track of the spaces in each Indentation Level*/
private ArrayList<Integer> stack = new ArrayList<Integer>(20); private ArrayList<Integer> stack = new ArrayList<Integer>(20);
private boolean indentErrorUnchecked = true;
/** Return a terminal symbol of syntactic category TYPE and no /** Return a terminal symbol of syntactic category TYPE and no
* semantic value at the current source location. */ * semantic value at the current source location. */
private Symbol symbol(int type) { private Symbol symbol(int type) {
@ -63,6 +65,15 @@ import java.util.ArrayList;
if(stack.isEmpty()) return 0; if(stack.isEmpty()) return 0;
return stack.get(stack.size() - 1); return stack.get(stack.size() - 1);
} }
/** Return true iff INDENT is 0 or equals some entry currently held in the
 *  indentation stack. */
private boolean find(int indent) {
    if (indent != 0) {
        // Scan the recorded indentation levels for an exact match.
        for (int level : stack) {
            if (level == indent) {
                return true;
            }
        }
        return false;
    }
    // Zero is accepted without consulting the stack.
    return true;
}
%} %}
/* Macros (regexes used in rules below) */ /* Macros (regexes used in rules below) */
@ -72,7 +83,7 @@ LineBreak = \r|\n|\r\n
IntegerLiteral = 0|[1-9][0-9]* // According to the manual, 00+ is illegal IntegerLiteral = 0|[1-9][0-9]* // According to the manual, 00+ is illegal
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and "
Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])*
Comments = #[^\r\n]* Comments = #[^\r\n]*
%% %%
//YYINITIAL state is where we're dealing with indentations. //YYINITIAL state is where we're dealing with indentations.
@ -135,7 +146,18 @@ if True:
AFTER state. AFTER state.
*/ */
pop(); pop();
return symbol(ChocoPyTokens.DEDENT, currIndent); if(top() < currIndent)
{
currIndent = top();
return symbolFactory.newSymbol("<bad indentation>", ChocoPyTokens.UNRECOGNIZED,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
} }
/*Otherwise, we will start dealing with the rest /*Otherwise, we will start dealing with the rest
of the line after indentation in AFTER state. */ of the line after indentation in AFTER state. */
@ -159,7 +181,7 @@ if True:
<AFTER> { <AFTER> {
/* Delimiters. */ /* Delimiters. */
{LineBreak} { yybegin(YYINITIAL); currIndent = 0;return symbol(ChocoPyTokens.NEWLINE);} {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);}
":" { return symbol(ChocoPyTokens.COLON); } ":" { return symbol(ChocoPyTokens.COLON); }
"," { return symbol(ChocoPyTokens.COMMA); } "," { return symbol(ChocoPyTokens.COMMA); }

Loading…
Cancel
Save