Catching Indentation Errors.
	A typo on regex of Identifiers
	Potential fixes on allowing function body with only definations. (not applied)
TODO:
	Allowing a program to have interleaving definations and statements?
master
bill 3 years ago
parent f6091d744f
commit 609903851d

@ -1,4 +1,5 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java_cup.runtime.*;
@ -113,12 +114,26 @@ action code {:
}
return list;
}
<T> List<T> combine(List<T> list, List<T> item) {
if (item != null) {
Iterator<T> it = item.iterator();
while(it.hasNext())
list.add(it.next());
}
return list;
}
/** Return a mutable empty list. */
<T> List<T> empty() {
return new ArrayList<T>();
}
class FuncBody {
public List<Declaration> fbd;
public List<Stmt> sl;
public FuncBody(List<Declaration> fbd, List<Stmt> sl){
this.fbd = fbd;
this.sl = sl;
}
}
/** Return the leftmost non-whitespace location in NODES, or null if NODES
* is empty. Assumes that the nodes of NODES are ordered in increasing
* order of location, from left to right. */
@ -228,7 +243,7 @@ terminal String IS;
/* Returned by the lexer for erroneous tokens. Since it does not appear in
* the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED;
terminal String UNRECOGNIZED;
/* Nonterminal symbols (defined in production rules below).
* As for terminal symbols,
@ -236,7 +251,7 @@ terminal UNRECOGNIZED;
* defines the listed nonterminal identifier symbols to have semantic values
* of type <type>. */
non terminal Program program;
non terminal List<Declaration> program_head, class_body, class_body_defs, fun_body_decs;
non terminal List<Declaration> defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs;
non terminal List<Stmt> stmt_list, opt_stmt_list, block, else_body;
non terminal Stmt stmt, simple_stmt;
non terminal Expr expr, pexpr, cexpr;
@ -255,7 +270,7 @@ non terminal List<Expr> opt_target, expr_list;
non terminal Expr target;
non terminal MemberExpr member_expr;
non terminal IndexExpr index_expr;
non terminal FuncBody fun_body;
@ -330,7 +345,11 @@ typed_vars ::= typed_var:tv {: RESULT= single(tv
;
/* fun_body */
/* fun_body */
fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:}
| fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList<Stmt>());:}
;
fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :}
| fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :}
| fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :}

@ -1,6 +1,7 @@
package chocopy.pa1;
import java_cup.runtime.*;
import java.util.ArrayList;
import java.util.Iterator;
%%
/*** Do not change the flags below unless you know what you are doing. ***/
@ -37,6 +38,7 @@ import java.util.ArrayList;
private int str_l = 0, str_c = 0; //Start location of a string.
/*A stack that keeps track of the spaces in each Indentation Level*/
private ArrayList<Integer> stack = new ArrayList<Integer>(20);
private boolean indentErrorUnchecked = true;
/** Return a terminal symbol of syntactic category TYPE and no
* semantic value at the current source location. */
private Symbol symbol(int type) {
@ -63,6 +65,15 @@ import java.util.ArrayList;
if(stack.isEmpty()) return 0;
return stack.get(stack.size() - 1);
}
private boolean find(int indent){
if(indent == 0) return true;
Iterator<Integer> it = stack.iterator();
while(it.hasNext()){
if(it.next() == indent)
return true;
}
return false;
}
%}
/* Macros (regexes used in rules below) */
@ -72,7 +83,7 @@ LineBreak = \r|\n|\r\n
IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and "
Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])*
Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])*
Comments = #[^\r\n]*
%%
//YYINITIAL state is where we're dealing with indentations.
@ -135,7 +146,18 @@ if True:
AFTER state.
*/
pop();
return symbol(ChocoPyTokens.DEDENT, currIndent);
if(top() < currIndent)
{
currIndent = top();
return symbolFactory.newSymbol("<bad indentation>", ChocoPyTokens.UNRECOGNIZED,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
/*Otherwise, we will start dealing with the rest
of the line after indentation in AFTER state. */
@ -159,7 +181,7 @@ if True:
<AFTER> {
/* Delimiters. */
{LineBreak} { yybegin(YYINITIAL); currIndent = 0;return symbol(ChocoPyTokens.NEWLINE);}
{LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);}
":" { return symbol(ChocoPyTokens.COLON); }
"," { return symbol(ChocoPyTokens.COMMA); }

Loading…
Cancel
Save