Merge pull request #2 from nyu-compiler-construction/bill/merge-flex-cup

Bill/merge flex cup
master
sunyinqi0508 4 years ago committed by GitHub
commit b81176b759
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

3
.gitignore vendored

@ -147,3 +147,6 @@ tramp
Session.vim Session.vim
.netrwhist .netrwhist
*~ *~
# JFlex
src/main/jflex/chocopy/pa1/ChocoPyLexer.java

@ -0,0 +1,21 @@
Compiler Construction PA1 Worklog
<br>
<br>Team:
<br>Apoorva Ranade(ar6496)
<br>Sanjar Ahmadov(sa5640)
<br>Yinqi Sun(ys3540)
<br>
<br>Acknowledgments: Provide attribution to any collaborations, external resources, or outside help.
<br>
<br>Indentation: Describe your strategy for handling INDENT and DEDENT tokens. Point to the relevant source files and line numbers.
<br>A stack is maintained by the lexer to keep track of indentations. A count is accumulated for the number of whitespace characters before the first token. If the count changes from the previous line count, a stack operation is performed. If count increases, another value is added to the stack. If count decreases, the topmost value is popped from the stack.
<br>
<br>Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any.
<br> Shift-reduce conflicts reported for the grammar. One way to fix them is to rewrite the grammar. We chose instead to resolve them with precedence declarations, as is done for expr: we declared if and else as right-associative.
<br> Handling errors was another challenge. This required debugging and small changes to program flow.
<br> Understanding the given code was a small challenge and took some time before we could start coding.
<br>
<br>Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis.
<br> Added more tests to rigorously check program flow and indentation.
<br> Function body must have at least one statement which is not a part of a nested function.
<br> Support for multi-line strings.

@ -0,0 +1,3 @@
#!/bin/bash
# Build the project from scratch with Maven: clean previous output, then package.
# -e asks Maven to print full error details when the build fails.
mvn clean package -e

@ -0,0 +1,3 @@
#!/bin/bash
# Build the project, then run the test script.
# Stop with the build's exit status if the build fails, so the tests never
# run against a stale or missing build.
./build.sh || exit $?
./test.sh

@ -1,4 +1,5 @@
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java_cup.runtime.*; import java_cup.runtime.*;
@ -113,12 +114,26 @@ action code {:
} }
return list; return list;
} }
/** Append every element of ITEM to LIST, in order, and return LIST.
 *  A null ITEM leaves LIST unchanged. */
<T> List<T> combine(List<T> list, List<T> item) {
    if (item != null) {
        // Bulk append replaces the hand-written iterator loop.
        list.addAll(item);
    }
    return list;
}
/** Return a mutable empty list. */ /** Return a mutable empty list. */
<T> List<T> empty() { <T> List<T> empty() {
return new ArrayList<T>(); return new ArrayList<T>();
} }
/** Holder for the two parts of a function body: the leading declarations
 *  and the statements that follow them. */
class FuncBody {
    /** Declarations at the start of the function body. */
    public List<Declaration> fbd;
    /** Statements that follow the declarations. */
    public List<Stmt> sl;

    /** Store DECLARATIONS and STATEMENTS as given; the lists are not copied. */
    public FuncBody(List<Declaration> declarations, List<Stmt> statements) {
        fbd = declarations;
        sl = statements;
    }
}
/** Return the leftmost non-whitespace location in NODES, or null if NODES /** Return the leftmost non-whitespace location in NODES, or null if NODES
* is empty. Assumes that the nodes of NODES are ordered in increasing * is empty. Assumes that the nodes of NODES are ordered in increasing
* order of location, from left to right. */ * order of location, from left to right. */
@ -131,6 +146,18 @@ action code {:
first.getLocation()[1]); first.getLocation()[1]);
} }
/** Return the end location of the last node in NODES, built from entries
 *  2 and 3 of that node's getLocation(), or null if NODES is empty.
 *  Assumes that the nodes of NODES are ordered in increasing order of
 *  location, from left to right. */
ComplexSymbolFactory.Location getRight(List<? extends Node> nodes) {
    int count = nodes.size();
    if (count == 0) {
        return null;
    }
    Node tail = nodes.get(count - 1);
    return new ComplexSymbolFactory.Location(tail.getLocation()[2],
                                             tail.getLocation()[3]);
}
:} :}
/* Terminal symbols (tokens returned by the lexer). The declaration /* Terminal symbols (tokens returned by the lexer). The declaration
@ -142,12 +169,81 @@ action code {:
* semantic value of type <type> for these symbols that may be referenced * semantic value of type <type> for these symbols that may be referenced
* in actions ( {: ... :} ). * in actions ( {: ... :} ).
*/ */
terminal INDENT;
terminal DEDENT;
terminal String ID;
terminal String STRING;
/* Terminal Delimiters */
terminal NEWLINE; terminal NEWLINE;
terminal String PLUS; terminal String COLON;
terminal String COMMA;
/* Terminal Literals */
terminal Integer NUMBER; terminal Integer NUMBER;
terminal Boolean BOOL;
terminal String NONE;
/* Terminal Keywords */
terminal String IF;
terminal String ELSE;
terminal String ELIF;
terminal String WHILE;
terminal String CLASS;
terminal String DEF;
terminal String LAMBDA;
terminal String AS;
terminal String FOR;
terminal String GLOBAL;
terminal String IN;
terminal String NONLOCAL;
terminal String PASS;
terminal String RETURN;
terminal String ASSERT;
terminal String AWAIT;
terminal String BREAK;
terminal String CONTINUE;
terminal String DEL;
terminal String EXCEPT;
terminal String FINALLY;
terminal String FROM;
terminal String IMPORT;
terminal String RAISE;
terminal String TRY;
terminal String WITH;
terminal String YIELD;
/* Terminal Operators */
terminal String PLUS;
terminal String MINUS;
terminal String MUL;
terminal String DIV;
terminal String MOD;
terminal String GT;
terminal String LT;
terminal String EQUAL;
terminal String NEQ;
terminal String GEQ;
terminal String LEQ;
terminal String ASSIGN;
terminal String AND;
terminal String OR;
terminal String NOT;
terminal String DOT;
terminal String LPAR;
terminal String RPAR;
terminal String LBR;
terminal String RBR;
terminal String ARROW;
terminal String IS;
/* Returned by the lexer for erroneous tokens. Since it does not appear in /* Returned by the lexer for erroneous tokens. Since it does not appear in
* the grammar, it indicates a syntax error. */ * the grammar, it indicates a syntax error. */
terminal UNRECOGNIZED; terminal String UNRECOGNIZED;
/* Nonterminal symbols (defined in production rules below). /* Nonterminal symbols (defined in production rules below).
* As for terminal symbols, * As for terminal symbols,
@ -155,14 +251,39 @@ terminal UNRECOGNIZED;
* defines the listed nonterminal identifier symbols to have semantic values * defines the listed nonterminal identifier symbols to have semantic values
* of type <type>. */ * of type <type>. */
non terminal Program program; non terminal Program program;
non terminal List<Declaration> program_head; non terminal List<Declaration> defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs;
non terminal List<Stmt> stmt_list, opt_stmt_list; non terminal List<Stmt> stmt_list, opt_stmt_list, block, else_body;
non terminal Stmt stmt, expr_stmt; non terminal Stmt stmt, simple_stmt;
non terminal Expr expr, binary_expr; non terminal Expr expr, pexpr, cexpr;
non terminal VarDef var_def;
non terminal ClassDef class_def;
non terminal FuncDef fun_def;
non terminal Literal literal;
non terminal StringLiteral bin_op, comp_op;
non terminal TypedVar typed_var;
non terminal TypeAnnotation type, ret_type;
non terminal Identifier identifier;
non terminal List<TypedVar> typed_vars;
non terminal GlobalDecl global_decl;
non terminal NonLocalDecl nonlocal_decl;
non terminal List<Expr> opt_target, expr_list;
non terminal Expr target;
non terminal MemberExpr member_expr;
non terminal IndexExpr index_expr;
non terminal FuncBody fun_body;
/* Precedences (lowest to highest) for resolving what would otherwise be /* Precedences (lowest to highest) for resolving what would otherwise be
* ambiguities in the form of shift/reduce conflicts.. */ * ambiguities in the form of shift/reduce conflicts.. */
precedence left PLUS; precedence left OR;
precedence left AND;
precedence left NOT;
precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS;
precedence left PLUS, MINUS;
precedence left MUL, DIV, MOD;
precedence left DOT, COMMA, LBR, RBR;
precedence right IF, ELSE;
/* The start symbol. */ /* The start symbol. */
start with program; start with program;
@ -170,45 +291,221 @@ start with program;
/***** GRAMMAR RULES *****/ /***** GRAMMAR RULES *****/
/* Rules are defined in the order given by the language reference */
/* program */
program ::= program_head:d opt_stmt_list:s program ::= program_head:d opt_stmt_list:s
{: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d), {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d),
sxright, d, s, errors); sxright, d, s, errors);
:} :}
; ;
/* Initial list of declarations. */ program_head ::= program_head:d var_def:vd {: RESULT = combine(d, vd); :}
program_head ::= /* not implemented; currently matches empty string */ | program_head:d class_def:cd {: RESULT = combine(d, cd); :}
{: RESULT = empty(); :} | program_head:d fun_def:fd {: RESULT = combine(d, fd); :}
| program_head:d error:e {: RESULT = d; :}
| {: RESULT = empty(); :}
; ;
opt_stmt_list ::= {: RESULT = empty(); :} opt_stmt_list ::= {: RESULT = empty(); :}
| stmt_list:s {: RESULT = s; :} | stmt_list:s {: RESULT = s; :}
; ;
stmt_list ::= stmt:s {: RESULT = single(s); :}
| stmt_list:l stmt:s {: RESULT = combine(l, s); :} /* class_def */
| stmt_list:l error {: RESULT = l; :} class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :};
/* If there is a syntax error in the source, this says to discard
* symbols from the parsing stack and perform reductions until
* there is a stmt_list on top of the stack, and then to discard /* class_body */
* input symbols until it is possible to shift again, reporting class_body ::= PASS NEWLINE {: RESULT = empty(); :}
* a syntax error. */ | class_body_defs:defs {: RESULT = defs; :}
;
class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(defs, vd); :}
| class_body_defs:defs fun_def:fd {: RESULT = combine(defs, fd); :}
| class_body_defs:defs error {: RESULT = defs; :}
| var_def:vd {: RESULT = single(vd); :}
| fun_def:fd {: RESULT = single(fd); :}
;
/* fun_def */
fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT
{: TypeAnnotation _rt = rt;if((rt instanceof ClassType) && ((ClassType)rt).className == "<None>") _rt = new ClassType( colxright, colxright, "<None>");RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :}
;
ret_type ::= ARROW type:t {: RESULT= t; :}
| {: RESULT= new ClassType(null, null,"<None>"); :}
;
typed_vars ::= typed_var:tv {: RESULT= single(tv); :}
| typed_vars:tvs COMMA typed_var:tv {: RESULT= combine(tvs, tv); :}
| typed_vars:tvs COMMA error {: RESULT= tvs; :}
| {: RESULT= empty(); :}
;
/* fun_body */
fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:}
| fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList<Stmt>());:}
;
fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :}
| fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :}
| fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :}
| fun_body_decs:fbd fun_def:fd {: RESULT= combine(fbd, fd); :}
| fun_body_decs:fbd error {: RESULT= fbd; :}
| {: RESULT= empty(); :}
;
/* typed_var */
typed_var ::= identifier:id COLON type:t {: RESULT = new TypedVar(idxleft, txright, id, t); :};
/* type */
type ::= identifier:id {: RESULT = new ClassType(idxleft, idxright, id.name); :}
| STRING:str {: RESULT = new ClassType(strxleft, strxright, str); :}
| LBR:lbr type:t RBR:rbr {: RESULT = new ListType(lbrxleft, rbrxright, t); :}
;
/* global_decl */
global_decl ::= GLOBAL:g identifier:id NEWLINE {: RESULT = new GlobalDecl(gxleft, idxright, id); :};
/* nonlocal_decl */
nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLocalDecl(nxleft, idxright, id); :};
/* var_def */
var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :};
/* stmt */
stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :}
       /* An `if` without any else/elif part has an empty else_body, for which
        * getRight returns null; in that case the statement ends where its
        * own block ends. */
       | IF:i expr:cond COLON block:b else_body:elb {: RESULT = new IfStmt(ixleft, elb.isEmpty() ? getRight(b) : getRight(elb), cond, b, elb); :}
       | WHILE:wh expr:cond COLON block:b {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :}
       | FOR:f identifier:id IN expr:e COLON block:b {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :}
; ;
stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :}
else_body ::= ELSE:el COLON block:b {: RESULT = b; :}
            /* An elif is represented as a single nested IfStmt. When no further
             * else/elif follows, elb is empty and getRight(elb) is null, so the
             * nested statement ends where its own block ends. */
            | ELIF:el expr:cond COLON block:b else_body:elb {: RESULT = single(new IfStmt(elxleft, elb.isEmpty() ? getRight(b) : getRight(elb), cond, b, elb)); :}
            | {: RESULT = empty(); :}
; ;
expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :}
/* simple_stmt */
simple_stmt ::= PASS:p {: RESULT = null; :}   /* pass yields no AST node */
              | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :}
              | RETURN:r expr:e {: RESULT = new ReturnStmt(rxleft, exright, e); :}
              /* A bare `return` is still a statement: build a ReturnStmt with no
               * value instead of discarding it. */
              | RETURN:r {: RESULT = new ReturnStmt(rxleft, rxright, null); :}
              | opt_target:ot expr:e {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :}
; ;
expr ::= binary_expr:e {: RESULT = e; :}
opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :}
| target:t ASSIGN {: RESULT = single(t); :}
;
/* block */
block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :};
/* literal */
literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :}
| BOOL:b {: RESULT = new BooleanLiteral(bxleft, bxright, b); :}
| NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :}
| STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :}
;
/* expr */
expr ::= cexpr:ce {: RESULT = ce; :}
| NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :}
| expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :}
| expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :}
| expr:e1 IF expr:e2 ELSE expr:e3 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :}
;
/* cexpr */
cexpr ::= pexpr:pe {: RESULT = pe; :}
| pexpr:p1 comp_op:co cexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :}
;
/* pexpr */
pexpr ::= identifier:id {: RESULT = id; :}
| literal:l {: RESULT = l; :}
| LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :}
| LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :}
| member_expr:m {: RESULT = m; :}
| index_expr:i {: RESULT = i; :}
| member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :}
| identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :}
| pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
| pexpr:p1 MINUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
| pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
| pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
| pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :}
| MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :}
;
expr_list ::= expr:e {: RESULT = single(e); :}
| expr_list:el COMMA expr:e {: RESULT = combine(el, e); :}
| {: RESULT = new ArrayList<Expr>(); :}
;
/* bin_op */ //We may still be able to use bin_op, so I left it here.
bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :}
| MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :}
| MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :}
| DIV:a {: RESULT = new StringLiteral(axleft, axright, "//"); :} //Section 2.6.3 in chocopy language reference
| MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :}
;
/* comp_op */ //this might also need some change in order not to break left associativity
comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :}
| NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :}
| LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :}
| GEQ:a {: RESULT = new StringLiteral(axleft, axright, ">="); :}
| LT:a {: RESULT = new StringLiteral(axleft, axright, "<"); :}
| GT:a {: RESULT = new StringLiteral(axleft, axright, ">"); :}
| IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :}
;
/* member_expr */
member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :}
;
/* index_expr */
index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :}
; ;
/* A binary expression, illustrating how to find the left and right /* target */
* source position of a phrase. */ target ::= identifier:id {: RESULT = id; :}
binary_expr ::= expr:e1 PLUS:op expr:e2 | member_expr:m {: RESULT = m; :}
{: RESULT = new BinaryExpr(e1xleft, e2xright, | index_expr:i {: RESULT = i; :}
e1, op, e2); :} ;
/* Extras - rules below have not been given in language reference, we have them to ease implementation */
identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :};
stmt_list ::= stmt:s {: RESULT = single(s); :}
| stmt_list:l stmt:s {: RESULT = combine(l, s); :}
| stmt_list:l error {: RESULT = l; :}
/* If there is a syntax error in the source, this says to discard
* symbols from the parsing stack and perform reductions until
* there is a stmt_list on top of the stack, and then to discard
* input symbols until it is possible to shift again, reporting
* a syntax error. */
; ;

@ -1,5 +1,7 @@
package chocopy.pa1; package chocopy.pa1;
import java_cup.runtime.*; import java_cup.runtime.*;
import java.util.ArrayList;
import java.util.Iterator;
%% %%
@ -8,7 +10,7 @@ import java_cup.runtime.*;
%unicode %unicode
%line %line
%column %column
%states AFTER, STR
%class ChocoPyLexer %class ChocoPyLexer
%public %public
@ -32,7 +34,12 @@ import java_cup.runtime.*;
/** Producer of token-related values for the parser. */ /** Producer of token-related values for the parser. */
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
private int currIndent = 0; //Current Indentation Level
private String currString = "";
private int str_l = 0, str_c = 0; //Start location of a string.
/*A stack that keeps track of the spaces in each Indentation Level*/
private ArrayList<Integer> stack = new ArrayList<Integer>(20);
private boolean indentErrorUnchecked = true;
/** Return a terminal symbol of syntactic category TYPE and no /** Return a terminal symbol of syntactic category TYPE and no
* semantic value at the current source location. */ * semantic value at the current source location. */
private Symbol symbol(int type) { private Symbol symbol(int type) {
@ -48,6 +55,26 @@ import java_cup.runtime.*;
value); value);
} }
/** Record INDENT as the most recently opened indentation level. */
private void push(int indent){
    stack.add(Integer.valueOf(indent));
}
/** Remove and return the most recently pushed indentation level, or
 *  return 0 when the stack is empty. */
private int pop(){
    int depth = stack.size();
    return depth == 0 ? 0 : stack.remove(depth - 1);
}
/** Return the most recently pushed indentation level without removing it,
 *  or 0 when the stack is empty. */
private int top(){
    int depth = stack.size();
    return depth > 0 ? stack.get(depth - 1) : 0;
}
/** Return true if INDENT is 0 or equals one of the indentation levels
 *  currently on the stack. */
private boolean find(int indent){
    // List.contains compares by value (Integer.equals), replacing the
    // hand-written iterator search.
    return indent == 0 || stack.contains(indent);
}
%} %}
/* Macros (regexes used in rules below) */ /* Macros (regexes used in rules below) */
@ -55,28 +82,192 @@ import java_cup.runtime.*;
WhiteSpace = [ \t] WhiteSpace = [ \t]
LineBreak = \r|\n|\r\n LineBreak = \r|\n|\r\n
IntegerLiteral = 0 | [1-9][0-9]*
IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and "
Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])*
Comments = #[^\r\n]*
%% %%
//YYINITIAL state is where we're dealing with indentations.
//We will set the state to YYINITIAL when starting a
//new line unless this line is within a string, e.g.:
/*
"this is \
a string across \
multiple lines\
"
*/
<YYINITIAL>{
{WhiteSpace}
{
    /* Accumulate the width of the whitespace seen so far on this line.
       The tab test must compare by value: `yytext() == "\t"` compares
       object references and does not match the string returned by
       yytext(), so every tab was being counted as a single column. */
    if ("\t".equals(yytext()))
        currIndent += 8 - (currIndent % 8); // a tab advances to the next multiple of 8
    else
        currIndent++;
}
/*
# This python code will test if '\t' is 8 spaces
# It will run and print '1\n2'
# Please tell me if your Python reports an error
# Or you find documentations that says otherwise
if True:
print(1) # \t
print(2) # 8 spaces
*/
{LineBreak}
{
/*
If this is a blank line, start over on the next line.
An empty line should just be ignored, therefore we don't
pass a NEWLINE to Cup.
*/
currIndent = 0;
}
{Comments} { /* ignored */ } //Ignore blank lines
/*If it's not a blank line (Current character isn't a
Whitespace/linebreak/comment), deal with indentation here and
start accepting whatever is on this line in `AFTER' state*/
[^ \t\r\n#]
{
//rewind the current character.
yypushback(1);
if(top() > currIndent)
{
/*
If the indentation of the line is less than number of
indents current level should have,
keep dedenting until it reaches the level with the same
number of indents.
It's like a loop, because we're not changing the state
and we rewinded the current character. So it will keep
going until top()<= currIndent and it will switch to
AFTER state.
*/
pop();
if(top() < currIndent)
{
currIndent = top();
return symbolFactory.newSymbol("<bad indentation>", ChocoPyTokens.UNRECOGNIZED,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
/*Otherwise, we will start dealing with the rest
of the line after indentation in AFTER state. */
yybegin(AFTER);
if(top()< currIndent)
{
/*
If current indentation is more than the number of indents
current level should have, start a new level which will have
`currIndent' indents.
*/
<YYINITIAL> { push(currIndent);
return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT,
new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currIndent);
}
}
}
<AFTER> {
/* Delimiters. */ /* Delimiters. */
{LineBreak} { return symbol(ChocoPyTokens.NEWLINE); } {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);}
":" { return symbol(ChocoPyTokens.COLON); }
"," { return symbol(ChocoPyTokens.COMMA); }
/* Literals. */ /* Literals. */
{IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER, {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER,
Integer.parseInt(yytext())); } Integer.parseInt(yytext())); }
"\"" {yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = "";} //Start taking a string when see a "
"False" { return symbol(ChocoPyTokens.BOOL, false); }
"True" { return symbol(ChocoPyTokens.BOOL, true); }
"None" { return symbol(ChocoPyTokens.NONE); }
/*Keywords*/
"if" {return symbol(ChocoPyTokens.IF);}
"else" {return symbol(ChocoPyTokens.ELSE);}
"elif" {return symbol(ChocoPyTokens.ELIF);}
"while" {return symbol(ChocoPyTokens.WHILE);}
"class" {return symbol(ChocoPyTokens.CLASS);}
"def" {return symbol(ChocoPyTokens.DEF);}
"lambda" {return symbol(ChocoPyTokens.LAMBDA);}
"as" { return symbol(ChocoPyTokens.AS); }
"for" { return symbol(ChocoPyTokens.FOR); }
"global" { return symbol(ChocoPyTokens.GLOBAL); }
"in" { return symbol(ChocoPyTokens.IN); }
"nonlocal" { return symbol(ChocoPyTokens.NONLOCAL); }
"pass" { return symbol(ChocoPyTokens.PASS); }
"return" { return symbol(ChocoPyTokens.RETURN); }
"assert" { return symbol(ChocoPyTokens.ASSERT); }
"await" { return symbol(ChocoPyTokens.AWAIT); }
"break" { return symbol(ChocoPyTokens.BREAK); }
"continue" { return symbol(ChocoPyTokens.CONTINUE); }
"del" { return symbol(ChocoPyTokens.DEL); }
"except" { return symbol(ChocoPyTokens.EXCEPT); }
"finally" { return symbol(ChocoPyTokens.FINALLY); }
"from" { return symbol(ChocoPyTokens.FROM); }
"import" { return symbol(ChocoPyTokens.IMPORT); }
"raise" { return symbol(ChocoPyTokens.RAISE); }
"try" { return symbol(ChocoPyTokens.TRY); }
"with" { return symbol(ChocoPyTokens.WITH); }
"yield" { return symbol(ChocoPyTokens.YIELD); }
/* Operators. */ /* Operators. */
"+" { return symbol(ChocoPyTokens.PLUS, yytext()); } "+" { return symbol(ChocoPyTokens.PLUS); }
"-" { return symbol(ChocoPyTokens.MINUS); }
"*" { return symbol(ChocoPyTokens.MUL); }
"//" { return symbol(ChocoPyTokens.DIV); }
"/" { return symbol(ChocoPyTokens.DIV); } //Accroding to manual, chocopy don't have fp division, '/', '//' should be integr division
"%" { return symbol(ChocoPyTokens.MOD); }
">" { return symbol(ChocoPyTokens.GT); }
"<" { return symbol(ChocoPyTokens.LT); }
"==" { return symbol(ChocoPyTokens.EQUAL); }
"!=" { return symbol(ChocoPyTokens.NEQ); }
">=" { return symbol(ChocoPyTokens.GEQ); }
"<=" { return symbol(ChocoPyTokens.LEQ); }
"=" { return symbol(ChocoPyTokens.ASSIGN); }
"and" { return symbol(ChocoPyTokens.AND); }
"or" { return symbol(ChocoPyTokens.OR); }
"not" { return symbol(ChocoPyTokens.NOT); }
"." { return symbol(ChocoPyTokens.DOT); }
"(" { return symbol(ChocoPyTokens.LPAR); }
")" { return symbol(ChocoPyTokens.RPAR); }
"[" { return symbol(ChocoPyTokens.LBR); }
"]" { return symbol(ChocoPyTokens.RBR); }
"->" { return symbol(ChocoPyTokens.ARROW); }
"is" { return symbol(ChocoPyTokens.IS); }
/*Identifiers*/
{Identifiers} {return symbol(ChocoPyTokens.ID, yytext());}
/* Whitespace. */ /* Whitespace. */
{WhiteSpace} { /* ignore */ } {WhiteSpace} { /* ignore */ }
/* Comment. */
{Comments} { /* ignore */ }
} }
<STR>{
<<EOF>> { return symbol(ChocoPyTokens.EOF); } {StringLiteral} {currString += yytext();}
\\$ { /*'\' at the end of line, do nothing.*/ }
"\"" {yybegin(AFTER); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING,
new ComplexSymbolFactory.Location(str_l, str_c),
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
currString);} // accepted a ", return to AFTER state
}
/* End of input: while indentation levels remain on the stack, each firing of
   this rule pops one level and returns a DEDENT; once the stack is empty it
   returns EOF. */
<<EOF>> { if(!stack.isEmpty()){ return symbol(ChocoPyTokens.DEDENT, pop());} return symbol(ChocoPyTokens.EOF);}
/* Error fallback. */ /* Error fallback. */
[^] { return symbol(ChocoPyTokens.UNRECOGNIZED); } [^] { return symbol(ChocoPyTokens.UNRECOGNIZED); }

@ -1 +1,35 @@
1 2 3 1 2 3
def fun5():
c = 6
def fun6():
print("Hello")
c = 4 + 5
if True:
if True:
print("Hello")
if True:
print("Maybe")
else:
print("World")
else:
print("Again")
else:
print("And Again")
class Thor(object):
y:int = 0
print("Right place?")
class Stones(object):
y:int = 0
def fun(x:int):
print("Right place?")
def bar():
return 2+3
print("Wrong Place")
def fun1():
def fun2():
print("Hello")

@ -1 +1,80 @@
1 + 2 + 3 class Foo(object):
x:int = 0
def __init__(self:"Foo", x:int):
self.x = x
def bar(y:int):
print("Hello World!",self.x+y)
y = 10
def get_stones(name:str)->str:
def map_name(nm:str)->str:
return stones[color.index(nm)]
color=["Red","Blue"]
stones=["Mind","Soul"]
return map_name(name)
def funa():
def funb():
print("Hello")
funb()
def fund():
def fune():
print("Hello")
c = 4 + 5
def funf():
def fung():
print("Hello")
c = 6
c = 4 + 5
if True:
if True:
if True:
print("Hello")
print("World")
if True:
if True:
if True:
print("Hello")
print("World")
if True:
if True:
if True:
print("Hello")
print("World")
if True:
if True:
if True:
print("Hello")
else:
print("World")
if True:
if True:
if True:
print("Hello")
else:
print("World")
f = Foo(1)
print(f.x)
f.bar(4)
a=[[[1],[2]],[[3],[4]]]
print(a[0][0][1]*a[1][1][0])
multiline_string="Hi World,
Here I am"
stone="Blue"
print(get_stones(stone))

@ -0,0 +1,12 @@
#!/bin/bash
# Run the PA1 sample tests through chocopy.ChocoPy.
#   test.sh            run every test case in src/test/data/pa1/sample/
#   test.sh FILENAME   run one test case (FILENAME is relative to that folder)
FILENAME=$1
if [ -z "$FILENAME" ] ; then
    echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)"
    java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/
    # Report the test run's own status instead of an unconditional failure.
    exit $?
fi
# Quote the path so file names with spaces or glob characters are passed intact.
java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \
    --pass=s --test "src/test/data/pa1/sample/${FILENAME}"
Loading…
Cancel
Save