From 62330bd2370d572325a28b06f9b53b838e659371 Mon Sep 17 00:00:00 2001 From: bill Date: Thu, 18 Feb 2021 00:07:18 +0800 Subject: [PATCH 01/26] Fixed some typos, added comments. --- .gitignore | 3 + src/main/jflex/chocopy/pa1/ChocoPy.jflex | 79 +++++++++++++++++------- 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 21cc758..987fe16 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,6 @@ tramp Session.vim .netrwhist *~ + +# JFlex +src/main/jflex/chocopy/pa1/ChocoPyLexer.java diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index d7640a7..24c0328 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -32,9 +32,10 @@ import java.util.ArrayList; /** Producer of token-related values for the parser. */ final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); - private int currIndent = 0; + private int currIndent = 0; //Current Indentation Level private int currString = ""; - private ArrayList stack = new ArrayList(20); + /*A stack that keeps track of the spaces in each Indentation Level*/ + private ArrayList stack = new ArrayList(20); /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. 
*/ private Symbol symbol(int type) { @@ -57,7 +58,7 @@ import java.util.ArrayList; return stack.remove(stack.size() - 1); } private int top(){ - if(stack.isEmpty) return 0; + if(stack.isEmpty()) return 0; return stack.get(stack.size() - 1); } %} @@ -67,39 +68,74 @@ import java.util.ArrayList; WhiteSpace = [ \t] LineBreak = \r|\n|\r\n -IntegerLiteral = 0|[1-9][0-9]* -StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))* -Names = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z])* +IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal +StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))* // \n, \r, \t, \\, \" and Anything except \ and " +Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* Comments = #[^\r\n]* %% { {WhiteSpace} { - String space = yytext(); - if(space == "\t") - currIndent += 8; + /*Add indentation */ + if(yytext() == "\t") + currIndent += 8; //'\t' = 8 spaces else currIndent ++; } - {LineBreak} +/* +# This python code will test if '\t' is 8 spaces +# It will run and print '1\n2' +# Please tell me if your Python reports an error +# Or you find documentations that says otherwise + +if True: + print(1) # \t + print(2) # 8 spaces +*/ + + {LineBreak} { + /* + If this is a blank line, start over on the next line. + An empty line should just be ignored, therefore we don't + pass a NEWLINE to Cup. + */ currIndent = 0; } - {Comments} { /* ignored */ } - [^ \t\r\n#] + {Comments} { /* ignored */ } //Ignore blank lines + + /*If it's not a blank line (Current character isn't a + Whitespace/linebreak/comment), deal with indentation here and + start accepting whatever is on this line in `AFTER' state*/ + [^ \t\r\n#] { + //rewind the current character. yypushback(1); if(top() > currIndent) - { + { + /* + If the indentation of the line < indents current level should have, + keep dedenting until it reaches the right level. + It's like a loop, because we're not changing the state + and we rewinded the current character. 
So it will keep + going until top()<= currIndent and it will switch to + AFTER state. + */ pop(); - return symbol(ChocoPyTokens.DEDENT); + return symbol(ChocoPyTokens.DEDENT, currIndent); } + /*Otherwise, we will start dealing with the rest + of the line after indentation in from the next token.*/ yystart(AFTER); if(top()< currIndent) - { + { + /* + If current indentation > indents current level should have, + start a new level which will have `currIndent' spaces. + */ push(currIndent); - return symbol(ChocoPyTokens.INDENT); + return symbol(ChocoPyTokens.INDENT, currIndent); } } } @@ -113,8 +149,7 @@ Comments = #[^\r\n]* /* Literals. */ {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext())); } -// {StringLiteral} { return symbol(ChocoPyTokens.STRING, yytext());} - "\"" {yystart(STR); currString = "";} + "\"" {yystart(STR); currString = "";} //Start taking a string when see a " "False" { return symbol(ChocoPyTokens.BOOL, false); } "True" { return symbol(ChocoPyTokens.BOOL, true); } "None" { return symbol(ChocoPyTokens.NONE); } @@ -154,7 +189,7 @@ Comments = #[^\r\n]* "-" { return symbol(ChocoPyTokens.MINUS); } "*" { return symbol(ChocoPyTokens.MUL); } "//" { return symbol(ChocoPyTokens.DIV); } - "/" { return symbol(ChocoPyTokens.DIV); } + "/" { return symbol(ChocoPyTokens.DIV); } //Accroding to manual, chocopy don't have fp division, '/', '//' should be integr division "%" { return symbol(ChocoPyTokens.MOD); } ">" { return symbol(ChocoPyTokens.GT); } "<" { return symbol(ChocoPyTokens.LT); } @@ -176,16 +211,16 @@ Comments = #[^\r\n]* /*Identifiers*/ - {Names} {return symbol(ChocoPyTokens.NAMES, yytext());} + {Identifiers} {return symbol(ChocoPyTokens.ID, yytext());} /* Whitespace. */ {WhiteSpace} { /* ignore */ } /* Comment. 
*/ {Comments} { /* ignore */ } } { - {StringLiteral} {currString+=yytext();} + {StringLiteral} {currString += yytext();} \\$ { /*'\' at the end of line, do nothing.*/ } - "\"" {yybegin(AFTER); return symbol(ChocoPyTokens.STRING, currString);} + "\"" {yybegin(AFTER); return symbol(ChocoPyTokens.STRING, currString);} // accepted a ", return to AFTER state } <> { return symbol(ChocoPyTokens.EOF); } From d5097498b7b103865f919305ba4a32f651d7e594 Mon Sep 17 00:00:00 2001 From: bill Date: Thu, 18 Feb 2021 00:27:21 +0800 Subject: [PATCH 02/26] more comments --- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 24c0328..dc0677f 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -50,6 +50,7 @@ import java.util.ArrayList; new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), value); } + private void push(int indent){ stack.add(indent); } @@ -73,7 +74,15 @@ StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))* // \n, \r, \t, \\, \" Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* Comments = #[^\r\n]* %% - +//YYINITIAL state is where we're dealing with indentations. +//We will set the state to YYINITIAL when starting a +//new line unless this line is within a string, e.g.: +/* +"this is \ +a string across \ +multiple lines\ +" +*/ { {WhiteSpace} { @@ -115,8 +124,10 @@ if True: if(top() > currIndent) { /* - If the indentation of the line < indents current level should have, - keep dedenting until it reaches the right level. + If the indentation of the line is less than number of + indents current level should have, + keep dedenting until it reaches the level with the same + number of indents. It's like a loop, because we're not changing the state and we rewinded the current character. 
So it will keep going until top()<= currIndent and it will switch to @@ -126,12 +137,12 @@ if True: return symbol(ChocoPyTokens.DEDENT, currIndent); } /*Otherwise, we will start dealing with the rest - of the line after indentation in from the next token.*/ + of the line after indentation in AFTER state. */ yystart(AFTER); if(top()< currIndent) { /* - If current indentation > indents current level should have, + If current indentation is more than indents current level should have, start a new level which will have `currIndent' spaces. */ push(currIndent); From ecb8d23418dd389b5160205022981e36b2c54425 Mon Sep 17 00:00:00 2001 From: bill Date: Thu, 18 Feb 2021 00:36:30 +0800 Subject: [PATCH 03/26] bug fixes --- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 8 +- src/main/jflex/chocopy/pa1/ChocoPyLexer.java | 1287 ------------------ 2 files changed, 4 insertions(+), 1291 deletions(-) delete mode 100644 src/main/jflex/chocopy/pa1/ChocoPyLexer.java diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index dc0677f..ccb7187 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -33,7 +33,7 @@ import java.util.ArrayList; /** Producer of token-related values for the parser. 
*/ final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); private int currIndent = 0; //Current Indentation Level - private int currString = ""; + private String currString = ""; /*A stack that keeps track of the spaces in each Indentation Level*/ private ArrayList stack = new ArrayList(20); /** Return a terminal symbol of syntactic category TYPE and no @@ -70,7 +70,7 @@ WhiteSpace = [ \t] LineBreak = \r|\n|\r\n IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal -StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))* // \n, \r, \t, \\, \" and Anything except \ and " +StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* Comments = #[^\r\n]* %% @@ -138,7 +138,7 @@ if True: } /*Otherwise, we will start dealing with the rest of the line after indentation in AFTER state. */ - yystart(AFTER); + yybegin(AFTER); if(top()< currIndent) { /* @@ -160,7 +160,7 @@ if True: /* Literals. 
*/ {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext())); } - "\"" {yystart(STR); currString = "";} //Start taking a string when see a " + "\"" {yybegin(STR); currString = "";} //Start taking a string when see a " "False" { return symbol(ChocoPyTokens.BOOL, false); } "True" { return symbol(ChocoPyTokens.BOOL, true); } "None" { return symbol(ChocoPyTokens.NONE); } diff --git a/src/main/jflex/chocopy/pa1/ChocoPyLexer.java b/src/main/jflex/chocopy/pa1/ChocoPyLexer.java deleted file mode 100644 index ac26ed7..0000000 --- a/src/main/jflex/chocopy/pa1/ChocoPyLexer.java +++ /dev/null @@ -1,1287 +0,0 @@ -// DO NOT EDIT -// Generated by JFlex 1.8.2 http://jflex.de/ -// source: ChocoPy.jflex - -package chocopy.pa1; -import java_cup.runtime.*; -import java.util.ArrayList; - -// See https://github.com/jflex-de/jflex/issues/222 -@SuppressWarnings("FallThrough") -public class ChocoPyLexer implements java_cup.runtime.Scanner { - - /** This character denotes the end of file. */ - public static final int YYEOF = -1; - - /** Initial size of the lookahead buffer. */ - private static final int ZZ_BUFFERSIZE = 16384; - - // Lexical states. 
- public static final int YYINITIAL = 0; - public static final int AFTER = 2; - public static final int STR = 4; - - /** - * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l - * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l - * at the beginning of a line - * l is of the form l = 2*k, k a non negative integer - */ - private static final int ZZ_LEXSTATE[] = { - 0, 0, 1, 1, 2, 2 - }; - - /** - * Top-level table for translating characters to character classes - */ - private static final int [] ZZ_CMAP_TOP = zzUnpackcmap_top(); - - private static final String ZZ_CMAP_TOP_PACKED_0 = - "\1\0\37\u0100\1\u0200\u10df\u0100"; - - private static int [] zzUnpackcmap_top() { - int [] result = new int[4352]; - int offset = 0; - offset = zzUnpackcmap_top(ZZ_CMAP_TOP_PACKED_0, offset, result); - return result; - } - - private static int zzUnpackcmap_top(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int count = packed.charAt(i++); - int value = packed.charAt(i++); - do result[j++] = value; while (--count > 0); - } - return j; - } - - - /** - * Second-level tables for translating characters to character classes - */ - private static final int [] ZZ_CMAP_BLOCKS = zzUnpackcmap_blocks(); - - private static final String ZZ_CMAP_BLOCKS_PACKED_0 = - "\11\0\1\1\1\2\2\3\1\4\22\0\1\1\1\5"+ - "\1\6\1\7\1\0\1\10\2\0\1\11\1\12\1\13"+ - "\1\14\1\15\1\16\1\17\1\20\1\21\11\22\1\23"+ - "\1\0\1\24\1\25\1\26\2\0\5\27\1\30\7\27"+ - "\1\31\5\27\1\32\6\27\1\33\1\34\1\35\1\0"+ - "\1\27\1\0\1\36\1\37\1\40\1\41\1\42\1\43"+ - "\1\44\1\45\1\46\1\27\1\47\1\50\1\51\1\52"+ - "\1\53\1\54\1\27\1\55\1\56\1\57\1\60\1\27"+ - "\1\61\1\62\1\63\1\27\12\0\1\3\u01a2\0\2\3"+ - "\326\0"; - - private static int [] zzUnpackcmap_blocks() { - int [] result = new int[768]; - int offset = 0; - offset = zzUnpackcmap_blocks(ZZ_CMAP_BLOCKS_PACKED_0, offset, result); - 
return result; - } - - private static int zzUnpackcmap_blocks(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int count = packed.charAt(i++); - int value = packed.charAt(i++); - do result[j++] = value; while (--count > 0); - } - return j; - } - - /** - * Translates DFA states to action switch labels. - */ - private static final int [] ZZ_ACTION = zzUnpackAction(); - - private static final String ZZ_ACTION_PACKED_0 = - "\2\0\1\1\1\2\1\3\2\4\1\5\1\6\1\7"+ - "\2\10\1\6\1\11\1\7\1\12\1\13\1\14\1\15"+ - "\1\16\1\17\1\20\1\21\1\22\2\23\1\24\1\25"+ - "\1\26\1\27\4\30\1\31\1\32\20\30\1\1\1\33"+ - "\1\6\1\34\1\35\1\22\1\36\1\37\1\40\4\30"+ - "\1\41\13\30\1\42\1\30\1\43\1\44\2\30\1\45"+ - "\7\30\1\0\2\46\3\30\1\47\5\30\1\50\1\51"+ - "\4\30\1\52\5\30\1\53\3\30\1\54\4\30\1\55"+ - "\1\56\5\30\1\57\1\60\2\30\1\61\4\30\1\62"+ - "\3\30\1\63\1\30\1\64\1\30\1\65\1\66\1\67"+ - "\7\30\1\70\1\30\1\71\1\72\1\73\1\30\1\74"+ - "\1\30\1\75\1\76\1\77\1\30\1\100\1\30\1\101"+ - "\1\30\1\102\1\103"; - - private static int [] zzUnpackAction() { - int [] result = new int[176]; - int offset = 0; - offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); - return result; - } - - private static int zzUnpackAction(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int count = packed.charAt(i++); - int value = packed.charAt(i++); - do result[j++] = value; while (--count > 0); - } - return j; - } - - - /** - * Translates a state to a row index in the transition table - */ - private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); - - private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\64\0\150\0\234\0\234\0\234\0\320\0\u0104"+ - "\0\234\0\234\0\234\0\u0138\0\u016c\0\234\0\u01a0\0\234"+ - 
"\0\234\0\234\0\234\0\234\0\234\0\u01d4\0\234\0\u0208"+ - "\0\234\0\u023c\0\234\0\u0270\0\u02a4\0\u02d8\0\u030c\0\u0340"+ - "\0\u0374\0\u03a8\0\234\0\234\0\u03dc\0\u0410\0\u0444\0\u0478"+ - "\0\u04ac\0\u04e0\0\u0514\0\u0548\0\u057c\0\u05b0\0\u05e4\0\u0618"+ - "\0\u064c\0\u0680\0\u06b4\0\u06e8\0\u071c\0\234\0\u0750\0\234"+ - "\0\234\0\234\0\234\0\234\0\234\0\u0784\0\u07b8\0\u07ec"+ - "\0\u0820\0\u0854\0\u0888\0\u08bc\0\u08f0\0\u0924\0\u0958\0\u098c"+ - "\0\u09c0\0\u09f4\0\u0a28\0\u0a5c\0\u0a90\0\u030c\0\u0ac4\0\u030c"+ - "\0\u030c\0\u0af8\0\u0b2c\0\u030c\0\u0b60\0\u0b94\0\u0bc8\0\u0bfc"+ - "\0\u0c30\0\u0c64\0\u0c98\0\u0ccc\0\234\0\u0d00\0\u0d34\0\u0d68"+ - "\0\u0d9c\0\u030c\0\u0dd0\0\u0e04\0\u0e38\0\u0e6c\0\u0ea0\0\u030c"+ - "\0\u030c\0\u0ed4\0\u0f08\0\u0f3c\0\u0f70\0\u030c\0\u0fa4\0\u0fd8"+ - "\0\u100c\0\u1040\0\u1074\0\u030c\0\u10a8\0\u10dc\0\u1110\0\u030c"+ - "\0\u1144\0\u1178\0\u11ac\0\u11e0\0\u030c\0\u030c\0\u1214\0\u1248"+ - "\0\u127c\0\u12b0\0\u12e4\0\u030c\0\u030c\0\u1318\0\u134c\0\u030c"+ - "\0\u1380\0\u13b4\0\u13e8\0\u141c\0\u030c\0\u1450\0\u1484\0\u14b8"+ - "\0\u030c\0\u14ec\0\u030c\0\u1520\0\u030c\0\u030c\0\u030c\0\u1554"+ - "\0\u1588\0\u15bc\0\u15f0\0\u1624\0\u1658\0\u168c\0\u030c\0\u16c0"+ - "\0\u030c\0\u030c\0\u030c\0\u16f4\0\u030c\0\u1728\0\u030c\0\u030c"+ - "\0\u030c\0\u175c\0\u030c\0\u1790\0\u030c\0\u17c4\0\u030c\0\u030c"; - - private static int [] zzUnpackRowMap() { - int [] result = new int[176]; - int offset = 0; - offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); - return result; - } - - private static int zzUnpackRowMap(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int high = packed.charAt(i++) << 16; - result[j++] = high | packed.charAt(i++); - } - return j; - } - - /** - * The transition table of the DFA - */ - private static final int [] ZZ_TRANS = zzUnpackTrans(); - - private static final String 
ZZ_TRANS_PACKED_0 = - "\1\4\1\5\1\6\1\4\1\7\2\4\1\10\54\4"+ - "\1\11\1\12\1\13\1\11\1\14\1\15\1\16\1\17"+ - "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+ - "\1\30\1\31\1\32\1\33\1\34\1\35\1\36\1\37"+ - "\1\40\1\41\1\42\1\43\1\11\1\44\1\45\1\46"+ - "\1\47\1\50\1\51\1\52\1\53\1\37\1\54\1\37"+ - "\1\55\1\37\1\56\1\57\1\60\1\61\1\37\1\62"+ - "\1\37\1\63\1\37\1\64\6\65\1\66\25\65\1\67"+ - "\27\65\66\0\1\6\61\0\2\10\1\0\1\10\1\0"+ - "\57\10\2\0\1\13\106\0\1\70\36\0\2\17\1\0"+ - "\1\17\1\0\57\17\26\0\1\71\55\0\1\72\64\0"+ - "\2\32\66\0\1\73\63\0\1\74\63\0\1\75\65\0"+ - "\4\37\3\0\26\37\27\0\4\37\3\0\1\76\25\37"+ - "\27\0\4\37\3\0\15\37\1\77\10\37\27\0\4\37"+ - "\3\0\17\37\1\100\6\37\27\0\4\37\3\0\14\37"+ - "\1\101\3\37\1\102\2\37\1\103\2\37\27\0\4\37"+ - "\3\0\17\37\1\104\6\37\27\0\4\37\3\0\12\37"+ - "\1\105\2\37\1\106\10\37\27\0\4\37\3\0\4\37"+ - "\1\107\21\37\27\0\4\37\3\0\12\37\1\110\11\37"+ - "\1\111\1\37\27\0\4\37\3\0\10\37\1\112\4\37"+ - "\1\113\1\37\1\114\6\37\27\0\4\37\3\0\12\37"+ - "\1\115\13\37\27\0\4\37\3\0\5\37\1\116\5\37"+ - "\1\117\1\120\3\37\1\121\5\37\27\0\4\37\3\0"+ - "\1\122\25\37\27\0\4\37\3\0\15\37\1\123\10\37"+ - "\27\0\4\37\3\0\17\37\1\124\6\37\27\0\4\37"+ - "\3\0\1\125\25\37\27\0\4\37\3\0\1\126\3\37"+ - "\1\127\21\37\27\0\4\37\3\0\17\37\1\130\6\37"+ - "\27\0\4\37\3\0\7\37\1\131\1\132\15\37\27\0"+ - "\4\37\3\0\10\37\1\133\15\37\6\65\1\0\25\65"+ - "\1\134\27\65\2\0\2\135\1\136\1\0\1\65\25\0"+ - "\1\65\15\0\1\65\2\0\1\65\1\0\1\65\33\0"+ - "\4\37\3\0\12\37\1\137\13\37\27\0\4\37\3\0"+ - "\14\37\1\140\11\37\27\0\4\37\3\0\22\37\1\141"+ - "\3\37\27\0\4\37\3\0\3\37\1\142\22\37\27\0"+ - "\4\37\3\0\20\37\1\143\5\37\27\0\4\37\3\0"+ - "\1\144\25\37\27\0\4\37\3\0\4\37\1\145\21\37"+ - "\27\0\4\37\3\0\1\146\25\37\27\0\4\37\3\0"+ - "\14\37\1\147\11\37\27\0\4\37\3\0\5\37\1\150"+ - "\4\37\1\151\13\37\27\0\4\37\3\0\10\37\1\152"+ - "\7\37\1\153\5\37\27\0\4\37\3\0\2\37\1\154"+ - "\23\37\27\0\4\37\3\0\14\37\1\155\11\37\27\0"+ - "\4\37\3\0\17\37\1\156\6\37\27\0\4\37\3\0"+ - 
"\15\37\1\157\10\37\27\0\4\37\3\0\15\37\1\160"+ - "\10\37\27\0\4\37\3\0\16\37\1\161\7\37\27\0"+ - "\4\37\3\0\13\37\1\162\12\37\27\0\4\37\3\0"+ - "\14\37\1\163\4\37\1\164\4\37\27\0\4\37\3\0"+ - "\20\37\1\165\5\37\27\0\4\37\3\0\10\37\1\166"+ - "\15\37\27\0\4\37\3\0\21\37\1\167\4\37\27\0"+ - "\4\37\3\0\25\37\1\170\27\0\4\37\3\0\10\37"+ - "\1\171\15\37\27\0\4\37\3\0\21\37\1\172\4\37"+ - "\27\0\4\37\3\0\4\37\1\173\21\37\6\0\1\65"+ - "\25\0\1\65\15\0\1\65\2\0\1\65\1\0\1\65"+ - "\6\0\1\135\110\0\4\37\3\0\20\37\1\174\5\37"+ - "\27\0\4\37\3\0\4\37\1\175\21\37\27\0\4\37"+ - "\3\0\4\37\1\176\21\37\27\0\4\37\3\0\4\37"+ - "\1\177\21\37\27\0\4\37\3\0\10\37\1\200\15\37"+ - "\27\0\4\37\3\0\1\201\25\37\27\0\4\37\3\0"+ - "\20\37\1\202\5\37\27\0\4\37\3\0\21\37\1\203"+ - "\4\37\27\0\4\37\3\0\5\37\1\204\20\37\27\0"+ - "\4\37\3\0\4\37\1\205\21\37\27\0\4\37\3\0"+ - "\4\37\1\206\21\37\27\0\4\37\3\0\1\207\25\37"+ - "\27\0\4\37\3\0\13\37\1\210\12\37\27\0\4\37"+ - "\3\0\1\37\1\211\24\37\27\0\4\37\3\0\15\37"+ - "\1\212\10\37\27\0\4\37\3\0\1\37\1\213\24\37"+ - "\27\0\4\37\3\0\12\37\1\214\13\37\27\0\4\37"+ - "\3\0\20\37\1\215\5\37\27\0\4\37\3\0\20\37"+ - "\1\216\5\37\27\0\4\37\3\0\22\37\1\217\3\37"+ - "\27\0\4\37\3\0\12\37\1\220\13\37\27\0\4\37"+ - "\3\0\7\37\1\221\16\37\27\0\4\37\3\0\12\37"+ - "\1\222\13\37\27\0\4\37\3\0\4\37\1\223\21\37"+ - "\27\0\4\37\3\0\17\37\1\224\6\37\27\0\4\37"+ - "\3\0\21\37\1\225\4\37\27\0\4\37\3\0\11\37"+ - "\1\226\14\37\27\0\4\37\3\0\20\37\1\227\5\37"+ - "\27\0\4\37\3\0\10\37\1\230\15\37\27\0\4\37"+ - "\3\0\16\37\1\231\7\37\27\0\4\37\3\0\12\37"+ - "\1\232\13\37\27\0\4\37\3\0\1\233\25\37\27\0"+ - "\4\37\3\0\17\37\1\234\6\37\27\0\4\37\3\0"+ - "\3\37\1\235\22\37\27\0\4\37\3\0\15\37\1\236"+ - "\10\37\27\0\4\37\3\0\4\37\1\237\21\37\27\0"+ - "\4\37\3\0\17\37\1\240\6\37\27\0\4\37\3\0"+ - "\4\37\1\241\21\37\27\0\4\37\3\0\3\37\1\242"+ - "\22\37\27\0\4\37\3\0\21\37\1\243\4\37\27\0"+ - "\4\37\3\0\14\37\1\244\11\37\27\0\4\37\3\0"+ - 
"\21\37\1\245\4\37\27\0\4\37\3\0\12\37\1\246"+ - "\13\37\27\0\4\37\3\0\12\37\1\247\13\37\27\0"+ - "\4\37\3\0\21\37\1\250\4\37\27\0\4\37\3\0"+ - "\1\251\25\37\27\0\4\37\3\0\2\37\1\252\23\37"+ - "\27\0\4\37\3\0\14\37\1\253\11\37\27\0\4\37"+ - "\3\0\22\37\1\254\3\37\27\0\4\37\3\0\25\37"+ - "\1\255\27\0\4\37\3\0\1\256\25\37\27\0\4\37"+ - "\3\0\4\37\1\257\21\37\27\0\4\37\3\0\12\37"+ - "\1\260\13\37"; - - private static int [] zzUnpackTrans() { - int [] result = new int[6136]; - int offset = 0; - offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); - return result; - } - - private static int zzUnpackTrans(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int count = packed.charAt(i++); - int value = packed.charAt(i++); - value--; - do result[j++] = value; while (--count > 0); - } - return j; - } - - - /** Error code for "Unknown internal scanner error". */ - private static final int ZZ_UNKNOWN_ERROR = 0; - /** Error code for "could not match input". */ - private static final int ZZ_NO_MATCH = 1; - /** Error code for "pushback value was too large". */ - private static final int ZZ_PUSHBACK_2BIG = 2; - - /** - * Error messages for {@link #ZZ_UNKNOWN_ERROR}, {@link #ZZ_NO_MATCH}, and - * {@link #ZZ_PUSHBACK_2BIG} respectively. 
- */ - private static final String ZZ_ERROR_MSG[] = { - "Unknown internal scanner error", - "Error: could not match input", - "Error: pushback value was too large" - }; - - /** - * ZZ_ATTRIBUTE[aState] contains the attributes of state {@code aState} - */ - private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); - - private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\2\0\1\1\3\11\2\1\3\11\2\1\1\11\1\1"+ - "\6\11\1\1\1\11\1\1\1\11\1\1\1\11\7\1"+ - "\2\11\21\1\1\11\1\1\6\11\36\1\1\0\1\11"+ - "\123\1"; - - private static int [] zzUnpackAttribute() { - int [] result = new int[176]; - int offset = 0; - offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); - return result; - } - - private static int zzUnpackAttribute(String packed, int offset, int [] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ - int l = packed.length(); - while (i < l) { - int count = packed.charAt(i++); - int value = packed.charAt(i++); - do result[j++] = value; while (--count > 0); - } - return j; - } - - /** Input device. */ - private java.io.Reader zzReader; - - /** Current state of the DFA. */ - private int zzState; - - /** Current lexical state. */ - private int zzLexicalState = YYINITIAL; - - /** - * This buffer contains the current text to be matched and is the source of the {@link #yytext()} - * string. - */ - private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; - - /** Text position at the last accepting state. */ - private int zzMarkedPos; - - /** Current text position in the buffer. */ - private int zzCurrentPos; - - /** Marks the beginning of the {@link #yytext()} string in the buffer. */ - private int zzStartRead; - - /** Marks the last character in the buffer, that has been read from input. */ - private int zzEndRead; - - /** - * Whether the scanner is at the end of file. - * @see #yyatEOF - */ - private boolean zzAtEOF; - - /** - * The number of occupied positions in {@link #zzBuffer} beyond {@link #zzEndRead}. 
- * - *

When a lead/high surrogate has been read from the input stream into the final - * {@link #zzBuffer} position, this will have a value of 1; otherwise, it will have a value of 0. - */ - private int zzFinalHighSurrogate = 0; - - /** Number of newlines encountered up to the start of the matched text. */ - private int yyline; - - /** Number of characters from the last newline up to the start of the matched text. */ - private int yycolumn; - - /** Number of characters up to the start of the matched text. */ - @SuppressWarnings("unused") - private long yychar; - - /** Whether the scanner is currently at the beginning of a line. */ - @SuppressWarnings("unused") - private boolean zzAtBOL = true; - - /** Whether the user-EOF-code has already been executed. */ - @SuppressWarnings("unused") - private boolean zzEOFDone; - - /* user code: */ - /* The code below includes some convenience methods to create tokens - * of a given type and optionally a value that the CUP parser can - * understand. Specifically, a lot of the logic below deals with - * embedded information about where in the source code a given token - * was recognized, so that the parser can report errors accurately. - * (It need not be modified for this project.) */ - - /** Producer of token-related values for the parser. */ - final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); - private int currIndent = 0; - private int currString = ""; - private ArrayList stack = new ArrayList(20); - /** Return a terminal symbol of syntactic category TYPE and no - * semantic value at the current source location. */ - private Symbol symbol(int type) { - return symbol(type); - } - - /** Return a terminal symbol of syntactic category TYPE and semantic - * value VALUE at the current source location. 
*/ - private Symbol symbol(int type, Object value) { - return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[type], type, - new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1), - new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), - value); - } - private void push(int indent){ - stack.add(indent); - } - private int pop(){ - if(stack.isEmpty()) return 0; - return stack.remove(stack.size() - 1); - } - private int top(){ - if(stack.isEmpty) return 0; - return stack.get(stack.size() - 1); - } - - - /** - * Creates a new scanner - * - * @param in the java.io.Reader to read input from. - */ - public ChocoPyLexer(java.io.Reader in) { - this.zzReader = in; - } - - /** - * Translates raw input code points to DFA table row - */ - private static int zzCMap(int input) { - int offset = input & 255; - return offset == input ? ZZ_CMAP_BLOCKS[offset] : ZZ_CMAP_BLOCKS[ZZ_CMAP_TOP[input >> 8] | offset]; - } - - /** - * Refills the input buffer. - * - * @return {@code false} iff there was new input. - * @exception java.io.IOException if any I/O-Error occurs - */ - private boolean zzRefill() throws java.io.IOException { - - /* first: make room (if you can) */ - if (zzStartRead > 0) { - zzEndRead += zzFinalHighSurrogate; - zzFinalHighSurrogate = 0; - System.arraycopy(zzBuffer, zzStartRead, - zzBuffer, 0, - zzEndRead - zzStartRead); - - /* translate stored positions */ - zzEndRead -= zzStartRead; - zzCurrentPos -= zzStartRead; - zzMarkedPos -= zzStartRead; - zzStartRead = 0; - } - - /* is the buffer big enough? 
*/ - if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) { - /* if not: blow it up */ - char newBuffer[] = new char[zzBuffer.length * 2]; - System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); - zzBuffer = newBuffer; - zzEndRead += zzFinalHighSurrogate; - zzFinalHighSurrogate = 0; - } - - /* fill the buffer with new input */ - int requested = zzBuffer.length - zzEndRead; - int numRead = zzReader.read(zzBuffer, zzEndRead, requested); - - /* not supposed to occur according to specification of java.io.Reader */ - if (numRead == 0) { - throw new java.io.IOException( - "Reader returned 0 characters. See JFlex examples/zero-reader for a workaround."); - } - if (numRead > 0) { - zzEndRead += numRead; - if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) { - if (numRead == requested) { // We requested too few chars to encode a full Unicode character - --zzEndRead; - zzFinalHighSurrogate = 1; - } else { // There is room in the buffer for at least one more char - int c = zzReader.read(); // Expecting to read a paired low surrogate char - if (c == -1) { - return true; - } else { - zzBuffer[zzEndRead++] = (char)c; - } - } - } - /* potentially more input available */ - return false; - } - - /* numRead < 0 ==> end of stream */ - return true; - } - - - /** - * Closes the input reader. - * - * @throws java.io.IOException if the reader could not be closed. - */ - public final void yyclose() throws java.io.IOException { - zzAtEOF = true; // indicate end of file - zzEndRead = zzStartRead; // invalidate buffer - - if (zzReader != null) { - zzReader.close(); - } - } - - - /** - * Resets the scanner to read from a new input stream. - * - *

Does not close the old reader. - * - *

All internal variables are reset, the old input stream cannot be reused (internal - * buffer is discarded and lost). Lexical state is set to {@code ZZ_INITIAL}. - * - *

Internal scan buffer is resized down to its initial length, if it has grown. - * - * @param reader The new input stream. - */ - public final void yyreset(java.io.Reader reader) { - zzReader = reader; - zzEOFDone = false; - yyResetPosition(); - zzLexicalState = YYINITIAL; - if (zzBuffer.length > ZZ_BUFFERSIZE) { - zzBuffer = new char[ZZ_BUFFERSIZE]; - } - } - - /** - * Resets the input position. - */ - private final void yyResetPosition() { - zzAtBOL = true; - zzAtEOF = false; - zzCurrentPos = 0; - zzMarkedPos = 0; - zzStartRead = 0; - zzEndRead = 0; - zzFinalHighSurrogate = 0; - yyline = 0; - yycolumn = 0; - yychar = 0L; - } - - - /** - * Returns whether the scanner has reached the end of the reader it reads from. - * - * @return whether the scanner has reached EOF. - */ - public final boolean yyatEOF() { - return zzAtEOF; - } - - - /** - * Returns the current lexical state. - * - * @return the current lexical state. - */ - public final int yystate() { - return zzLexicalState; - } - - - /** - * Enters a new lexical state. - * - * @param newState the new lexical state - */ - public final void yybegin(int newState) { - zzLexicalState = newState; - } - - - /** - * Returns the text matched by the current regular expression. - * - * @return the matched text. - */ - public final String yytext() { - return new String(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); - } - - - /** - * Returns the character at the given position from the matched text. - * - *

It is equivalent to {@code yytext().charAt(pos)}, but faster. - * - * @param position the position of the character to fetch. A value from 0 to {@code yylength()-1}. - * - * @return the character at {@code position}. - */ - public final char yycharat(int position) { - return zzBuffer[zzStartRead + position]; - } - - - /** - * How many characters were matched. - * - * @return the length of the matched text region. - */ - public final int yylength() { - return zzMarkedPos-zzStartRead; - } - - - /** - * Reports an error that occurred while scanning. - * - *

In a well-formed scanner (no or only correct usage of {@code yypushback(int)} and a - * match-all fallback rule) this method will only be called with things that - * "Can't Possibly Happen". - * - *

If this method is called, something is seriously wrong (e.g. a JFlex bug producing a faulty - * scanner etc.). - * - *

Usual syntax/scanner level error handling should be done in error fallback rules. - * - * @param errorCode the code of the error message to display. - */ - private static void zzScanError(int errorCode) { - String message; - try { - message = ZZ_ERROR_MSG[errorCode]; - } catch (ArrayIndexOutOfBoundsException e) { - message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; - } - - throw new Error(message); - } - - - /** - * Pushes the specified amount of characters back into the input stream. - * - *

They will be read again by then next call of the scanning method. - * - * @param number the number of characters to be read again. This number must not be greater than - * {@link #yylength()}. - */ - public void yypushback(int number) { - if ( number > yylength() ) - zzScanError(ZZ_PUSHBACK_2BIG); - - zzMarkedPos -= number; - } - - - - - /** - * Resumes scanning until the next regular expression is matched, the end of input is encountered - * or an I/O-Error occurs. - * - * @return the next token. - * @exception java.io.IOException if any I/O-Error occurs. - */ - @Override public java_cup.runtime.Symbol next_token() throws java.io.IOException { - int zzInput; - int zzAction; - - // cached fields: - int zzCurrentPosL; - int zzMarkedPosL; - int zzEndReadL = zzEndRead; - char[] zzBufferL = zzBuffer; - - int [] zzTransL = ZZ_TRANS; - int [] zzRowMapL = ZZ_ROWMAP; - int [] zzAttrL = ZZ_ATTRIBUTE; - - while (true) { - zzMarkedPosL = zzMarkedPos; - - boolean zzR = false; - int zzCh; - int zzCharCount; - for (zzCurrentPosL = zzStartRead ; - zzCurrentPosL < zzMarkedPosL ; - zzCurrentPosL += zzCharCount ) { - zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL); - zzCharCount = Character.charCount(zzCh); - switch (zzCh) { - case '\u000B': // fall through - case '\u000C': // fall through - case '\u0085': // fall through - case '\u2028': // fall through - case '\u2029': - yyline++; - yycolumn = 0; - zzR = false; - break; - case '\r': - yyline++; - yycolumn = 0; - zzR = true; - break; - case '\n': - if (zzR) - zzR = false; - else { - yyline++; - yycolumn = 0; - } - break; - default: - zzR = false; - yycolumn += zzCharCount; - } - } - - if (zzR) { - // peek one character ahead if it is - // (if we have counted one line too much) - boolean zzPeek; - if (zzMarkedPosL < zzEndReadL) - zzPeek = zzBufferL[zzMarkedPosL] == '\n'; - else if (zzAtEOF) - zzPeek = false; - else { - boolean eof = zzRefill(); - zzEndReadL = zzEndRead; - zzMarkedPosL = zzMarkedPos; - zzBufferL 
= zzBuffer; - if (eof) - zzPeek = false; - else - zzPeek = zzBufferL[zzMarkedPosL] == '\n'; - } - if (zzPeek) yyline--; - } - zzAction = -1; - - zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; - - zzState = ZZ_LEXSTATE[zzLexicalState]; - - // set up zzAction for empty match case: - int zzAttributes = zzAttrL[zzState]; - if ( (zzAttributes & 1) == 1 ) { - zzAction = zzState; - } - - - zzForAction: { - while (true) { - - if (zzCurrentPosL < zzEndReadL) { - zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); - zzCurrentPosL += Character.charCount(zzInput); - } - else if (zzAtEOF) { - zzInput = YYEOF; - break zzForAction; - } - else { - // store back cached positions - zzCurrentPos = zzCurrentPosL; - zzMarkedPos = zzMarkedPosL; - boolean eof = zzRefill(); - // get translated positions and possibly new buffer - zzCurrentPosL = zzCurrentPos; - zzMarkedPosL = zzMarkedPos; - zzBufferL = zzBuffer; - zzEndReadL = zzEndRead; - if (eof) { - zzInput = YYEOF; - break zzForAction; - } - else { - zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); - zzCurrentPosL += Character.charCount(zzInput); - } - } - int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMap(zzInput) ]; - if (zzNext == -1) break zzForAction; - zzState = zzNext; - - zzAttributes = zzAttrL[zzState]; - if ( (zzAttributes & 1) == 1 ) { - zzAction = zzState; - zzMarkedPosL = zzCurrentPosL; - if ( (zzAttributes & 8) == 8 ) break zzForAction; - } - - } - } - - // store back cached position - zzMarkedPos = zzMarkedPosL; - - if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { - zzAtEOF = true; - { - return symbol(ChocoPyTokens.EOF); - } - } - else { - switch (zzAction < 0 ? 
zzAction : ZZ_ACTION[zzAction]) { - case 1: - { currString+=yytext(); - } - // fall through - case 68: break; - case 2: - { yypushback(1); - if(top() > currIndent) - { - pop(); - return symbol(ChocoPyTokens.DEDENT); - } - yystart(AFTER); - if(top()< currIndent) - { - push(currIndent); - return symbol(ChocoPyTokens.INDENT); - } - } - // fall through - case 69: break; - case 3: - { String space = yytext(); - if(space == "\t") - currIndent += 8; - else - currIndent ++; - } - // fall through - case 70: break; - case 4: - { currIndent = 0; - } - // fall through - case 71: break; - case 5: - { /* ignored */ - } - // fall through - case 72: break; - case 6: - { return symbol(ChocoPyTokens.UNRECOGNIZED); - } - // fall through - case 73: break; - case 7: - { /* ignore */ - } - // fall through - case 74: break; - case 8: - { return symbol(ChocoPyTokens.NEWLINE); yybegin(YYINITIAL); currIndent = 0; - } - // fall through - case 75: break; - case 9: - { yystart(STR); currString = ""; - } - // fall through - case 76: break; - case 10: - { return symbol(ChocoPyTokens.MOD); - } - // fall through - case 77: break; - case 11: - { return symbol(ChocoPyTokens.LPAR); - } - // fall through - case 78: break; - case 12: - { return symbol(ChocoPyTokens.RPAR); - } - // fall through - case 79: break; - case 13: - { return symbol(ChocoPyTokens.MUL); - } - // fall through - case 80: break; - case 14: - { return symbol(ChocoPyTokens.PLUS); - } - // fall through - case 81: break; - case 15: - { return symbol(ChocoPyTokens.COMMA); - } - // fall through - case 82: break; - case 16: - { return symbol(ChocoPyTokens.MINUS); - } - // fall through - case 83: break; - case 17: - { return symbol(ChocoPyTokens.DOT); - } - // fall through - case 84: break; - case 18: - { return symbol(ChocoPyTokens.DIV); - } - // fall through - case 85: break; - case 19: - { return symbol(ChocoPyTokens.NUMBER, - Integer.parseInt(yytext())); - } - // fall through - case 86: break; - case 20: - { return 
symbol(ChocoPyTokens.COLON); - } - // fall through - case 87: break; - case 21: - { return symbol(ChocoPyTokens.LT); - } - // fall through - case 88: break; - case 22: - { return symbol(ChocoPyTokens.ASSIGN); - } - // fall through - case 89: break; - case 23: - { return symbol(ChocoPyTokens.GT); - } - // fall through - case 90: break; - case 24: - { return symbol(ChocoPyTokens.NAMES, yytext()); - } - // fall through - case 91: break; - case 25: - { return symbol(ChocoPyTokens.LBR); - } - // fall through - case 92: break; - case 26: - { return symbol(ChocoPyTokens.RBR); - } - // fall through - case 93: break; - case 27: - { yybegin(AFTER); return symbol(ChocoPyTokens.STRING, currString); - } - // fall through - case 94: break; - case 28: - { return symbol(ChocoPyTokens.NEQ); - } - // fall through - case 95: break; - case 29: - { return symbol(ChocoPyTokens.ARROW); - } - // fall through - case 96: break; - case 30: - { return symbol(ChocoPyTokens.LEQ); - } - // fall through - case 97: break; - case 31: - { return symbol(ChocoPyTokens.EQUAL); - } - // fall through - case 98: break; - case 32: - { return symbol(ChocoPyTokens.GEQ); - } - // fall through - case 99: break; - case 33: - { return symbol(ChocoPyTokens.AS); - } - // fall through - case 100: break; - case 34: - { return symbol(ChocoPyTokens.IF); - } - // fall through - case 101: break; - case 35: - { return symbol(ChocoPyTokens.IN); - } - // fall through - case 102: break; - case 36: - { return symbol(ChocoPyTokens.IS); - } - // fall through - case 103: break; - case 37: - { return symbol(ChocoPyTokens.OR); - } - // fall through - case 104: break; - case 38: - // lookahead expression with fixed base length - zzMarkedPos = Character.offsetByCodePoints - (zzBufferL, zzStartRead, zzEndRead - zzStartRead, zzStartRead, 1); - { /*'\' at the end of line, do nothing.*/ - } - // fall through - case 105: break; - case 39: - { return symbol(ChocoPyTokens.AND); - } - // fall through - case 106: break; - case 40: - { 
return symbol(ChocoPyTokens.DEF); - } - // fall through - case 107: break; - case 41: - { return symbol(ChocoPyTokens.DEL); - } - // fall through - case 108: break; - case 42: - { return symbol(ChocoPyTokens.FOR); - } - // fall through - case 109: break; - case 43: - { return symbol(ChocoPyTokens.NOT); - } - // fall through - case 110: break; - case 44: - { return symbol(ChocoPyTokens.TRY); - } - // fall through - case 111: break; - case 45: - { return symbol(ChocoPyTokens.NONE); - } - // fall through - case 112: break; - case 46: - { return symbol(ChocoPyTokens.BOOL, true); - } - // fall through - case 113: break; - case 47: - { return symbol(ChocoPyTokens.ELIF); - } - // fall through - case 114: break; - case 48: - { return symbol(ChocoPyTokens.ELSE); - } - // fall through - case 115: break; - case 49: - { return symbol(ChocoPyTokens.FROM); - } - // fall through - case 116: break; - case 50: - { return symbol(ChocoPyTokens.PASS); - } - // fall through - case 117: break; - case 51: - { return symbol(ChocoPyTokens.WITH); - } - // fall through - case 118: break; - case 52: - { return symbol(ChocoPyTokens.BOOL, false); - } - // fall through - case 119: break; - case 53: - { return symbol(ChocoPyTokens.AWAIT); - } - // fall through - case 120: break; - case 54: - { return symbol(ChocoPyTokens.BREAK); - } - // fall through - case 121: break; - case 55: - { return symbol(ChocoPyTokens.CLASS); - } - // fall through - case 122: break; - case 56: - { return symbol(ChocoPyTokens.RAISE); - } - // fall through - case 123: break; - case 57: - { return symbol(ChocoPyTokens.WHILE); - } - // fall through - case 124: break; - case 58: - { return symbol(ChocoPyTokens.YIELD); - } - // fall through - case 125: break; - case 59: - { return symbol(ChocoPyTokens.ASSERT); - } - // fall through - case 126: break; - case 60: - { return symbol(ChocoPyTokens.EXCEPT); - } - // fall through - case 127: break; - case 61: - { return symbol(ChocoPyTokens.GLOBAL); - } - // fall through - case 128: 
break; - case 62: - { return symbol(ChocoPyTokens.IMPORT); - } - // fall through - case 129: break; - case 63: - { return symbol(ChocoPyTokens.LAMBDA); - } - // fall through - case 130: break; - case 64: - { return symbol(ChocoPyTokens.RETURN); - } - // fall through - case 131: break; - case 65: - { return symbol(ChocoPyTokens.FINALLY); - } - // fall through - case 132: break; - case 66: - { return symbol(ChocoPyTokens.CONTINUE); - } - // fall through - case 133: break; - case 67: - { return symbol(ChocoPyTokens.NONLOCAL); - } - // fall through - case 134: break; - default: - zzScanError(ZZ_NO_MATCH); - } - } - } - } - - /** - * Converts an int token code into the name of the - * token by reflection on the cup symbol class/interface ChocoPyTokens - */ - private static String getTokenName(int token) { - try { - java.lang.reflect.Field [] classFields = ChocoPyTokens.class.getFields(); - for (int i = 0; i < classFields.length; i++) { - if (classFields[i].getInt(null) == token) { - return classFields[i].getName(); - } - } - } catch (Exception e) { - e.printStackTrace(System.err); - } - - return "UNKNOWN TOKEN"; - } - - /** - * Same as next_token but also prints the token to standard out - * for debugging. - */ - public java_cup.runtime.Symbol debug_next_token() throws java.io.IOException { - java_cup.runtime.Symbol s = next_token(); - System.out.println( "line:" + (yyline+1) + " col:" + (yycolumn+1) + " --"+ yytext() + "--" + getTokenName(s.sym) + "--"); - return s; - } - - /** - * Runs the scanner on input files. - * - * This main method is the debugging routine for the scanner. - * It prints debugging information about each returned token to - * System.out until the end of file is reached, or an error occured. - * - * @param argv the command line, contains the filenames to run - * the scanner on. 
- */ - public static void main(String[] argv) { - if (argv.length == 0) { - System.out.println("Usage : java ChocoPyLexer [ --encoding ] "); - } - else { - int firstFilePos = 0; - String encodingName = "UTF-8"; - if (argv[0].equals("--encoding")) { - firstFilePos = 2; - encodingName = argv[1]; - try { - // Side-effect: is encodingName valid? - java.nio.charset.Charset.forName(encodingName); - } catch (Exception e) { - System.out.println("Invalid encoding '" + encodingName + "'"); - return; - } - } - for (int i = firstFilePos; i < argv.length; i++) { - ChocoPyLexer scanner = null; - try { - java.io.FileInputStream stream = new java.io.FileInputStream(argv[i]); - java.io.Reader reader = new java.io.InputStreamReader(stream, encodingName); - scanner = new ChocoPyLexer(reader); - while ( !scanner.zzAtEOF ) scanner.debug_next_token(); - } - catch (java.io.FileNotFoundException e) { - System.out.println("File not found : \""+argv[i]+"\""); - } - catch (java.io.IOException e) { - System.out.println("IO error scanning file \""+argv[i]+"\""); - System.out.println(e); - } - catch (Exception e) { - System.out.println("Unexpected exception:"); - e.printStackTrace(); - } - } - } - } - - -} From e844793c0292c3713b021b741b34d46e2f8968a4 Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Wed, 17 Feb 2021 13:34:39 -0500 Subject: [PATCH 04/26] Added terminals to parser --- src/main/cup/chocopy/pa1/ChocoPy.cup | 73 +++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index d4ff444..c1b8605 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -142,9 +142,78 @@ action code {: * semantic value of type for these symbols that may be referenced * in actions ( {: ... :} ). 
*/ +terminal INDENT; +terminal DEDENT; +terminal String ID; +terminal String STRING; + + + +/* Terminal Delimiters */ terminal NEWLINE; -terminal String PLUS; -terminal Integer NUMBER; +terminal String COLON; +terminal String COMMA; + +/* Terminal Literals */ +terminal Integer NUMBER; +terminal Boolean BOOL; +terminal String NONE; + +/* Terminal Keywords */ +terminal String IF; +terminal String ELSE; +terminal String ELIF; +terminal String WHILE; +terminal String CLASS; +terminal String DEF; +terminal String LAMBDA; +terminal String AS; +terminal String FOR; +terminal String GLOBAL; +terminal String IN; +terminal String NONLOCAL; +terminal String PASS; +terminal String RETURN; +terminal String ASSERT; +terminal String AWAIT; +terminal String BREAK; +terminal String CONTINUE; +terminal String DEL; +terminal String EXCEPT; +terminal String FINALLY; +terminal String FROM; +terminal String IMPORT; +terminal String RAISE; +terminal String TRY; +terminal String WITH; +terminal String YIELD; + + +/* Terminal Operators */ +terminal String PLUS; +terminal String MINUS; +terminal String MUL; +terminal String DIV; +terminal String MOD; +terminal String GT; +terminal String LT; +terminal String EQUAL; +terminal String NEQ; +terminal String GEQ; +terminal String LEQ; +terminal String ASSIGN; +terminal String AND; +terminal String OR; +terminal String NOT; +terminal String DOT; +terminal String LPAR; +terminal String RPAR; +terminal String LBR; +terminal String RBR; +terminal String ARROW; +terminal String IS; + + /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. 
*/ terminal UNRECOGNIZED; From 4cd3062255ea9e6376a554cdcf4d152112177b96 Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Wed, 17 Feb 2021 14:44:11 -0500 Subject: [PATCH 05/26] Added precedences --- src/main/cup/chocopy/pa1/ChocoPy.cup | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index c1b8605..f6abdda 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -213,7 +213,6 @@ terminal String RBR; terminal String ARROW; terminal String IS; - /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ terminal UNRECOGNIZED; @@ -231,7 +230,13 @@ non terminal Expr expr, binary_expr; /* Precedences (lowest to highest) for resolving what would otherwise be * ambiguities in the form of shift/reduce conflicts.. */ -precedence left PLUS; +precedence left OR; +precedence left AND; +precedence left NOT; +precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS; +precedence left PLUS, MINUS; +precedence left MUL, DIV, MOD; +precedence left DOT, COMMA, LBR, RBR; /* The start symbol. */ start with program; From 2ab3fddc66a086be21df37951b1c72a1c0964b5f Mon Sep 17 00:00:00 2001 From: bill Date: Thu, 18 Feb 2021 20:38:43 +0800 Subject: [PATCH 06/26] Bug fixed: unreachable code; symbol(value) not implemented. --- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index ccb7187..9cc41de 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -39,7 +39,7 @@ import java.util.ArrayList; /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. 
*/ private Symbol symbol(int type) { - return symbol(type); + return symbol(type, null); } /** Return a terminal symbol of syntactic category TYPE and semantic @@ -142,8 +142,9 @@ if True: if(top()< currIndent) { /* - If current indentation is more than indents current level should have, - start a new level which will have `currIndent' spaces. + If current indentation is more than the number of indents + current level should have, start a new level which will have + `currIndent' indents. */ push(currIndent); return symbol(ChocoPyTokens.INDENT, currIndent); @@ -153,7 +154,7 @@ if True: { /* Delimiters. */ - {LineBreak} { return symbol(ChocoPyTokens.NEWLINE); yybegin(YYINITIAL); currIndent = 0;} + {LineBreak} { yybegin(YYINITIAL); currIndent = 0;return symbol(ChocoPyTokens.NEWLINE);} ":" { return symbol(ChocoPyTokens.COLON); } "," { return symbol(ChocoPyTokens.COMMA); } From d247243be145771d4d5df624a3ec67a00acf151b Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Thu, 18 Feb 2021 15:26:14 -0500 Subject: [PATCH 07/26] Added first 10 (till var_def) rules of grammar from language reference --- src/main/cup/chocopy/pa1/ChocoPy.cup | 118 ++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 4 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index f6abdda..fb1f495 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -130,6 +130,18 @@ action code {: return new ComplexSymbolFactory.Location(first.getLocation()[0], first.getLocation()[1]); } + + /** Return the rightmost non-whitespace location in NODES, or null if NODES + * is empty. Assumes that the nodes of NODES are ordered in increasing + * order of location, from left to right.
*/ + ComplexSymbolFactory.Location getRight(List nodes) { + if (nodes.isEmpty()) { + return null; + } + Node last = nodes.get(nodes.size()-1); + return new ComplexSymbolFactory.Location(last.getLocation()[2], + last.getLocation()[3]); + } :} @@ -213,6 +225,7 @@ terminal String RBR; terminal String ARROW; terminal String IS; + /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ terminal UNRECOGNIZED; @@ -223,10 +236,22 @@ terminal UNRECOGNIZED; * defines the listed nonterminal identifier symbols to have semantic values * of type . */ non terminal Program program; -non terminal List program_head; +non terminal List program_head, class_body, class_body_defs, fun_body_decs; non terminal List stmt_list, opt_stmt_list; non terminal Stmt stmt, expr_stmt; non terminal Expr expr, binary_expr; +non terminal VarDef var_def; +non terminal ClassDef class_def; +non terminal FuncDef fun_def; +non terminal Literal literal; +non terminal TypedVar typed_var; +non terminal TypeAnnotation type, ret_type; +non terminal Identifier identifier; +non terminal List typed_vars; +non terminal GlobalDecl global_decl; +non terminal NonLocalDecl nonlocal_decl; + + /* Precedences (lowest to highest) for resolving what would otherwise be * ambiguities in the form of shift/reduce conflicts.. */ @@ -244,16 +269,97 @@ start with program; /***** GRAMMAR RULES *****/ +/* Rules are defined in the order given by the language reference */ + +/* program */ program ::= program_head:d opt_stmt_list:s {: RESULT = new Program(d.isEmpty() ? getLeft(s) : getLeft(d), sxright, d, s, errors); :} ; -/* Initial list of declarations. 
*/ -program_head ::= /* not implemented; currently matches empty string */ - {: RESULT = empty(); :} +program_head ::= program_head:d var_def:vd {: RESULT = combine(d, vd); :} + | program_head:d class_def:cd {: RESULT = combine(d, cd); :} + | program_head:d fun_def:fd {: RESULT = combine(d, fd); :} + | program_head:d error:e {: RESULT = d; :} + | {: RESULT = empty(); :} + ; + + +/* class_def */ +class_def ::= CLASS:c identifier:id LPAR identifier:parentId RPAR COLON NEWLINE INDENT class_body:cb DEDENT {: RESULT = new ClassDef(cxleft, getRight(cb), id, parentId, cb); :}; + + +/* class_body */ +class_body ::= PASS NEWLINE {: RESULT = empty(); :} + | class_body_defs:defs {: RESULT = defs; :} + ; + +class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(defs, vd); :} + | class_body_defs:defs fun_def:fd {: RESULT = combine(defs, fd); :} + | class_body_defs:defs error {: RESULT = defs; :} + | var_def:vd {: RESULT = single(vd); :} + | fun_def:fd {: RESULT = single(fd); :} + ; + + +/* fun_def */ +fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT + {: RESULT = new FuncDef(defxleft, getRight(sl), id, params, rt, fbd, sl); :} + ; + +ret_type ::= ARROW type:t {: RESULT= t; :} + | {: RESULT= null; :} + ; + +typed_vars ::= typed_var:tv {: RESULT= single(tv); :} + | typed_vars:tvs COMMA typed_var:tv {: RESULT= combine(tvs, tv); :} + | typed_vars:tvs COMMA error {: RESULT= tvs; :} + | {: RESULT= empty(); :} + ; + + + +/* fun_body */ +fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} + | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} + | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} + | fun_body_decs:fbd fun_def:fd {: RESULT= combine(fbd, fd); :} + | fun_body_decs:fbd error {: RESULT= fbd; :} + | {: RESULT= empty(); :} ; + + +/* typed_var */ +typed_var ::= identifier:id COLON type:t {: RESULT = new TypedVar(idxleft, 
txright, id, t); :}; + + +/* type */ +type ::= identifier:id {: RESULT = new ClassType(idxleft, idxright, id.name); :} + | STRING:str {: RESULT = new ClassType(strxleft, strxright, str); :} + | LBR:lbr type:t RBR:rbr {: RESULT = new ListType(lbrxleft, rbrxright, t); :} + ; + + +/* global_decl */ +global_decl ::= GLOBAL:g identifier:id NEWLINE {: RESULT = new GlobalDecl(gxleft, idxright, id); :}; + + +/* nonlocal_decl */ +nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLocalDecl(nxleft, idxright, id); :}; + + +/* var_def */ +var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :}; + + +/* literal */ +literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :} + | BOOL:b {: RESULT = new BooleanLiteral(bxleft, bxright, b); :} + | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} + | STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :} + ; + opt_stmt_list ::= {: RESULT = empty(); :} | stmt_list:s {: RESULT = s; :} @@ -286,3 +392,7 @@ binary_expr ::= expr:e1 PLUS:op expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, op, e2); :} ; + + +/* Extras - rules below have not been given in language reference, we have them to ease implementation */ +identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}; From 27b46f386d1bee3457d4c93fca9f208e399d7939 Mon Sep 17 00:00:00 2001 From: bill Date: Sat, 20 Feb 2021 04:04:06 +0800 Subject: [PATCH 08/26] Lexer will pass the original tokens even if it can be inferred to match the type of terminal defined in .cup file. 
--- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 9cc41de..8ecbb02 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -39,7 +39,7 @@ import java.util.ArrayList; /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. */ private Symbol symbol(int type) { - return symbol(type, null); + return symbol(type, yytext()); } /** Return a terminal symbol of syntactic category TYPE and semantic From cf3bd4d7457356b3f53056652943fa9beebfbfe8 Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Fri, 19 Feb 2021 18:44:22 -0500 Subject: [PATCH 09/26] Finished all grammar - passes 24 test cases --- src/main/cup/chocopy/pa1/ChocoPy.cup | 150 ++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 28 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index fb1f495..40f819a 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -237,19 +237,25 @@ terminal UNRECOGNIZED; * of type . 
*/ non terminal Program program; non terminal List program_head, class_body, class_body_defs, fun_body_decs; -non terminal List stmt_list, opt_stmt_list; -non terminal Stmt stmt, expr_stmt; -non terminal Expr expr, binary_expr; +non terminal List stmt_list, opt_stmt_list, block, else_body; +non terminal Stmt stmt, simple_stmt; +non terminal Expr expr, binary_expr, pexpr, cexpr, cmp_pexpr; non terminal VarDef var_def; non terminal ClassDef class_def; non terminal FuncDef fun_def; non terminal Literal literal; +non terminal StringLiteral bin_op, comp_op; non terminal TypedVar typed_var; non terminal TypeAnnotation type, ret_type; non terminal Identifier identifier; non terminal List typed_vars; non terminal GlobalDecl global_decl; non terminal NonLocalDecl nonlocal_decl; +non terminal List opt_target, expr_list; +non terminal Expr target; +non terminal MemberExpr member_expr; +non terminal IndexExpr index_expr; + @@ -262,6 +268,7 @@ precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS; precedence left PLUS, MINUS; precedence left MUL, DIV, MOD; precedence left DOT, COMMA, LBR, RBR; +precedence left IF, ELSE; /* The start symbol. 
*/ start with program; @@ -284,6 +291,10 @@ program_head ::= program_head:d var_def:vd {: RESULT = combine(d, v | program_head:d error:e {: RESULT = d; :} | {: RESULT = empty(); :} ; + +opt_stmt_list ::= {: RESULT = empty(); :} + | stmt_list:s {: RESULT = s; :} + ; /* class_def */ @@ -318,8 +329,7 @@ typed_vars ::= typed_var:tv {: RESULT= single(tv | {: RESULT= empty(); :} ; - - + /* fun_body */ fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} @@ -353,46 +363,130 @@ nonlocal_decl ::= NONLOCAL:n identifier:id NEWLINE {: RESULT = new NonLoc var_def ::= typed_var:t ASSIGN literal:l NEWLINE {: RESULT = new VarDef(txleft, lxright, t, l); :}; +/* stmt */ +stmt ::= simple_stmt:s NEWLINE {: RESULT = s; :} + | IF:i expr:cond COLON block:b else_body:elb {: RESULT = new IfStmt(ixleft, getRight(elb), cond, b, elb); :} + | WHILE:wh expr:cond COLON block:b {: RESULT = new WhileStmt(whxleft, getRight(b), cond, b); :} + | FOR:f identifier:id IN expr:e COLON block:b {: RESULT = new ForStmt(fxleft, getRight(b), id, e, b); :} + ; + + +else_body ::= ELSE:el COLON block:b {: RESULT = b; :} + | ELIF:el expr:cond COLON block:b else_body:elb {: RESULT = single(new IfStmt(elxleft, getRight(elb), cond, b, elb)); :} + | {: RESULT = empty(); :} + ; + +/* simple_stmt */ +simple_stmt ::= PASS:p {: RESULT = null; :} + | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} + | RETURN:r expr:e {: RESULT = new ReturnStmt(rxleft, exright, e); :} + | RETURN {: RESULT = null; :} + | opt_target:ot expr:e {: RESULT = new AssignStmt(getLeft(ot), exright, ot, e); :} + ; + + +opt_target ::= opt_target:ot target:t ASSIGN {: RESULT = combine(ot, t); :} + | target:t ASSIGN {: RESULT = single(t); :} + ; + + +/* block */ +block ::= NEWLINE INDENT stmt_list:sl DEDENT {: RESULT = sl; :}; + + /* literal */ literal ::= NONE:n {: RESULT = new NoneLiteral(nxleft, nxright); :} | BOOL:b {: RESULT = new 
BooleanLiteral(bxleft, bxright, b); :} | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} | STRING:s {: RESULT = new StringLiteral(sxleft, sxright, s); :} ; + + +/* expr */ +expr ::= pexpr:ce {: RESULT = ce; :} + | NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :} + | expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :} + | expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :} + | expr:e1 IF expr:e2 ELSE expr:e3 {: RESULT = new IfExpr(e1xleft, e3xright, e2, e1, e3); :} + ; -opt_stmt_list ::= {: RESULT = empty(); :} - | stmt_list:s {: RESULT = s; :} - ; +/* cexpr */ +/* +cexpr ::= pexpr:pe {: RESULT = new Expr(pexleft, pexright); :} + | cmp_pexpr:cp {: RESULT = new Expr(cpxleft, cpxright); :} + ; -stmt_list ::= stmt:s {: RESULT = single(s); :} - | stmt_list:l stmt:s {: RESULT = combine(l, s); :} - | stmt_list:l error {: RESULT = l; :} - /* If there is a syntax error in the source, this says to discard - * symbols from the parsing stack and perform reductions until - * there is a stmt_list on top of the stack, and then to discard - * input symbols until it is possible to shift again, reporting - * a syntax error. 
*/ + +cmp_pexpr ::= pexpr:p comp_op:co cmp_pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co, p2); :} + | pexpr:p {: RESULT = new Expr(pxleft, pxright); :} + ; +*/ + +/* pexpr */ +pexpr ::= identifier:id {: RESULT = id; :} + | literal:l {: RESULT = l; :} + | LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :} + | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} + | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} + | pexpr:p1 bin_op:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo.value, p2); :} + | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} + | pexpr:p1 comp_op:co pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :} + ; + +expr_list ::= expr:e {: RESULT = single(e); :} + | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :} + | {: RESULT = null; :} ; -stmt ::= expr_stmt:s NEWLINE {: RESULT = s; :} - ; -expr_stmt ::= expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} - ; +/* bin_op */ +bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :} + | MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :} + | MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :} + | DIV:a {: RESULT = new StringLiteral(axleft, axright, "/"); :} + | MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :} + ; -expr ::= binary_expr:e {: RESULT = e; :} - | NUMBER:n {: RESULT = new IntegerLiteral(nxleft, nxright, n); :} - ; +/* comp_op */ +comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :} + | NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :} + | LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :} + | GEQ:a {: RESULT = new StringLiteral(axleft, axright, ">="); :} + | LT:a 
{: RESULT = new StringLiteral(axleft, axright, "<"); :} + | GT:a {: RESULT = new StringLiteral(axleft, axright, ">"); :} + | IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :} + ; + +/* member_expr */ +member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :} + ; -/* A binary expression, illustrating how to find the left and right - * source position of a phrase. */ -binary_expr ::= expr:e1 PLUS:op expr:e2 - {: RESULT = new BinaryExpr(e1xleft, e2xright, - e1, op, e2); :} +/* index_expr */ +index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :} ; + +/* target */ +target ::= identifier:id {: RESULT = id; :} + | member_expr:m {: RESULT = m; :} + | index_expr:i {: RESULT = i; :} + ; /* Extras - rules below have not been given in language reference, we have them to ease implementation */ identifier ::= ID:idStr {: RESULT = new Identifier(idStrxleft, idStrxright, idStr); :}; + + +stmt_list ::= stmt:s {: RESULT = single(s); :} + | stmt_list:l stmt:s {: RESULT = combine(l, s); :} + | stmt_list:l error {: RESULT = l; :} + /* If there is a syntax error in the source, this says to discard + * symbols from the parsing stack and perform reductions until + * there is a stmt_list on top of the stack, and then to discard + * input symbols until it is possible to shift again, reporting + * a syntax error. 
*/ + ; \ No newline at end of file From 7b2e1dab3f8dcd16642ad514011b43275a5baf03 Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Fri, 19 Feb 2021 18:45:13 -0500 Subject: [PATCH 10/26] Added test scripts to ease testing --- build.sh | 3 +++ full_test.sh | 3 +++ test.sh | 12 ++++++++++++ 3 files changed, 18 insertions(+) create mode 100755 build.sh create mode 100755 full_test.sh create mode 100755 test.sh diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..f4e6621 --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +mvn clean package -e diff --git a/full_test.sh b/full_test.sh new file mode 100755 index 0000000..19504c9 --- /dev/null +++ b/full_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +./build.sh +./test.sh diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..7060f44 --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +FILENAME=$1 + +if [ -z "$1" ] ; then + echo "Running all test cases. Usage for individual test cases: test.sh FILENAME (inside src/test/data/pa1/sample/ folder)" + java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy --pass=s --test --dir src/test/data/pa1/sample/ + exit 1 +fi + +java -cp "chocopy-ref.jar:target/assignment.jar" chocopy.ChocoPy \ + --pass=s --test src/test/data/pa1/sample/${FILENAME} From 1c02aa84d85b4382e8d97332bf83fb98e1b65d0f Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Fri, 19 Feb 2021 19:03:02 -0500 Subject: [PATCH 11/26] Added cexpr --- src/main/cup/chocopy/pa1/ChocoPy.cup | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index 40f819a..e4004ff 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -403,7 +403,7 @@ literal ::= NONE:n {: RESULT = new NoneLi /* expr */ -expr ::= pexpr:ce {: RESULT = ce; :} +expr ::= cexpr:ce {: RESULT = ce; :} | NOT:n expr:exp {: RESULT = new UnaryExpr(nxleft, expxright, n, exp); :} | 
expr:e1 AND:a expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, a, e2); :} | expr:e1 OR:o expr:e2 {: RESULT = new BinaryExpr(e1xleft, e2xright, e1, o, e2); :} @@ -412,29 +412,23 @@ expr ::= pexpr:ce {: RESULT = ce; :} /* cexpr */ -/* -cexpr ::= pexpr:pe {: RESULT = new Expr(pexleft, pexright); :} - | cmp_pexpr:cp {: RESULT = new Expr(cpxleft, cpxright); :} +cexpr ::= pexpr:pe {: RESULT = pe; :} + | pexpr:p1 comp_op:co cexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :} ; -cmp_pexpr ::= pexpr:p comp_op:co cmp_pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co, p2); :} - | pexpr:p {: RESULT = new Expr(pxleft, pxright); :} - ; -*/ /* pexpr */ pexpr ::= identifier:id {: RESULT = id; :} | literal:l {: RESULT = l; :} | LBR:lbr expr_list:l RBR:rbr {: RESULT = new ListExpr(lbrxleft, rbrxright, l); :} - | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :} + | LPAR:lpar expr:e RPAR:rpar {: RESULT = e; :} | member_expr:m {: RESULT = m; :} | index_expr:i {: RESULT = i; :} | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} | pexpr:p1 bin_op:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo.value, p2); :} | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} - | pexpr:p1 comp_op:co pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, co.value, p2); :} ; expr_list ::= expr:e {: RESULT = single(e); :} From 0672838265f037bbe321007d84c961370e2804fc Mon Sep 17 00:00:00 2001 From: Sanjar Ahmadov Date: Fri, 19 Feb 2021 19:06:13 -0500 Subject: [PATCH 12/26] Cleanup --- src/main/cup/chocopy/pa1/ChocoPy.cup | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index e4004ff..13e324c 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ 
b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -239,7 +239,7 @@ non terminal Program program; non terminal List program_head, class_body, class_body_defs, fun_body_decs; non terminal List stmt_list, opt_stmt_list, block, else_body; non terminal Stmt stmt, simple_stmt; -non terminal Expr expr, binary_expr, pexpr, cexpr, cmp_pexpr; +non terminal Expr expr, pexpr, cexpr; non terminal VarDef var_def; non terminal ClassDef class_def; non terminal FuncDef fun_def; @@ -376,6 +376,7 @@ else_body ::= ELSE:el COLON block:b {: RES | {: RESULT = empty(); :} ; + /* simple_stmt */ simple_stmt ::= PASS:p {: RESULT = null; :} | expr:e {: RESULT = new ExprStmt(exleft, exright, e); :} @@ -417,7 +418,6 @@ cexpr ::= pexpr:pe {: RESULT = pe; :} ; - /* pexpr */ pexpr ::= identifier:id {: RESULT = id; :} | literal:l {: RESULT = l; :} @@ -456,14 +456,17 @@ comp_op ::= EQUAL:a {: RESULT = new Stri | IS:a {: RESULT = new StringLiteral(axleft, axright, "is"); :} ; + /* member_expr */ member_expr ::= pexpr:p DOT identifier:id {: RESULT = new MemberExpr(pxleft, idxright, p, id); :} ; + /* index_expr */ index_expr ::= pexpr:p LBR expr:e RBR:rbr {: RESULT = new IndexExpr(pxleft, rbrxright, p, e); :} ; + /* target */ target ::= identifier:id {: RESULT = id; :} | member_expr:m {: RESULT = m; :} From c5c9bb2f0d177dc9a983d7fa5bd3e2529117ee54 Mon Sep 17 00:00:00 2001 From: bill Date: Sat, 20 Feb 2021 19:46:30 +0800 Subject: [PATCH 13/26] Changes made on Flex to adapt for merging. 
--- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 8ecbb02..8699d5c 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -34,6 +34,7 @@ import java.util.ArrayList; final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory(); private int currIndent = 0; //Current Indentation Level private String currString = ""; + private int str_l = 0, str_c = 0; //Start location of a string. /*A stack that keeps track of the spaces in each Indentation Level*/ private ArrayList stack = new ArrayList(20); /** Return a terminal symbol of syntactic category TYPE and no @@ -45,6 +46,7 @@ import java.util.ArrayList; /** Return a terminal symbol of syntactic category TYPE and semantic * value VALUE at the current source location. */ private Symbol symbol(int type, Object value) { + //System.out.println(yytext() + ChocoPyTokens.terminalNames[type]); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[type], type, new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1), new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), @@ -134,6 +136,7 @@ if True: AFTER state. */ pop(); + //System.out.println("dedent"); return symbol(ChocoPyTokens.DEDENT, currIndent); } /*Otherwise, we will start dealing with the rest @@ -146,6 +149,8 @@ if True: current level should have, start a new level which will have `currIndent' indents. */ + //System.out.println("indent"); + push(currIndent); return symbol(ChocoPyTokens.INDENT, currIndent); } @@ -161,7 +166,7 @@ if True: /* Literals. 
*/ {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext())); } - "\"" {yybegin(STR); currString = "";} //Start taking a string when see a " + "\"" {yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = "";} //Start taking a string when see a " "False" { return symbol(ChocoPyTokens.BOOL, false); } "True" { return symbol(ChocoPyTokens.BOOL, true); } "None" { return symbol(ChocoPyTokens.NONE); } @@ -223,7 +228,7 @@ if True: /*Identifiers*/ - {Identifiers} {return symbol(ChocoPyTokens.ID, yytext());} + {Identifiers} {return symbol(ChocoPyTokens.ID, yytext());} /* Whitespace. */ {WhiteSpace} { /* ignore */ } /* Comment. */ @@ -232,9 +237,12 @@ if True: { {StringLiteral} {currString += yytext();} \\$ { /*'\' at the end of line, do nothing.*/ } - "\"" {yybegin(AFTER); return symbol(ChocoPyTokens.STRING, currString);} // accepted a ", return to AFTER state + "\"" {yybegin(AFTER); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING, + new ComplexSymbolFactory.Location(str_l, str_c), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currString);} // accepted a ", return to AFTER state } -<> { return symbol(ChocoPyTokens.EOF); } +<> { if(!stack.isEmpty()){ return symbol(ChocoPyTokens.DEDENT, pop());} return symbol(ChocoPyTokens.EOF);} /* Error fallback. */ [^] { return symbol(ChocoPyTokens.UNRECOGNIZED); } From ab6c72e690319326465a4ad63629ff47b0ff8da0 Mon Sep 17 00:00:00 2001 From: bill Date: Sat, 20 Feb 2021 21:42:01 +0800 Subject: [PATCH 14/26] Simple fixes for the errors. 
--- src/main/cup/chocopy/pa1/ChocoPy.cup | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index 13e324c..bb26a36 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -268,7 +268,7 @@ precedence nonassoc EQUAL, NEQ, LT, GT, LEQ, GEQ, IS; precedence left PLUS, MINUS; precedence left MUL, DIV, MOD; precedence left DOT, COMMA, LBR, RBR; -precedence left IF, ELSE; +precedence right IF, ELSE; /* The start symbol. */ start with program; @@ -315,12 +315,12 @@ class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(de /* fun_def */ -fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT - {: RESULT = new FuncDef(defxleft, getRight(sl), id, params, rt, fbd, sl); :} +fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT + {: TypeAnnotation _rt = rt;if(rt.getLocation()[0] == -2) _rt = new ClassType( colxright, colxright, "");RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :} ; ret_type ::= ARROW type:t {: RESULT= t; :} - | {: RESULT= null; :} + | {: RESULT= new ClassType(new ComplexSymbolFactory.Location(-2,-1), new ComplexSymbolFactory.Location(-1,-1),""); :} ; typed_vars ::= typed_var:tv {: RESULT= single(tv); :} @@ -427,13 +427,17 @@ pexpr ::= identifier:id {: RESULT = id; :} | index_expr:i {: RESULT = i; :} | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} - | pexpr:p1 bin_op:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo.value, p2); :} + | pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MINUS:bo 
pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} ; expr_list ::= expr:e {: RESULT = single(e); :} | expr_list:el COMMA expr:e {: RESULT = combine(el, e); :} - | {: RESULT = null; :} + | {: RESULT = new ArrayList(); :} ; From b2536564bcdb65b25b40ce99e644f391c507a64d Mon Sep 17 00:00:00 2001 From: bill Date: Sat, 20 Feb 2021 23:24:11 +0800 Subject: [PATCH 15/26] Cleanup --- src/main/cup/chocopy/pa1/ChocoPy.cup | 21 ++++++++++----------- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 3 --- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index bb26a36..15340a3 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -316,11 +316,11 @@ class_body_defs ::= class_body_defs:defs var_def:vd {: RESULT = combine(de /* fun_def */ fun_def ::= DEF:def identifier:id LPAR typed_vars:params RPAR ret_type:rt COLON:col NEWLINE INDENT fun_body_decs:fbd stmt_list:sl DEDENT - {: TypeAnnotation _rt = rt;if(rt.getLocation()[0] == -2) _rt = new ClassType( colxright, colxright, "");RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :} + {: TypeAnnotation _rt = rt;if((rt instanceof ClassType) && ((ClassType)rt).className == "") _rt = new ClassType( colxright, colxright, "");RESULT = new FuncDef(defxleft, getRight(sl), id, params, _rt, fbd, sl); :} ; ret_type ::= ARROW type:t {: RESULT= t; :} - | {: RESULT= new ClassType(new ComplexSymbolFactory.Location(-2,-1), new ComplexSymbolFactory.Location(-1,-1),""); :} + | {: RESULT= new ClassType(null, null,""); :} ; typed_vars ::= typed_var:tv 
{: RESULT= single(tv); :} @@ -427,11 +427,11 @@ pexpr ::= identifier:id {: RESULT = id; :} | index_expr:i {: RESULT = i; :} | member_expr:m LPAR expr_list:l RPAR:rpar {: RESULT = new MethodCallExpr(mxleft, rparxright, m, l); :} | identifier:id LPAR expr_list:l RPAR:rpar {: RESULT = new CallExpr(idxleft, rparxright, id, l); :} - | pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} - | pexpr:p1 MINUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} - | pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} - | pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} - | pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 PLUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MINUS:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MUL:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 DIV:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} + | pexpr:p1 MOD:bo pexpr:p2 {: RESULT = new BinaryExpr(p1xleft, p2xright, p1, bo, p2); :} | MINUS:m pexpr:p {: RESULT = new UnaryExpr(mxleft, pxright, m, p); :} ; @@ -440,17 +440,16 @@ expr_list ::= expr:e {: RESULT = single(e); | {: RESULT = new ArrayList(); :} ; - -/* bin_op */ +/* bin_op */ //We may still be able to use bin_op, so I left it here. 
bin_op ::= PLUS:a {: RESULT = new StringLiteral(axleft, axright, "+"); :} | MINUS:a {: RESULT = new StringLiteral(axleft, axright, "-"); :} | MUL:a {: RESULT = new StringLiteral(axleft, axright, "*"); :} - | DIV:a {: RESULT = new StringLiteral(axleft, axright, "/"); :} + | DIV:a {: RESULT = new StringLiteral(axleft, axright, "//"); :} //Section 2.6.3 in chocopy language reference | MOD:a {: RESULT = new StringLiteral(axleft, axright, "%"); :} ; -/* comp_op */ +/* comp_op */ //this might also need some change in order not to break left associativity comp_op ::= EQUAL:a {: RESULT = new StringLiteral(axleft, axright, "=="); :} | NEQ:a {: RESULT = new StringLiteral(axleft, axright, "!="); :} | LEQ:a {: RESULT = new StringLiteral(axleft, axright, "<="); :} diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 8699d5c..43a7f03 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -46,7 +46,6 @@ import java.util.ArrayList; /** Return a terminal symbol of syntactic category TYPE and semantic * value VALUE at the current source location. */ private Symbol symbol(int type, Object value) { - //System.out.println(yytext() + ChocoPyTokens.terminalNames[type]); return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[type], type, new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1), new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), @@ -136,7 +135,6 @@ if True: AFTER state. */ pop(); - //System.out.println("dedent"); return symbol(ChocoPyTokens.DEDENT, currIndent); } /*Otherwise, we will start dealing with the rest @@ -149,7 +147,6 @@ if True: current level should have, start a new level which will have `currIndent' indents. 
*/ - //System.out.println("indent"); push(currIndent); return symbol(ChocoPyTokens.INDENT, currIndent); From ca13d4c461509ea07cb08a6f253108424bf0ef79 Mon Sep 17 00:00:00 2001 From: bill Date: Sun, 21 Feb 2021 17:55:07 +0800 Subject: [PATCH 16/26] Fixed failure on test bad_indentation.py --- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 43a7f03..55446aa 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -149,7 +149,10 @@ if True: */ push(currIndent); - return symbol(ChocoPyTokens.INDENT, currIndent); + return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); } } } From 0c5f9b6d8b7ef216c23dffe592036e4e202e1b3e Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Mon, 22 Feb 2021 10:18:59 +0530 Subject: [PATCH 17/26] Adding tests --- src/test/data/pa1/student_contributed/bad.py | 30 ++++++++ src/test/data/pa1/student_contributed/good.py | 69 ++++++++++++++++++- 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/src/test/data/pa1/student_contributed/bad.py b/src/test/data/pa1/student_contributed/bad.py index b85905e..56f38a7 100644 --- a/src/test/data/pa1/student_contributed/bad.py +++ b/src/test/data/pa1/student_contributed/bad.py @@ -1 +1,31 @@ 1 2 3 + +def fun5(): + c = 6 + def fun6(): + print("Hello") + c = 4 + 5 + +if True: + if True: + print("Hello") + if True: + print("Maybe") + else: + print("World") +else: + print("Again") +else: + print("And Again") + +class Thor(object): + y:int = 0 + print("Right place?") + +class Stones(object): + y:int = 0 + def fun(x:int): + print("Right place?") + def bar(): + return 2+3 + print("Wrong Place") diff --git 
a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 8138b36..6d27ff6 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -1 +1,68 @@ -1 + 2 + 3 + +print("Hi") +def fun1(): + def fun2(): + print("Hello") + +print("World") + +def fun3(): + def fun4(): + print("Hello") + c = 4 + 5 + +def fun5(): + c = 6 + def fun6(): + print("Hello") + c = 4 + 5 + + +if True: + if True: + if True: + print("Hello") +print("World") + +if True: + if True: + if True: + print("Hello") + print("World") + +if True: + if True: + if True: + print("Hello") + print("World") + +if True: + if True: + if True: + print("Hello") + else: + print("World") + +if True: + if True: + if True: + print("Hello") +else: + print("World") + +class Foo(object): + x:int = 0 + + def __init__(self:"Foo", x:int): + self.x = x + + def bar(y:int)->None: + print("Hello World!",self.x+y) + y = 10 + +f = Foo(1) +print(f.x) +f.bar(4) + +a=[[[1],[2]][[3],[4]]] +print(a[0][0][1]*a[1][1][0]) \ No newline at end of file From f6091d744f9ab13fa24088aea881aeec751a9259 Mon Sep 17 00:00:00 2001 From: ar6496 <78793916+ar6496@users.noreply.github.com> Date: Mon, 22 Feb 2021 10:39:50 +0530 Subject: [PATCH 18/26] Create WORKLOG.md --- WORKLOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 WORKLOG.md diff --git a/WORKLOG.md b/WORKLOG.md new file mode 100644 index 0000000..612c819 --- /dev/null +++ b/WORKLOG.md @@ -0,0 +1,17 @@ +Compiler Construction PA1 Worklog +
+
Team: +
Apoorva Ranade(ar6496) +
Sanjar Ahmadov(sa5640) +
Yinqi Sun(ys3540) +
+
Acknowledgments: Provide attribution to any collaborations, external resources, or outside help. +
+
Indentation: Describe your strategy for handling INDENT and DEDENT tokens. Point to the relevant source files and line numbers. +
A stack is maintained by the lexer to keep track of indentations. A count is accumulated for the number of whitespace characters before the first token. If the count changes from the previous line count, a stack operation is performed. If count increases, another value is added to the stack. If count decreases, the topmost value is popped from the stack. +
+
Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any. +
+
+
Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis. +
Added more tests to rigorously check program flow and indentation. From 609903851d29089cf143810f28bb5e95e0c9fa22 Mon Sep 17 00:00:00 2001 From: bill Date: Mon, 22 Feb 2021 16:42:06 +0800 Subject: [PATCH 19/26] Bug fixes: Catching Indentation Errors. A typo on regex of Identifiers Potential fixes on allowing function body with only definations. (not applied) TODO: Allowing a program to have interleaving definations and statements? --- src/main/cup/chocopy/pa1/ChocoPy.cup | 31 +++++++++++++++++++----- src/main/jflex/chocopy/pa1/ChocoPy.jflex | 28 ++++++++++++++++++--- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/main/cup/chocopy/pa1/ChocoPy.cup b/src/main/cup/chocopy/pa1/ChocoPy.cup index 15340a3..20db76b 100644 --- a/src/main/cup/chocopy/pa1/ChocoPy.cup +++ b/src/main/cup/chocopy/pa1/ChocoPy.cup @@ -1,4 +1,5 @@ import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java_cup.runtime.*; @@ -113,12 +114,26 @@ action code {: } return list; } - + List combine(List list, List item) { + if (item != null) { + Iterator it = item.iterator(); + while(it.hasNext()) + list.add(it.next()); + } + return list; + } /** Return a mutable empty list. */ List empty() { return new ArrayList(); } - + class FuncBody { + public List fbd; + public List sl; + public FuncBody(List fbd, List sl){ + this.fbd = fbd; + this.sl = sl; + } + } /** Return the leftmost non-whitespace location in NODES, or null if NODES * is empty. Assumes that the nodes of NODES are ordered in increasing * order of location, from left to right. */ @@ -228,7 +243,7 @@ terminal String IS; /* Returned by the lexer for erroneous tokens. Since it does not appear in * the grammar, it indicates a syntax error. */ -terminal UNRECOGNIZED; +terminal String UNRECOGNIZED; /* Nonterminal symbols (defined in production rules below). 
* As for terminal symbols, @@ -236,7 +251,7 @@ terminal UNRECOGNIZED; * defines the listed nonterminal identifier symbols to have semantic values * of type . */ non terminal Program program; -non terminal List program_head, class_body, class_body_defs, fun_body_decs; +non terminal List defs, program_head, opt_program_head, class_body, class_body_defs, fun_body_decs; non terminal List stmt_list, opt_stmt_list, block, else_body; non terminal Stmt stmt, simple_stmt; non terminal Expr expr, pexpr, cexpr; @@ -255,7 +270,7 @@ non terminal List opt_target, expr_list; non terminal Expr target; non terminal MemberExpr member_expr; non terminal IndexExpr index_expr; - +non terminal FuncBody fun_body; @@ -330,7 +345,11 @@ typed_vars ::= typed_var:tv {: RESULT= single(tv ; -/* fun_body */ +/* fun_body */ +fun_body ::= fun_body_decs:fbd stmt_list:sl {: RESULT = new FuncBody(fbd, sl);:} + | fun_body_decs:fbd {: RESULT = new FuncBody(fbd, new ArrayList());:} + ; + fun_body_decs ::= fun_body_decs:fbd global_decl:gd {: RESULT= combine(fbd, gd); :} | fun_body_decs:fbd nonlocal_decl:nd {: RESULT= combine(fbd, nd); :} | fun_body_decs:fbd var_def:vd {: RESULT= combine(fbd, vd); :} diff --git a/src/main/jflex/chocopy/pa1/ChocoPy.jflex b/src/main/jflex/chocopy/pa1/ChocoPy.jflex index 55446aa..de6ec6c 100644 --- a/src/main/jflex/chocopy/pa1/ChocoPy.jflex +++ b/src/main/jflex/chocopy/pa1/ChocoPy.jflex @@ -1,6 +1,7 @@ package chocopy.pa1; import java_cup.runtime.*; import java.util.ArrayList; +import java.util.Iterator; %% /*** Do not change the flags below unless you know what you are doing. ***/ @@ -37,6 +38,7 @@ import java.util.ArrayList; private int str_l = 0, str_c = 0; //Start location of a string. /*A stack that keeps track of the spaces in each Indentation Level*/ private ArrayList stack = new ArrayList(20); + private boolean indentErrorUnchecked = true; /** Return a terminal symbol of syntactic category TYPE and no * semantic value at the current source location. 
*/ private Symbol symbol(int type) { @@ -63,6 +65,15 @@ import java.util.ArrayList; if(stack.isEmpty()) return 0; return stack.get(stack.size() - 1); } + private boolean find(int indent){ + if(indent == 0) return true; + Iterator it = stack.iterator(); + while(it.hasNext()){ + if(it.next() == indent) + return true; + } + return false; + } %} /* Macros (regexes used in rules below) */ @@ -72,7 +83,7 @@ LineBreak = \r|\n|\r\n IntegerLiteral = 0|[1-9][0-9]* // Accroding to the manual, 00+ is illeagal StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+ // \n, \r, \t, \\, \" and Anything except \ and " -Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z][0-9])* +Identifiers = (_|[a-z]|[A-Z])(_|[a-z]|[A-Z]|[0-9])* Comments = #[^\r\n]* %% //YYINITIAL state is where we're dealing with indentations. @@ -135,7 +146,18 @@ if True: AFTER state. */ pop(); - return symbol(ChocoPyTokens.DEDENT, currIndent); + if(top() < currIndent) + { + currIndent = top(); + return symbolFactory.newSymbol("", ChocoPyTokens.UNRECOGNIZED, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); + } + return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT], ChocoPyTokens.DEDENT, + new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1), + new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()), + currIndent); } /*Otherwise, we will start dealing with the rest of the line after indentation in AFTER state. */ @@ -159,7 +181,7 @@ if True: { /* Delimiters. 
*/ - {LineBreak} { yybegin(YYINITIAL); currIndent = 0;return symbol(ChocoPyTokens.NEWLINE);} + {LineBreak} { yybegin(YYINITIAL); currIndent = 0;indentErrorUnchecked = true; return symbol(ChocoPyTokens.NEWLINE);} ":" { return symbol(ChocoPyTokens.COLON); } "," { return symbol(ChocoPyTokens.COMMA); } From 6bcd40cbf9e2c1160ee57f291063c0dafc4ab392 Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Mon, 22 Feb 2021 19:32:37 +0530 Subject: [PATCH 20/26] Updating tests --- src/test/data/pa1/student_contributed/bad.py | 4 ++++ src/test/data/pa1/student_contributed/good.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/test/data/pa1/student_contributed/bad.py b/src/test/data/pa1/student_contributed/bad.py index 56f38a7..68e3773 100644 --- a/src/test/data/pa1/student_contributed/bad.py +++ b/src/test/data/pa1/student_contributed/bad.py @@ -29,3 +29,7 @@ class Stones(object): def bar(): return 2+3 print("Wrong Place") + +def fun1(): + def fun2(): + print("Hello") diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 6d27ff6..b0812da 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -3,7 +3,8 @@ print("Hi") def fun1(): def fun2(): print("Hello") - + fun2() + print("World") def fun3(): From 534b043c62d1b0d6ff38dea2fed42a9cd233d2be Mon Sep 17 00:00:00 2001 From: ar6496 <78793916+ar6496@users.noreply.github.com> Date: Mon, 22 Feb 2021 20:02:16 +0530 Subject: [PATCH 21/26] Update WORKLOG.md --- WORKLOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/WORKLOG.md b/WORKLOG.md index 612c819..72ea5a9 100644 --- a/WORKLOG.md +++ b/WORKLOG.md @@ -11,7 +11,10 @@ Compiler Construction PA1 Worklog
A stack is maintained by the lexer to keep track of indentations. A count is accumulated for the number of whitespace characters before the first token. If the count changes from the previous line count, a stack operation is performed. If count increases, another value is added to the stack. If count decreases, the topmost value is popped from the stack.

Challenges: Describe any challenges (besides indentation) you encountered and the way you solved them. Mention the approaches that did not work, if any. -
+
We ran into shift-reduce conflicts while parsing the grammar. One way to fix them is to rewrite the grammar; we chose instead to resolve them with precedence declarations, as is done for expr, by giving if and else right associativity. +
Handling errors was another challenge. This required debugging and small changes to program flow. +
Understanding the given code was a small challenge and took some time before we could start coding.

Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis.
Added more tests to rigorously check program flow and indentation. +
Function body must have atleast oone statement which is not a part of a nested function. From 53aaa5c140ca319fe8efe16c901b8a878861ad5f Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Mon, 22 Feb 2021 20:13:27 +0530 Subject: [PATCH 22/26] Updating tests for multi-line strings --- src/test/data/pa1/student_contributed/good.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index b0812da..4c01c88 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -66,4 +66,12 @@ print(f.x) f.bar(4) a=[[[1],[2]][[3],[4]]] -print(a[0][0][1]*a[1][1][0]) \ No newline at end of file +print(a[0][0][1]*a[1][1][0]) + +multiline_string="Hi World, +Here I am" + +""" +This is a +multi-line comment. +""" \ No newline at end of file From c21b761c55fff456c8aaa3f3454bc788a28548cd Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Wed, 24 Feb 2021 10:48:45 +0530 Subject: [PATCH 23/26] Updating tests yet again --- src/test/data/pa1/student_contributed/good.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 4c01c88..88f20ca 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -74,4 +74,15 @@ Here I am" """ This is a multi-line comment. 
-""" \ No newline at end of file +""" + + +def get_stones(name:str)->str: + color=['Red','Blue'] + stones=['Mind','Soul'] + def map_name(nm:str)->str: + return stones[color.index(nm)] + return map_name(name) + +stone="Blue" +print(get_stones(stone)) \ No newline at end of file From 9f6d969d2dbb0e8d1f579fab246ec76163fc3e64 Mon Sep 17 00:00:00 2001 From: ar6496 <78793916+ar6496@users.noreply.github.com> Date: Wed, 24 Feb 2021 18:59:14 +0530 Subject: [PATCH 24/26] Update WORKLOG.md --- WORKLOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/WORKLOG.md b/WORKLOG.md index 72ea5a9..7b7b13f 100644 --- a/WORKLOG.md +++ b/WORKLOG.md @@ -18,3 +18,4 @@ Compiler Construction PA1 Worklog
Improvements: Describe any improvements you introduced that were not strictly necessary to pass the tests, such as implementing additional functionality, adding new tests, or enabling static analysis.
Added more tests to rigorously check program flow and indentation.
Function body must have atleast oone statement which is not a part of a nested function. +
Support for multi-line strings. From 0a0b308dcc60a21f880beafdf293208151ce8e87 Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Fri, 26 Feb 2021 19:10:23 +0530 Subject: [PATCH 25/26] Modifying Tests --- src/test/data/pa1/student_contributed/good.py | 42 ++++++++----------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 88f20ca..000fffe 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -1,11 +1,24 @@ +class Foo(object): + x:int = 0 + + def __init__(self:"Foo", x:int): + self.x = x + + def bar(y:int): + print("Hello World!",self.x+y) + y = 10 + +def get_stones(name:str)->str: + color=["Red","Blue"] + stones=["Mind","Soul"] + def map_name(nm:str)->str: + return stones[color.index(nm)] + return map_name(name) -print("Hi") def fun1(): def fun2(): print("Hello") fun2() - -print("World") def fun3(): def fun4(): @@ -51,38 +64,17 @@ if True: else: print("World") -class Foo(object): - x:int = 0 - def __init__(self:"Foo", x:int): - self.x = x - - def bar(y:int)->None: - print("Hello World!",self.x+y) - y = 10 f = Foo(1) print(f.x) f.bar(4) -a=[[[1],[2]][[3],[4]]] +a=[[[1],[2]],[[3],[4]]] print(a[0][0][1]*a[1][1][0]) multiline_string="Hi World, Here I am" -""" -This is a -multi-line comment. 
-""" - - -def get_stones(name:str)->str: - color=['Red','Blue'] - stones=['Mind','Soul'] - def map_name(nm:str)->str: - return stones[color.index(nm)] - return map_name(name) - stone="Blue" print(get_stones(stone)) \ No newline at end of file From d7a9aab8a47eb6177d0a00bf532e7af7e3c5fb43 Mon Sep 17 00:00:00 2001 From: Apoorva Ranade Date: Fri, 26 Feb 2021 19:14:42 +0530 Subject: [PATCH 26/26] Working tests --- src/test/data/pa1/student_contributed/good.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/test/data/pa1/student_contributed/good.py b/src/test/data/pa1/student_contributed/good.py index 000fffe..093076f 100644 --- a/src/test/data/pa1/student_contributed/good.py +++ b/src/test/data/pa1/student_contributed/good.py @@ -9,26 +9,26 @@ class Foo(object): y = 10 def get_stones(name:str)->str: - color=["Red","Blue"] - stones=["Mind","Soul"] def map_name(nm:str)->str: return stones[color.index(nm)] + color=["Red","Blue"] + stones=["Mind","Soul"] return map_name(name) -def fun1(): - def fun2(): +def funa(): + def funb(): print("Hello") - fun2() + funb() -def fun3(): - def fun4(): +def fund(): + def fune(): print("Hello") c = 4 + 5 -def fun5(): - c = 6 - def fun6(): +def funf(): + def fung(): print("Hello") + c = 6 c = 4 + 5