package chocopy.pa1;
import java_cup.runtime.*;
import java.util.ArrayList;

%%

/*** Do not change the flags below unless you know what you are doing. ***/

%unicode
%line
%column
%states AFTER, STR

%class ChocoPyLexer
%public

%cupsym ChocoPyTokens
%cup
%cupdebug

%eofclose false

/*** Do not change the flags above unless you know what you are doing. ***/

/* The following code section is copied verbatim to the
 * generated lexer class. */
%{
    /* The code below includes some convenience methods to create tokens
     * of a given type and optionally a value that the CUP parser can
     * understand. Specifically, a lot of the logic below deals with
     * embedded information about where in the source code a given token
     * was recognized, so that the parser can report errors accurately. */

    /** Producer of token-related values for the parser. */
    final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();

    /** Width, in columns, of the leading whitespace seen so far on the current line. */
    private int currIndent = 0;

    /** Text of the string literal currently being scanned in the STR state. */
    private String currString = "";

    /** 1-based line and column of the opening quote of the current string literal. */
    private int str_l = 0, str_c = 0;

    /** Stack of indentation widths, one entry per open indentation level.
     *  An empty stack stands for the outermost level (width 0). */
    private ArrayList<Integer> stack = new ArrayList<Integer>(20);

    /** Return a terminal symbol of syntactic category TYPE whose semantic
     *  value is the matched text, at the current source location. */
    private Symbol symbol(int type) {
        return symbol(type, yytext());
    }

    /** Return a terminal symbol of syntactic category TYPE and semantic
     *  value VALUE at the current source location. Locations are 1-based:
     *  the start is the first matched character, the end is the last. */
    private Symbol symbol(int type, Object value) {
        return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[type], type,
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1),
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength()),
            value);
    }

    /** Open a new indentation level whose width is INDENT columns. */
    private void push(int indent) {
        stack.add(indent);
    }

    /** Close the innermost indentation level and return its width,
     *  or 0 if no level is open. */
    private int pop() {
        if (stack.isEmpty()) {
            return 0;
        }
        return stack.remove(stack.size() - 1);
    }

    /** Return the width of the innermost indentation level without
     *  closing it, or 0 if no level is open. */
    private int top() {
        if (stack.isEmpty()) {
            return 0;
        }
        return stack.get(stack.size() - 1);
    }
%}

/* Macros (regexes used in rules below) */

WhiteSpace = [ \t]
LineBreak  = \r|\n|\r\n

/* According to the manual, a literal with leading zeros (e.g. 00) is illegal. */
IntegerLiteral = 0|[1-9][0-9]*

/* A run of string contents: \n, \r, \t, \\, \" and anything except \ and ". */
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+

/* A letter or underscore followed by any number of letters, digits or underscores. */
Identifiers = [a-zA-Z_][a-zA-Z0-9_]*

Comments = #[^\r\n]*

%%

/* YYINITIAL is the state in which indentation is handled.
   The lexer returns to YYINITIAL at the start of every new line, unless
   the line break is inside a string, e.g.:

   "this is \
   a string across \
   multiple lines\
   "
*/
<YYINITIAL> {

  {WhiteSpace}  {
                    /* Accumulate the indentation width. A tab advances to the
                       next multiple of 8 columns, so a leading tab counts the
                       same as 8 spaces:

                       if True:
                       <tab>print(1)
                       <8 spaces>print(2)

                       The comparison must use equals(): yytext() returns a
                       fresh String, so == against a literal is never true. */
                    if ("\t".equals(yytext())) {
                        currIndent += 8 - (currIndent % 8);
                    } else {
                        currIndent++;
                    }
                }

  {LineBreak}   {
                    /* A blank line is ignored: no NEWLINE is passed to CUP,
                       and indentation counting starts over on the next line. */
                    currIndent = 0;
                }

  {Comments}    { /* A comment-only line is ignored like a blank line. */ }

  /* First significant character of a non-blank line: settle the
     indentation, then scan the rest of the line in the AFTER state. */
  [^ \t\r\n#]   {
                    // Rewind the character so that it is scanned again.
                    yypushback(1);
                    if (top() > currIndent) {
                        /* The line is indented less than the innermost open
                           level: close that level. Because the state is
                           unchanged and the character was pushed back, this
                           rule fires again, producing one DEDENT per closed
                           level until top() <= currIndent. */
                        pop();
                        return symbol(ChocoPyTokens.DEDENT, currIndent);
                    }
                    /* Indentation is settled; the rest of the line is
                       handled in the AFTER state. */
                    yybegin(AFTER);
                    if (top() < currIndent) {
                        /* The line is indented more than the innermost open
                           level: open a new level of width currIndent.
                           NOTE(review): this branch is also reached after
                           dedenting to a width that matches no open level;
                           an INDENT is then produced rather than an error. */
                        push(currIndent);
                        return symbol(ChocoPyTokens.INDENT, currIndent);
                    }
                    /* Same width as the innermost level: no token is
                       produced here and scanning continues in AFTER. */
                }
}

<AFTER> {

  /* Delimiters. */
  {LineBreak}   { yybegin(YYINITIAL); currIndent = 0; return symbol(ChocoPyTokens.NEWLINE); }
  ":"           { return symbol(ChocoPyTokens.COLON); }
  ","           { return symbol(ChocoPyTokens.COMMA); }

  /* Literals. */
  {IntegerLiteral} {
                    /* Integer.parseInt rejects values above 2^31 - 1; report
                       such a literal as an unrecognized token instead of
                       letting the exception escape from the lexer. */
                    try {
                        return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext()));
                    } catch (NumberFormatException outOfRange) {
                        return symbol(ChocoPyTokens.UNRECOGNIZED);
                    }
                }

  /* An opening quote starts a string: remember where it began and
     collect its contents in the STR state. */
  "\""          { yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = ""; }

  "False"       { return symbol(ChocoPyTokens.BOOL, false); }
  "True"        { return symbol(ChocoPyTokens.BOOL, true); }
  "None"        { return symbol(ChocoPyTokens.NONE); }

  /* Keywords. */
  "if"          { return symbol(ChocoPyTokens.IF); }
  "else"        { return symbol(ChocoPyTokens.ELSE); }
  "elif"        { return symbol(ChocoPyTokens.ELIF); }
  "while"       { return symbol(ChocoPyTokens.WHILE); }
  "class"       { return symbol(ChocoPyTokens.CLASS); }
  "def"         { return symbol(ChocoPyTokens.DEF); }
  "lambda"      { return symbol(ChocoPyTokens.LAMBDA); }
  "as"          { return symbol(ChocoPyTokens.AS); }
  "for"         { return symbol(ChocoPyTokens.FOR); }
  "global"      { return symbol(ChocoPyTokens.GLOBAL); }
  "in"          { return symbol(ChocoPyTokens.IN); }
  "nonlocal"    { return symbol(ChocoPyTokens.NONLOCAL); }
  "pass"        { return symbol(ChocoPyTokens.PASS); }
  "return"      { return symbol(ChocoPyTokens.RETURN); }
  "assert"      { return symbol(ChocoPyTokens.ASSERT); }
  "await"       { return symbol(ChocoPyTokens.AWAIT); }
  "break"       { return symbol(ChocoPyTokens.BREAK); }
  "continue"    { return symbol(ChocoPyTokens.CONTINUE); }
  "del"         { return symbol(ChocoPyTokens.DEL); }
  "except"      { return symbol(ChocoPyTokens.EXCEPT); }
  "finally"     { return symbol(ChocoPyTokens.FINALLY); }
  "from"        { return symbol(ChocoPyTokens.FROM); }
  "import"      { return symbol(ChocoPyTokens.IMPORT); }
  "raise"       { return symbol(ChocoPyTokens.RAISE); }
  "try"         { return symbol(ChocoPyTokens.TRY); }
  "with"        { return symbol(ChocoPyTokens.WITH); }
  "yield"       { return symbol(ChocoPyTokens.YIELD); }

  /* Operators. */
  "+"           { return symbol(ChocoPyTokens.PLUS); }
  "-"           { return symbol(ChocoPyTokens.MINUS); }
  "*"           { return symbol(ChocoPyTokens.MUL); }
  "//"          { return symbol(ChocoPyTokens.DIV); }
  /* According to the manual, ChocoPy has no floating-point division,
     so '/' is treated as integer division, the same as '//'. */
  "/"           { return symbol(ChocoPyTokens.DIV); }
  "%"           { return symbol(ChocoPyTokens.MOD); }
  ">"           { return symbol(ChocoPyTokens.GT); }
  "<"           { return symbol(ChocoPyTokens.LT); }
  "=="          { return symbol(ChocoPyTokens.EQUAL); }
  "!="          { return symbol(ChocoPyTokens.NEQ); }
  ">="          { return symbol(ChocoPyTokens.GEQ); }
  "<="          { return symbol(ChocoPyTokens.LEQ); }
  "="           { return symbol(ChocoPyTokens.ASSIGN); }
  "and"         { return symbol(ChocoPyTokens.AND); }
  "or"          { return symbol(ChocoPyTokens.OR); }
  "not"         { return symbol(ChocoPyTokens.NOT); }
  "."           { return symbol(ChocoPyTokens.DOT); }
  "("           { return symbol(ChocoPyTokens.LPAR); }
  ")"           { return symbol(ChocoPyTokens.RPAR); }
  "["           { return symbol(ChocoPyTokens.LBR); }
  "]"           { return symbol(ChocoPyTokens.RBR); }
  "->"          { return symbol(ChocoPyTokens.ARROW); }
  "is"          { return symbol(ChocoPyTokens.IS); }

  /* Identifiers. Listed after the keywords so that a keyword wins when
     both match the same text. */
  {Identifiers} { return symbol(ChocoPyTokens.ID, yytext()); }

  /* Whitespace. */
  {WhiteSpace}  { /* ignore */ }

  /* Comment. */
  {Comments}    { /* ignore */ }
}

<STR> {

  /* Append a run of string contents. Escape sequences are stored exactly
     as written. NOTE(review): they are not decoded here; confirm whether
     the parser expects decoded text. */
  {StringLiteral} { currString += yytext(); }

  \\$           { /* '\' at the end of a line: do nothing. */ }

  /* A closing quote ends the string: go back to AFTER and return a
     STRING token spanning from the opening quote to this quote. */
  "\""          {
                    yybegin(AFTER);
                    return symbolFactory.newSymbol(
                        ChocoPyTokens.terminalNames[ChocoPyTokens.STRING],
                        ChocoPyTokens.STRING,
                        new ComplexSymbolFactory.Location(str_l, str_c),
                        new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength()),
                        currString);
                }
}

/* At end of input, close the open indentation levels one DEDENT at a
   time (the rule is applied again on the next call), then return EOF. */
<<EOF>>         {
                    if (!stack.isEmpty()) {
                        return symbol(ChocoPyTokens.DEDENT, pop());
                    }
                    return symbol(ChocoPyTokens.EOF);
                }

/* Error fallback. */
[^]             { return symbol(ChocoPyTokens.UNRECOGNIZED); }