|
|
|
package chocopy.pa1;
|
|
|
|
import java_cup.runtime.*;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
%%
|
|
|
|
|
|
|
|
/*** Do not change the flags below unless you know what you are doing. ***/
|
|
|
|
|
|
|
|
%unicode
|
|
|
|
%line
|
|
|
|
%column
|
|
|
|
%states AFTER, STR
|
|
|
|
%class ChocoPyLexer
|
|
|
|
%public
|
|
|
|
|
|
|
|
%cupsym ChocoPyTokens
|
|
|
|
%cup
|
|
|
|
%cupdebug
|
|
|
|
|
|
|
|
%eofclose false
|
|
|
|
|
|
|
|
/*** Do not change the flags above unless you know what you are doing. ***/
|
|
|
|
|
|
|
|
/* The following code section is copied verbatim to the
|
|
|
|
* generated lexer class. */
|
|
|
|
%{
|
|
|
|
/* The code below includes some convenience methods to create tokens
|
|
|
|
* of a given type and optionally a value that the CUP parser can
|
|
|
|
* understand. Specifically, a lot of the logic below deals with
|
|
|
|
* embedded information about where in the source code a given token
|
|
|
|
* was recognized, so that the parser can report errors accurately.
|
|
|
|
* (It need not be modified for this project.) */
|
|
|
|
|
|
|
|
/** Producer of token-related values for the parser. */
|
|
|
|
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
|
|
|
|
private int currIndent = 0; //Current Indentation Level
|
|
|
|
private String currString = "";
|
|
|
|
private int str_l = 0, str_c = 0; //Start location of a string.
|
|
|
|
/*A stack that keeps track of the spaces in each Indentation Level*/
|
|
|
|
private ArrayList<Integer> stack = new ArrayList<Integer>(20);
|
|
|
|
/** Return a terminal symbol of syntactic category TYPE and no
|
|
|
|
* semantic value at the current source location. */
|
|
|
|
private Symbol symbol(int type) {
|
|
|
|
return symbol(type, yytext());
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Return a terminal symbol of syntactic category TYPE and semantic
|
|
|
|
* value VALUE at the current source location. */
|
|
|
|
private Symbol symbol(int type, Object value) {
|
|
|
|
return symbolFactory.newSymbol(ChocoPyTokens.terminalNames[type], type,
|
|
|
|
new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1),
|
|
|
|
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
|
|
|
|
value);
|
|
|
|
}
|
|
|
|
|
|
|
|
private void push(int indent){
|
|
|
|
stack.add(indent);
|
|
|
|
}
|
|
|
|
private int pop(){
|
|
|
|
if(stack.isEmpty()) return 0;
|
|
|
|
return stack.remove(stack.size() - 1);
|
|
|
|
}
|
|
|
|
private int top(){
|
|
|
|
if(stack.isEmpty()) return 0;
|
|
|
|
return stack.get(stack.size() - 1);
|
|
|
|
}
|
|
|
|
%}
|
|
|
|
|
|
|
|
/* Macros (regexes used in rules below) */
|
|
|
|
|
|
|
|
/* A single blank: space or horizontal tab. */
WhiteSpace = [ \t]

/* Any of the three line-ending conventions. */
LineBreak  = \r|\n|\r\n

/* According to the manual, a literal with a leading zero (e.g. 00) is illegal. */
IntegerLiteral = 0|[1-9][0-9]*

/* Body of a string: the escapes \n, \r, \t, \\ and \", or any character
   other than a backslash or a double quote. */
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+

/* A letter or underscore followed by any number of letters, digits or
   underscores. (The previous pattern only accepted a digit directly after
   an upper-case letter and no upper-case letter on its own, so names such
   as x1 or fooBar were split into several tokens.) */
Identifiers = [a-zA-Z_][a-zA-Z0-9_]*

/* A comment runs from '#' to the end of the line (line break excluded). */
Comments = #[^\r\n]*
|
|
|
|
%%
|
|
|
|
// YYINITIAL is the state in which indentation is handled.
// The lexer returns to YYINITIAL at the start of every new line,
// unless that line lies inside a string, e.g.:
|
|
|
|
/*
|
|
|
|
"this is \
|
|
|
|
a string across \
|
|
|
|
multiple lines\
|
|
|
|
"
|
|
|
|
*/
|
|
|
|
/* YYINITIAL: start-of-line state. Measures the indentation of the line and
   emits INDENT / DEDENT tokens before handing the rest of the line to the
   AFTER state. Blank and comment-only lines produce no tokens. */
<YYINITIAL> {

  /* Leading whitespace: accumulate the indentation width of the line. */
  {WhiteSpace} {
      // Compare string contents with equals(): the former `==` test compared
      // references, was never true, and made a tab count as a single column.
      if ("\t".equals(yytext())) {
          // A tab advances the width to the next multiple of 8, as in Python.
          currIndent += 8 - (currIndent % 8);
      } else {
          currIndent++;
      }
  }

  /*
   * Sanity check that Python treats a leading tab like 8 spaces; the
   * following program runs and prints "1" then "2":
   *
   *   if True:
   *   <TAB>print(1)        # indented with one tab
   *           print(2)     # indented with 8 spaces
   */

  /* Blank line: start over on the next line. An empty line is simply
     ignored, so no NEWLINE token is passed to CUP. */
  {LineBreak} { currIndent = 0; }

  /* Comment-only line: ignored, like a blank line. */
  {Comments} { /* ignored */ }

  /* First character that is not whitespace, a line break or a comment:
     settle the indentation, then continue with the line in AFTER. */
  [^ \t\r\n#] {
      // Un-read the character; it belongs to the first token of the line.
      yypushback(1);

      if (top() > currIndent) {
          // The line is indented less than the innermost open level: close
          // that level. The state is unchanged and the character was pushed
          // back, so this rule fires again and acts as a loop, emitting one
          // DEDENT per level until top() <= currIndent.
          pop();
          return symbol(ChocoPyTokens.DEDENT, currIndent);
      }

      // Indentation is settled (or about to be opened below); the rest of
      // the line is scanned in the AFTER state.
      yybegin(AFTER);

      if (top() < currIndent) {
          // The line is indented more than the innermost open level: open a
          // new level that is currIndent columns wide.
          push(currIndent);
          return symbolFactory.newSymbol(
              ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT], ChocoPyTokens.INDENT,
              new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1),
              new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength()),
              currIndent);
      }
  }
}
|
|
|
|
/* AFTER: scans the tokens of a line once its indentation has been handled. */
<AFTER> {

  /* Delimiters. */
  /* End of a logical line: go back to measuring indentation. */
  {LineBreak}         { yybegin(YYINITIAL); currIndent = 0; return symbol(ChocoPyTokens.NEWLINE); }
  ":"                 { return symbol(ChocoPyTokens.COLON); }
  ","                 { return symbol(ChocoPyTokens.COMMA); }

  /* Literals. */
  {IntegerLiteral}    {
                        try {
                          return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext()));
                        } catch (NumberFormatException e) {
                          // The literal does not fit in a 32-bit int. Report it
                          // as a lexical error instead of letting the exception
                          // abort the lexer.
                          return symbol(ChocoPyTokens.UNRECOGNIZED);
                        }
                      }
  /* An opening quote starts a string: remember where it began and collect
     its contents in the STR state. */
  "\""                { yybegin(STR); str_l = yyline + 1; str_c = yycolumn + 1; currString = ""; }
  "False"             { return symbol(ChocoPyTokens.BOOL, false); }
  "True"              { return symbol(ChocoPyTokens.BOOL, true); }
  "None"              { return symbol(ChocoPyTokens.NONE); }

  /* Keywords. */
  "if"                { return symbol(ChocoPyTokens.IF); }
  "else"              { return symbol(ChocoPyTokens.ELSE); }
  "elif"              { return symbol(ChocoPyTokens.ELIF); }
  "while"             { return symbol(ChocoPyTokens.WHILE); }
  "class"             { return symbol(ChocoPyTokens.CLASS); }
  "def"               { return symbol(ChocoPyTokens.DEF); }
  "lambda"            { return symbol(ChocoPyTokens.LAMBDA); }
  "as"                { return symbol(ChocoPyTokens.AS); }
  "for"               { return symbol(ChocoPyTokens.FOR); }
  "global"            { return symbol(ChocoPyTokens.GLOBAL); }
  "in"                { return symbol(ChocoPyTokens.IN); }
  "nonlocal"          { return symbol(ChocoPyTokens.NONLOCAL); }
  "pass"              { return symbol(ChocoPyTokens.PASS); }
  "return"            { return symbol(ChocoPyTokens.RETURN); }
  "assert"            { return symbol(ChocoPyTokens.ASSERT); }
  "await"             { return symbol(ChocoPyTokens.AWAIT); }
  "break"             { return symbol(ChocoPyTokens.BREAK); }
  "continue"          { return symbol(ChocoPyTokens.CONTINUE); }
  "del"               { return symbol(ChocoPyTokens.DEL); }
  "except"            { return symbol(ChocoPyTokens.EXCEPT); }
  "finally"           { return symbol(ChocoPyTokens.FINALLY); }
  "from"              { return symbol(ChocoPyTokens.FROM); }
  "import"            { return symbol(ChocoPyTokens.IMPORT); }
  "raise"             { return symbol(ChocoPyTokens.RAISE); }
  "try"               { return symbol(ChocoPyTokens.TRY); }
  "with"              { return symbol(ChocoPyTokens.WITH); }
  "yield"             { return symbol(ChocoPyTokens.YIELD); }

  /* Operators. */
  "+"                 { return symbol(ChocoPyTokens.PLUS); }
  "-"                 { return symbol(ChocoPyTokens.MINUS); }
  "*"                 { return symbol(ChocoPyTokens.MUL); }
  "//"                { return symbol(ChocoPyTokens.DIV); }
  /* According to the manual, ChocoPy has no floating-point division, so
     both '/' and '//' are treated as integer division. */
  "/"                 { return symbol(ChocoPyTokens.DIV); }
  "%"                 { return symbol(ChocoPyTokens.MOD); }
  ">"                 { return symbol(ChocoPyTokens.GT); }
  "<"                 { return symbol(ChocoPyTokens.LT); }
  "=="                { return symbol(ChocoPyTokens.EQUAL); }
  "!="                { return symbol(ChocoPyTokens.NEQ); }
  ">="                { return symbol(ChocoPyTokens.GEQ); }
  "<="                { return symbol(ChocoPyTokens.LEQ); }
  "="                 { return symbol(ChocoPyTokens.ASSIGN); }
  "and"               { return symbol(ChocoPyTokens.AND); }
  "or"                { return symbol(ChocoPyTokens.OR); }
  "not"               { return symbol(ChocoPyTokens.NOT); }
  "."                 { return symbol(ChocoPyTokens.DOT); }
  "("                 { return symbol(ChocoPyTokens.LPAR); }
  ")"                 { return symbol(ChocoPyTokens.RPAR); }
  "["                 { return symbol(ChocoPyTokens.LBR); }
  "]"                 { return symbol(ChocoPyTokens.RBR); }
  "->"                { return symbol(ChocoPyTokens.ARROW); }
  "is"                { return symbol(ChocoPyTokens.IS); }

  /* Identifiers. Listed after the keywords and word operators so that, for
     a match of equal length, the keyword rule takes precedence. */
  {Identifiers}       { return symbol(ChocoPyTokens.ID, yytext()); }

  /* Whitespace. */
  {WhiteSpace}        { /* ignore */ }

  /* Comment. */
  {Comments}          { /* ignore */ }
}
|
|
|
|
/* STR: inside a double-quoted string literal. */
<STR> {

  /* A run of ordinary characters and recognised escapes is appended to the
     collected text exactly as written. */
  {StringLiteral} { currString = currString + yytext(); }

  /* A backslash at the end of a line contributes nothing. */
  \\$ { /*'\' at the end of line, do nothing.*/ }

  /* Closing quote: emit the STRING token, spanning from the recorded start
     of the string to this quote, and resume scanning in AFTER. */
  "\"" {
      yybegin(AFTER);
      final ComplexSymbolFactory.Location begin =
          new ComplexSymbolFactory.Location(str_l, str_c);
      final ComplexSymbolFactory.Location end =
          new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength());
      return symbolFactory.newSymbol(
          ChocoPyTokens.terminalNames[ChocoPyTokens.STRING], ChocoPyTokens.STRING,
          begin, end, currString);
  }
}
|
|
|
|
/* End of input: close the open indentation levels one DEDENT at a time
   (this rule fires again after each one), then report EOF. */
<<EOF>> {
    if (stack.isEmpty()) {
        return symbol(ChocoPyTokens.EOF);
    }
    return symbol(ChocoPyTokens.DEDENT, pop());
}
|
|
|
|
|
|
|
|
/* Error fallback: any single character that no other rule accepts is
   handed to the parser as an UNRECOGNIZED token. */
[^] {
    return symbol(ChocoPyTokens.UNRECOGNIZED);
}
|