You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ChocoPy/src/main/jflex/chocopy/pa1/ChocoPy.jflex

240 lines
9.6 KiB

package chocopy.pa1;
import java_cup.runtime.*;
import java.util.ArrayList;
%%
/*** Do not change the flags below unless you know what you are doing. ***/
%unicode
/* Line and column tracking: the symbol helpers below read yyline and
 * yycolumn to attach source locations to every token. */
%line
%column
/* Extra lexical states used by the rules section: AFTER scans the rest of
 * a line once its indentation has been handled, STR scans the body of a
 * string literal. */
%states AFTER, STR
/* Name of the generated lexer class. */
%class ChocoPyLexer
%public
/* Class holding the token constants referenced as ChocoPyTokens.X below. */
%cupsym ChocoPyTokens
%cup
%cupdebug
%eofclose false
/*** Do not change the flags above unless you know what you are doing. ***/
/* The following code section is copied verbatim to the
* generated lexer class. */
%{
/* The code below includes some convenience methods to create tokens
* of a given type and optionally a value that the CUP parser can
* understand. Specifically, a lot of the logic below deals with
* embedded information about where in the source code a given token
* was recognized, so that the parser can report errors accurately.
* (It need not be modified for this project.) */
/** Factory that builds the location-annotated symbols handed to the parser. */
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
/** Indentation width accumulated so far on the line being scanned. */
private int currIndent = 0;
/** Text collected so far for the string literal being scanned. */
private String currString = "";
/** Stack of the indentation widths of the currently open levels. */
private ArrayList<Integer> stack = new ArrayList<>(20);
/** Return a terminal symbol of syntactic category TYPE at the current
 * source location. No dedicated semantic value is supplied by the caller,
 * so the matched text is attached as the value.
 *
 * @param type token code, used as an index into ChocoPyTokens.terminalNames
 * @return the symbol built by the two-argument overload */
private Symbol symbol(int type) {
    // Must delegate to the (type, value) overload: calling symbol(type)
    // here would recurse without end and overflow the stack.
    return symbol(type, yytext());
}
/** Build a terminal symbol of syntactic category TYPE carrying semantic
 * value VALUE, spanning the text that was just matched.
 *
 * @param type  token code, used as an index into ChocoPyTokens.terminalNames
 * @param value semantic value attached to the symbol
 * @return the symbol produced by the symbol factory */
private Symbol symbol(int type, Object value) {
    // yyline and yycolumn are zero-based; locations are reported one-based.
    final int line = yyline + 1;
    final ComplexSymbolFactory.Location left =
        new ComplexSymbolFactory.Location(line, yycolumn + 1);
    final ComplexSymbolFactory.Location right =
        new ComplexSymbolFactory.Location(line, yycolumn + yylength());
    final String name = ChocoPyTokens.terminalNames[type];
    return symbolFactory.newSymbol(name, type, left, right, value);
}
/** Open a new indentation level whose width is INDENT. */
private void push(int indent) {
    stack.add(Integer.valueOf(indent));
}
/** Remove the innermost indentation level and return its width, or
 * return 0 when no level is open. */
private int pop() {
    final int last = stack.size() - 1;
    if (last < 0) {
        return 0;
    }
    return stack.remove(last);
}
/** Return the width of the innermost indentation level without removing
 * it, or 0 when no level is open. */
private int top() {
    final int last = stack.size() - 1;
    if (last < 0) {
        return 0;
    }
    return stack.get(last);
}
%}
/* Macros (regexes used in rules below) */
/* A single blank or tab. */
WhiteSpace = [ \t]
/* Any of the three line terminators. */
LineBreak = \r|\n|\r\n
/* A lone zero or a number without leading zeros; according to the manual,
 * forms such as 00 are illegal. */
IntegerLiteral = 0|[1-9][0-9]*
/* One or more string body characters: the escapes \n, \r, \t, \\ and \",
 * or anything except a backslash and a double quote. */
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+
/* A letter or underscore followed by any mix of letters, digits and
 * underscores. Digits are allowed after any leading character, not only
 * directly after an uppercase letter. */
Identifiers = [a-zA-Z_][a-zA-Z0-9_]*
/* From a hash sign up to, but not including, the end of the line. */
Comments = #[^\r\n]*
%%
//YYINITIAL is the state in which indentation is measured.
//The lexer returns to YYINITIAL at the start of every new line,
//unless that line is inside a string, e.g.:
/*
"this is \
a string across \
multiple lines\
"
*/
<YYINITIAL>{
{WhiteSpace}
{
    /* Accumulate the width of the leading whitespace. The text must be
       compared with equals: the == operator compares references and never
       matched, so a tab used to count as a single space. */
    if ("\t".equals(yytext())) {
        /* A tab advances to the next multiple of 8 columns. */
        currIndent = (currIndent / 8 + 1) * 8;
    } else {
        currIndent++;
    }
}
{LineBreak}
{
    /*
    A line holding only whitespace or a comment is ignored: no NEWLINE
    is passed to Cup, and measuring starts over on the next line.
    */
    currIndent = 0;
}
{Comments} { /* ignored */ } //A comment-only line counts as blank
/* First character that is not a blank, tab, line break or comment start:
   the line has real content, so settle the indentation here and scan the
   rest of the line in the AFTER state. */
[^ \t\r\n#]
{
    //Rewind the character; it belongs to the first token of the line.
    yypushback(1);
    if (top() > currIndent)
    {
        /*
        The line is indented less than the innermost open level, so close
        that level. The state is unchanged and the character was pushed
        back, so this rule fires again and keeps emitting DEDENT until
        top() <= currIndent; only then does the lexer move on to AFTER.
        */
        pop();
        return symbol(ChocoPyTokens.DEDENT, currIndent);
    }
    /* Indentation is settled as far as dedenting goes; the rest of the
       line is scanned in the AFTER state. */
    yybegin(AFTER);
    if (top() < currIndent)
    {
        /*
        The line is indented more than the innermost open level: open a
        new level that is currIndent columns wide.
        */
        push(currIndent);
        return symbol(ChocoPyTokens.INDENT, currIndent);
    }
}
}
/* AFTER scans the tokens of a line once its indentation has been handled. */
<AFTER> {
/* Delimiters. */
{LineBreak} {
    /* Switch state and reset the indentation counter BEFORE returning:
       statements placed after a return are unreachable, which is a Java
       compile error and would leave the lexer stuck in AFTER. */
    yybegin(YYINITIAL);
    currIndent = 0;
    return symbol(ChocoPyTokens.NEWLINE);
}
":" { return symbol(ChocoPyTokens.COLON); }
"," { return symbol(ChocoPyTokens.COMMA); }
/* Literals. */
{IntegerLiteral} {
    try {
        return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext()));
    } catch (NumberFormatException e) {
        /* The literal does not fit in an int: report it as an
           unrecognized token instead of crashing the lexer. */
        return symbol(ChocoPyTokens.UNRECOGNIZED, yytext());
    }
}
"\"" {yybegin(STR); currString = "";} //An opening quote starts a string
"False" { return symbol(ChocoPyTokens.BOOL, false); }
"True" { return symbol(ChocoPyTokens.BOOL, true); }
"None" { return symbol(ChocoPyTokens.NONE); }
/* Keywords. These must stay above the identifier rule: on a match of
   equal length the rule listed first wins. */
"if" {return symbol(ChocoPyTokens.IF);}
"else" {return symbol(ChocoPyTokens.ELSE);}
"elif" {return symbol(ChocoPyTokens.ELIF);}
"while" {return symbol(ChocoPyTokens.WHILE);}
"class" {return symbol(ChocoPyTokens.CLASS);}
"def" {return symbol(ChocoPyTokens.DEF);}
"lambda" {return symbol(ChocoPyTokens.LAMBDA);}
"as" { return symbol(ChocoPyTokens.AS); }
"for" { return symbol(ChocoPyTokens.FOR); }
"global" { return symbol(ChocoPyTokens.GLOBAL); }
"in" { return symbol(ChocoPyTokens.IN); }
"nonlocal" { return symbol(ChocoPyTokens.NONLOCAL); }
"pass" { return symbol(ChocoPyTokens.PASS); }
"return" { return symbol(ChocoPyTokens.RETURN); }
"assert" { return symbol(ChocoPyTokens.ASSERT); }
"await" { return symbol(ChocoPyTokens.AWAIT); }
"break" { return symbol(ChocoPyTokens.BREAK); }
"continue" { return symbol(ChocoPyTokens.CONTINUE); }
"del" { return symbol(ChocoPyTokens.DEL); }
"except" { return symbol(ChocoPyTokens.EXCEPT); }
"finally" { return symbol(ChocoPyTokens.FINALLY); }
"from" { return symbol(ChocoPyTokens.FROM); }
"import" { return symbol(ChocoPyTokens.IMPORT); }
"raise" { return symbol(ChocoPyTokens.RAISE); }
"try" { return symbol(ChocoPyTokens.TRY); }
"with" { return symbol(ChocoPyTokens.WITH); }
"yield" { return symbol(ChocoPyTokens.YIELD); }
/* Operators. */
"+" { return symbol(ChocoPyTokens.PLUS); }
"-" { return symbol(ChocoPyTokens.MINUS); }
"*" { return symbol(ChocoPyTokens.MUL); }
"//" { return symbol(ChocoPyTokens.DIV); }
/* NOTE(review): a single slash is also mapped to integer division, on the
   reasoning that ChocoPy has no floating-point division. Confirm against
   the language manual that a lone slash should be accepted at all. */
"/" { return symbol(ChocoPyTokens.DIV); }
"%" { return symbol(ChocoPyTokens.MOD); }
">" { return symbol(ChocoPyTokens.GT); }
"<" { return symbol(ChocoPyTokens.LT); }
"==" { return symbol(ChocoPyTokens.EQUAL); }
"!=" { return symbol(ChocoPyTokens.NEQ); }
">=" { return symbol(ChocoPyTokens.GEQ); }
"<=" { return symbol(ChocoPyTokens.LEQ); }
"=" { return symbol(ChocoPyTokens.ASSIGN); }
"and" { return symbol(ChocoPyTokens.AND); }
"or" { return symbol(ChocoPyTokens.OR); }
"not" { return symbol(ChocoPyTokens.NOT); }
"." { return symbol(ChocoPyTokens.DOT); }
"(" { return symbol(ChocoPyTokens.LPAR); }
")" { return symbol(ChocoPyTokens.RPAR); }
"[" { return symbol(ChocoPyTokens.LBR); }
"]" { return symbol(ChocoPyTokens.RBR); }
"->" { return symbol(ChocoPyTokens.ARROW); }
"is" { return symbol(ChocoPyTokens.IS); }
/* Identifiers. */
{Identifiers} {return symbol(ChocoPyTokens.ID, yytext());}
/* Whitespace. */
{WhiteSpace} { /* ignore */ }
/* Comment. */
{Comments} { /* ignore */ }
}
/* STR collects the body of a string literal, up to the closing quote. */
<STR>{
{StringLiteral} {
    /* Append the matched chunk, escapes included, exactly as written. */
    currString = currString.concat(yytext());
}
\\$ { /* A backslash at the end of a line: nothing is appended. */ }
"\"" {
    /* Closing quote: hand over the collected text and resume scanning
       the rest of the line in the AFTER state. */
    final Symbol literal = symbol(ChocoPyTokens.STRING, currString);
    yybegin(AFTER);
    return literal;
}
}
/* End of input. This action runs again on every request made after the
   input is exhausted, so it can hand out several tokens, one per call,
   before the final EOF. */
<<EOF>> {
    if (yystate() == AFTER) {
        /* The last line has no line break: terminate it with an implicit
           NEWLINE so the final statement is complete for the parser. */
        yybegin(YYINITIAL);
        currIndent = 0;
        return symbol(ChocoPyTokens.NEWLINE);
    }
    if (top() > 0) {
        /* Close every indentation level that is still open, one DEDENT
           per call, so each INDENT is balanced before EOF. */
        pop();
        return symbol(ChocoPyTokens.DEDENT, 0);
    }
    return symbol(ChocoPyTokens.EOF);
}
/* Error fallback: any single character that no other rule matched. */
[^] { return symbol(ChocoPyTokens.UNRECOGNIZED); }