You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ChocoPy/src/main/jflex/chocopy/pa1/ChocoPy.jflex

271 lines
11 KiB

4 years ago
package chocopy.pa1;
import java_cup.runtime.*;
import java.util.ArrayList;
import java.util.Iterator;
4 years ago
%%
/*** Do not change the flags below unless you know what you are doing. ***/
%unicode
%line
%column
%states AFTER, STR
4 years ago
%class ChocoPyLexer
%public
%cupsym ChocoPyTokens
%cup
%cupdebug
%eofclose false
/*** Do not change the flags above unless you know what you are doing. ***/
/* The following code section is copied verbatim to the
* generated lexer class. */
%{
/* The code below includes some convenience methods to create tokens
* of a given type and optionally a value that the CUP parser can
* understand. Specifically, a lot of the logic below deals with
* embedded information about where in the source code a given token
* was recognized, so that the parser can report errors accurately.
* (It need not be modified for this project.) */
/** Producer of token-related values for the parser. */
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
private int currIndent = 0; //Current Indentation Level
4 years ago
private String currString = "";
private int str_l = 0, str_c = 0; //Start location of a string.
/*A stack that keeps track of the spaces in each Indentation Level*/
private ArrayList<Integer> stack = new ArrayList<Integer>(20);
private boolean indentErrorUnchecked = true;
4 years ago
/** Return a terminal symbol of syntactic category TYPE and no
* semantic value at the current source location. */
private Symbol symbol(int type) {
return symbol(type, yytext());
4 years ago
}
/**
 * Build a terminal symbol of syntactic category TYPE carrying VALUE.
 * The symbol spans the text of the current match: it starts at the
 * 1-based column of the first matched character and ends at the
 * 1-based column of the last one, both on line yyline + 1.
 */
private Symbol symbol(int type, Object value) {
    final String terminalName = ChocoPyTokens.terminalNames[type];
    final int line = yyline + 1;
    final ComplexSymbolFactory.Location start =
        new ComplexSymbolFactory.Location(line, yycolumn + 1);
    final ComplexSymbolFactory.Location end =
        new ComplexSymbolFactory.Location(line, yycolumn + yylength());
    return symbolFactory.newSymbol(terminalName, type, start, end, value);
}
4 years ago
/** Open a new indentation level whose indentation is INDENT. */
private void push(int indent) {
    stack.add(Integer.valueOf(indent));
}
/**
 * Close the innermost indentation level and return its indentation.
 * Returns 0, and changes nothing, when no level is open.
 */
private int pop() {
    final int last = stack.size() - 1;
    return last < 0 ? 0 : stack.remove(last);
}
/**
 * Return the indentation of the innermost open level without removing it,
 * or 0 when no level is open.
 */
private int top() {
    if (!stack.isEmpty()) {
        return stack.get(stack.size() - 1);
    }
    return 0;
}
/**
 * Tell whether INDENT is the indentation of some open level.
 * An indentation of 0 always counts as present.
 */
private boolean find(int indent) {
    if (indent != 0) {
        for (int level : stack) {
            if (level == indent) {
                return true;
            }
        }
        return false;
    }
    return true;
}
4 years ago
%}
/* Macros (regexes used in rules below) */
WhiteSpace = [ \t]
LineBreak = \r\n|\r|\n
/* According to the manual, a literal with redundant leading zeros (00, 007, ...) is illegal. */
IntegerLiteral = 0|[1-9][0-9]*
/* One or more of: any character other than a double quote or a backslash,
   or one of the two-character escapes for newline, carriage return, tab,
   backslash and double quote. */
StringLiteral = ([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))+
Identifiers = [_a-zA-Z][_a-zA-Z0-9]*
Comments = #[^\r\n]*
4 years ago
%%
4 years ago
/*
 * YYINITIAL is the start-of-line state: it measures the indentation of the
 * current line and turns changes of indentation into INDENT and DEDENT
 * tokens. The AFTER state switches back to YYINITIAL at every line break.
 * Line breaks inside a string literal are consumed in the STR state and
 * therefore do not restart indentation handling, e.g.:
 *
 *   "this is \
 *   a string across \
 *   multiple lines\
 *   "
 */
<YYINITIAL>{
    /*
     * Leading whitespace. A space adds one column. A tab advances to the
     * next multiple of eight columns, which is how Python computes
     * indentation, so a tab at the start of a line equals eight spaces.
     */
    {WhiteSpace}
    {
        /* Strings must be compared with equals; == compares references
           and is never true for the text returned by yytext. */
        if ("\t".equals(yytext())) {
            currIndent = (currIndent / 8 + 1) * 8;
        } else {
            currIndent++;
        }
    }

    /*
     * A line break reached in this state means the line was blank or held
     * only a comment. Such a line is ignored: no NEWLINE is passed to CUP
     * and the indentation count starts over.
     */
    {LineBreak}
    {
        currIndent = 0;
    }

    /* A comment on an otherwise empty line is ignored. */
    {Comments}
    {
        /* ignored */
    }

    /*
     * First character of the line that is not whitespace, a line break or
     * the start of a comment: compare the measured indentation with the
     * innermost open level and emit at most one token per match.
     */
    [^ \t\r\n#]
    {
        /* Give the character back so that it is scanned again, either by
           this rule (to emit further DEDENTs) or by the AFTER state. */
        yypushback(1);

        final ComplexSymbolFactory.Location left =
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn - 1);
        final ComplexSymbolFactory.Location right =
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength());

        if (top() > currIndent) {
            /* The line is indented less than the innermost level: close
               that level. The state is unchanged and the character was
               pushed back, so this rule matches again and keeps closing
               levels until top() <= currIndent. */
            pop();
            if (top() < currIndent) {
                /* The indentation falls between two open levels. Report
                   it and continue as if the line used the enclosing
                   level. */
                currIndent = top();
                return symbolFactory.newSymbol("<bad indentation>",
                    ChocoPyTokens.UNRECOGNIZED, left, right, currIndent);
            }
            return symbolFactory.newSymbol(
                ChocoPyTokens.terminalNames[ChocoPyTokens.DEDENT],
                ChocoPyTokens.DEDENT, left, right, currIndent);
        }

        /* Indentation is settled: the rest of the line is scanned in the
           AFTER state. */
        yybegin(AFTER);
        if (top() < currIndent) {
            /* The line is indented more than the innermost level: open a
               new level with this indentation. */
            push(currIndent);
            return symbolFactory.newSymbol(
                ChocoPyTokens.terminalNames[ChocoPyTokens.INDENT],
                ChocoPyTokens.INDENT, left, right, currIndent);
        }
    }
}
/*
 * AFTER scans everything that follows the indentation of a line. A line
 * break returns to YYINITIAL; an opening double quote enters STR.
 */
<AFTER> {
    /* Delimiters. */
    {LineBreak}
    {
        yybegin(YYINITIAL);
        currIndent = 0;
        indentErrorUnchecked = true;
        return symbol(ChocoPyTokens.NEWLINE);
    }
    ":"         { return symbol(ChocoPyTokens.COLON); }
    ","         { return symbol(ChocoPyTokens.COMMA); }

    /* Literals. */
    {IntegerLiteral}
    {
        /* A literal that does not fit in an int is reported as an
           unrecognized token instead of letting the exception escape
           from the scanner. */
        try {
            return symbol(ChocoPyTokens.NUMBER, Integer.parseInt(yytext()));
        } catch (NumberFormatException tooLarge) {
            return symbol(ChocoPyTokens.UNRECOGNIZED);
        }
    }
    /* Opening quote: remember where the string starts and collect its
       text in the STR state. */
    "\""
    {
        yybegin(STR);
        str_l = yyline + 1;
        str_c = yycolumn + 1;
        currString = "";
    }
    "False"     { return symbol(ChocoPyTokens.BOOL, false); }
    "True"      { return symbol(ChocoPyTokens.BOOL, true); }
    "None"      { return symbol(ChocoPyTokens.NONE); }

    /* Keywords. They must stay above the Identifiers rule: for matches of
       equal length the rule listed first wins. */
    "if"        { return symbol(ChocoPyTokens.IF); }
    "else"      { return symbol(ChocoPyTokens.ELSE); }
    "elif"      { return symbol(ChocoPyTokens.ELIF); }
    "while"     { return symbol(ChocoPyTokens.WHILE); }
    "class"     { return symbol(ChocoPyTokens.CLASS); }
    "def"       { return symbol(ChocoPyTokens.DEF); }
    "lambda"    { return symbol(ChocoPyTokens.LAMBDA); }
    "as"        { return symbol(ChocoPyTokens.AS); }
    "for"       { return symbol(ChocoPyTokens.FOR); }
    "global"    { return symbol(ChocoPyTokens.GLOBAL); }
    "in"        { return symbol(ChocoPyTokens.IN); }
    "nonlocal"  { return symbol(ChocoPyTokens.NONLOCAL); }
    "pass"      { return symbol(ChocoPyTokens.PASS); }
    "return"    { return symbol(ChocoPyTokens.RETURN); }
    "assert"    { return symbol(ChocoPyTokens.ASSERT); }
    "await"     { return symbol(ChocoPyTokens.AWAIT); }
    "break"     { return symbol(ChocoPyTokens.BREAK); }
    "continue"  { return symbol(ChocoPyTokens.CONTINUE); }
    "del"       { return symbol(ChocoPyTokens.DEL); }
    "except"    { return symbol(ChocoPyTokens.EXCEPT); }
    "finally"   { return symbol(ChocoPyTokens.FINALLY); }
    "from"      { return symbol(ChocoPyTokens.FROM); }
    "import"    { return symbol(ChocoPyTokens.IMPORT); }
    "raise"     { return symbol(ChocoPyTokens.RAISE); }
    "try"       { return symbol(ChocoPyTokens.TRY); }
    "with"      { return symbol(ChocoPyTokens.WITH); }
    "yield"     { return symbol(ChocoPyTokens.YIELD); }

    /* Operators. */
    "+"         { return symbol(ChocoPyTokens.PLUS); }
    "-"         { return symbol(ChocoPyTokens.MINUS); }
    "*"         { return symbol(ChocoPyTokens.MUL); }
    "//"        { return symbol(ChocoPyTokens.DIV); }
    /* According to the manual ChocoPy has no floating-point division, so
       a single slash is treated as integer division as well. */
    "/"         { return symbol(ChocoPyTokens.DIV); }
    "%"         { return symbol(ChocoPyTokens.MOD); }
    ">"         { return symbol(ChocoPyTokens.GT); }
    "<"         { return symbol(ChocoPyTokens.LT); }
    "=="        { return symbol(ChocoPyTokens.EQUAL); }
    "!="        { return symbol(ChocoPyTokens.NEQ); }
    ">="        { return symbol(ChocoPyTokens.GEQ); }
    "<="        { return symbol(ChocoPyTokens.LEQ); }
    "="         { return symbol(ChocoPyTokens.ASSIGN); }
    "and"       { return symbol(ChocoPyTokens.AND); }
    "or"        { return symbol(ChocoPyTokens.OR); }
    "not"       { return symbol(ChocoPyTokens.NOT); }
    "."         { return symbol(ChocoPyTokens.DOT); }
    "("         { return symbol(ChocoPyTokens.LPAR); }
    ")"         { return symbol(ChocoPyTokens.RPAR); }
    "["         { return symbol(ChocoPyTokens.LBR); }
    "]"         { return symbol(ChocoPyTokens.RBR); }
    "->"        { return symbol(ChocoPyTokens.ARROW); }
    "is"        { return symbol(ChocoPyTokens.IS); }

    /* Identifiers. */
    {Identifiers} { return symbol(ChocoPyTokens.ID, yytext()); }

    /* Whitespace. */
    {WhiteSpace} { /* ignore */ }

    /* Comment. */
    {Comments}  { /* ignore */ }
}
/*
 * STR collects the text between the opening and the closing double quote
 * of a string literal into currString.
 * NOTE(review): escape sequences are appended exactly as written (two
 * characters each); confirm that they are decoded elsewhere.
 */
<STR>{
    /* A run of ordinary characters and recognised escapes. */
    {StringLiteral}
    {
        currString = currString + yytext();
    }

    /*
     * A backslash that is the last character of a line. Only the backslash
     * is consumed and it is not added to the string.
     * NOTE(review): the line break that follows is not consumed here and is
     * matched by StringLiteral, so it does end up in currString -- confirm
     * that this is intended.
     */
    \\$
    {
        /* nothing to do */
    }

    /* Closing quote: emit the STRING token, which spans from the opening
       quote to this one, and go back to the AFTER state. */
    "\""
    {
        final ComplexSymbolFactory.Location opening =
            new ComplexSymbolFactory.Location(str_l, str_c);
        final ComplexSymbolFactory.Location closing =
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn + yylength());
        yybegin(AFTER);
        return symbolFactory.newSymbol(
            ChocoPyTokens.terminalNames[ChocoPyTokens.STRING],
            ChocoPyTokens.STRING, opening, closing, currString);
    }
}
/*
 * End of input. This rule is matched again on every call made at the end
 * of the input, so it returns one pending token per call:
 *   1. an UNRECOGNIZED token if the input ends inside a string literal,
 *   2. the NEWLINE that closes a last line lacking a line break,
 *   3. one DEDENT for every indentation level that is still open,
 *   4. finally EOF.
 */
<<EOF>>
{
    if (yystate() == STR) {
        /* Unterminated string: report it once, then finish the line as
           usual. */
        yybegin(AFTER);
        return symbolFactory.newSymbol("<unterminated string>",
            ChocoPyTokens.UNRECOGNIZED,
            new ComplexSymbolFactory.Location(str_l, str_c),
            new ComplexSymbolFactory.Location(yyline + 1, yycolumn + 1),
            currString);
    }
    if (yystate() == AFTER) {
        /* The last line has no line break; end it like any other line
           before closing the open blocks. */
        yybegin(YYINITIAL);
        currIndent = 0;
        return symbol(ChocoPyTokens.NEWLINE);
    }
    if (!stack.isEmpty()) {
        return symbol(ChocoPyTokens.DEDENT, pop());
    }
    return symbol(ChocoPyTokens.EOF);
}
4 years ago
/* Error fallback: any single character that no rule above accepts in the
   current state becomes an UNRECOGNIZED token. */
[^]
{
    return symbol(ChocoPyTokens.UNRECOGNIZED);
}