Initial work on the lexer

master
bill 3 years ago
parent 54e35e2b89
commit 997178ba9c

@ -144,6 +144,11 @@ action code {:
*/
/* Returned by the lexer when a line break ends a line that contained tokens. */
terminal NEWLINE;
/* Arithmetic operator tokens, each declared with a String semantic value.
 * NOTE(review): the lexer rules reference ChocoPyTokens.MUL and
 * ChocoPyTokens.DIV rather than MULTIPLY and DIVIDE -- confirm that the
 * terminal names and the lexer agree. */
terminal String PLUS;
terminal String MINUS;
terminal String MULTIPLY;
terminal String DIVIDE;
/* Identifier token; the lexer supplies the matched text as its value. */
terminal String NAMES;
/* Integer literal token; the lexer supplies the parsed Integer as its value. */
terminal Integer NUMBER;
/* Returned by the lexer for erroneous tokens. Since it does not appear in
* the grammar, it indicates a syntax error. */

@ -1,6 +1,6 @@
package chocopy.pa1;
import java_cup.runtime.*;
import java.util.ArrayList;
%%
/*** Do not change the flags below unless you know what you are doing. ***/
@ -8,7 +8,7 @@ import java_cup.runtime.*;
/* Scan over the full Unicode character set. */
%unicode
/* Maintain yyline (zero-based line number), used when building symbol locations. */
%line
/* Maintain yycolumn (zero-based column number), used when building symbol locations. */
%column
/* Declare the lexical state whose <AFTER> rule group tokenizes the rest of a
 * line; YYINITIAL handles the leading indentation of each line. */
%states AFTER
/* Name of the generated scanner class. */
%class ChocoPyLexer
/* Make the generated scanner class public. */
%public
@ -32,11 +32,12 @@ import java_cup.runtime.*;
/** Producer of token-related values for the parser. */
final ComplexSymbolFactory symbolFactory = new ComplexSymbolFactory();
/** Indentation width counted so far on the current line. The YYINITIAL
 *  rules grow it for each leading whitespace character and reset it to 0
 *  at every line break. */
private int currIndent = 0;
/** Stack of indentation widths, accessed through push(), pop() and top().
 *  A width is pushed when an INDENT token is emitted and one is popped for
 *  each DEDENT token. Created with an initial capacity of 20. */
private ArrayList<Integer> stack = new ArrayList<Integer>(20);
/** Return a terminal symbol of syntactic category TYPE at the current
 *  source location, using the matched text (yytext()) as its value.
 *  Delegates to the two-argument overload; it must never call itself,
 *  which would recurse without end. */
private Symbol symbol(int type) {
    return symbol(type, yytext());
}
/** Return a terminal symbol of syntactic category TYPE and semantic
@ -47,7 +48,17 @@ import java_cup.runtime.*;
new ComplexSymbolFactory.Location(yyline + 1,yycolumn + yylength()),
value);
}
/** Record INDENT as the innermost (most recently opened) indentation level. */
private void push(int indent) {
    stack.add(stack.size(), indent);
}
/** Remove and return the innermost indentation level, or return 0
 *  (leaving the stack untouched) when no level is recorded. */
private int pop() {
    final int last = stack.size() - 1;
    return last < 0 ? 0 : stack.remove(last);
}
/** Return the innermost indentation level without removing it, or 0
 *  when no level is recorded (i.e. at the outermost nesting). */
private int top() {
    if (stack.isEmpty()) {
        return 0;
    }
    return stack.get(stack.size() - 1);
}
%}
/* Macros (regexes used in rules below) */
@ -55,25 +66,119 @@ import java_cup.runtime.*;
/* Horizontal whitespace: one space or one tab. */
WhiteSpace = [ \t]
/* A line terminator in the CR, LF or CRLF convention. */
LineBreak = \r|\n|\r\n
/* Decimal integer literal without leading zeros. */
IntegerLiteral = 0|[1-9][0-9]*
/* Double-quoted string; a backslash may only introduce one of the escapes
 * \", \t, \r, \n or \\. The quotes are part of the match. */
StringLiteral = \"([^\"\\]|(\\\")|(\\t)|(\\r)|(\\n)|(\\\\))*\"
/* Identifier: a letter or underscore followed by letters, digits or
 * underscores. Digits are allowed after the first character so that a name
 * such as x1 is one token rather than a name followed by a number. */
Names = [a-zA-Z_][a-zA-Z_0-9]*
/* Comment: from '#' up to, but not including, the end of the line. */
Comments = #[^\r\n]*
%%
/* Start-of-line state: measures the indentation of the current line and
 * emits INDENT / DEDENT tokens before handing the line over to AFTER. */
<YYINITIAL> {
    /* Leading whitespace, matched one character at a time. */
    {WhiteSpace} {
        /* Compare by value: yytext() returns a fresh String, so a reference
         * comparison with the literal "\t" would never be true. */
        if ("\t".equals(yytext())) {
            /* A tab advances the indentation to the next multiple of eight. */
            currIndent += 8 - (currIndent % 8);
        } else {
            currIndent++;
        }
    }

    /* Blank (or whitespace/comment-only) line: no token, restart the count. */
    {LineBreak} {
        currIndent = 0;
    }

    {Comments} { /* ignored */ }

    /* First significant character of the line. It is pushed back so that it
     * is scanned again, either by this rule (to emit a further DEDENT) or by
     * the AFTER rules (to produce the actual token). */
    [^ \t\r\n#] {
        yypushback(1);
        if (top() > currIndent) {
            /* Stay in YYINITIAL: the next scan re-enters this rule, so one
             * DEDENT is emitted per enclosing level that is being closed. */
            pop();
            return symbol(ChocoPyTokens.DEDENT);
        }
        yybegin(AFTER);
        if (top() < currIndent) {
            push(currIndent);
            return symbol(ChocoPyTokens.INDENT);
        }
        /* Same indentation as the enclosing level: no token is returned and
         * scanning continues in AFTER. */
    }
}
/* Rest-of-line state: tokenizes everything after the indentation. */
<AFTER> {
    /* Delimiters. */

    /* End of a line that contained tokens: switch back to YYINITIAL and
     * restart the indentation count BEFORE returning, since statements placed
     * after a return are unreachable. */
    {LineBreak} {
        yybegin(YYINITIAL);
        currIndent = 0;
        return symbol(ChocoPyTokens.NEWLINE);
    }
    ":" { return symbol(ChocoPyTokens.COLON); }
    "," { return symbol(ChocoPyTokens.COMMA); }

    /* Literals. */
    {IntegerLiteral} { return symbol(ChocoPyTokens.NUMBER,
                                     Integer.parseInt(yytext())); }
    {StringLiteral} { return symbol(ChocoPyTokens.STRING, yytext()); }
    "False" { return symbol(ChocoPyTokens.BOOL, false); }
    "True" { return symbol(ChocoPyTokens.BOOL, true); }
    "None" { return symbol(ChocoPyTokens.NONE); }

    /* Keywords. These precede {Names} so that they win when both match
     * the same text. */
    "if" { return symbol(ChocoPyTokens.IF); }
    "else" { return symbol(ChocoPyTokens.ELSE); }
    "elif" { return symbol(ChocoPyTokens.ELIF); }
    "while" { return symbol(ChocoPyTokens.WHILE); }
    "class" { return symbol(ChocoPyTokens.CLASS); }
    "def" { return symbol(ChocoPyTokens.DEF); }
    "lambda" { return symbol(ChocoPyTokens.LAMBDA); }
    "as" { return symbol(ChocoPyTokens.AS); }
    "for" { return symbol(ChocoPyTokens.FOR); }
    "global" { return symbol(ChocoPyTokens.GLOBAL); }
    "in" { return symbol(ChocoPyTokens.IN); }
    "nonlocal" { return symbol(ChocoPyTokens.NONLOCAL); }
    "pass" { return symbol(ChocoPyTokens.PASS); }
    "return" { return symbol(ChocoPyTokens.RETURN); }
    "assert" { return symbol(ChocoPyTokens.ASSERT); }
    "await" { return symbol(ChocoPyTokens.AWAIT); }
    "break" { return symbol(ChocoPyTokens.BREAK); }
    "continue" { return symbol(ChocoPyTokens.CONTINUE); }
    "del" { return symbol(ChocoPyTokens.DEL); }
    "except" { return symbol(ChocoPyTokens.EXCEPT); }
    "finally" { return symbol(ChocoPyTokens.FINALLY); }
    "from" { return symbol(ChocoPyTokens.FROM); }
    "import" { return symbol(ChocoPyTokens.IMPORT); }
    "raise" { return symbol(ChocoPyTokens.RAISE); }
    "try" { return symbol(ChocoPyTokens.TRY); }
    "with" { return symbol(ChocoPyTokens.WITH); }
    "yield" { return symbol(ChocoPyTokens.YIELD); }

    /* Operators.
     * NOTE(review): the grammar hunk visible alongside this lexer declares
     * terminals MULTIPLY and DIVIDE, while MUL and DIV are used here --
     * confirm that the names agree.
     * NOTE(review): "//" and "/" both produce DIV -- confirm that a single
     * slash is meant to be accepted. */
    "+" { return symbol(ChocoPyTokens.PLUS); }
    "-" { return symbol(ChocoPyTokens.MINUS); }
    "*" { return symbol(ChocoPyTokens.MUL); }
    "//" { return symbol(ChocoPyTokens.DIV); }
    "/" { return symbol(ChocoPyTokens.DIV); }
    "%" { return symbol(ChocoPyTokens.MOD); }
    ">" { return symbol(ChocoPyTokens.GT); }
    "<" { return symbol(ChocoPyTokens.LT); }
    "==" { return symbol(ChocoPyTokens.EQUAL); }
    "!=" { return symbol(ChocoPyTokens.NEQ); }
    ">=" { return symbol(ChocoPyTokens.GEQ); }
    "<=" { return symbol(ChocoPyTokens.LEQ); }
    "=" { return symbol(ChocoPyTokens.ASSIGN); }
    "and" { return symbol(ChocoPyTokens.AND); }
    "or" { return symbol(ChocoPyTokens.OR); }
    "not" { return symbol(ChocoPyTokens.NOT); }
    "." { return symbol(ChocoPyTokens.DOT); }
    "(" { return symbol(ChocoPyTokens.LPAR); }
    ")" { return symbol(ChocoPyTokens.RPAR); }
    "[" { return symbol(ChocoPyTokens.LBR); }
    "]" { return symbol(ChocoPyTokens.RBR); }
    "->" { return symbol(ChocoPyTokens.ARROW); }
    "is" { return symbol(ChocoPyTokens.IS); }

    /* Identifiers. */
    {Names} { return symbol(ChocoPyTokens.NAMES, yytext()); }

    /* Whitespace. */
    {WhiteSpace} { /* ignore */ }

    /* Comment. */
    {Comments} { /* ignore */ }
}
/* End of input. Indentation levels that are still open are closed first:
 * each scan at end of input pops one level and returns a DEDENT, and EOF is
 * reported only once no level remains (top() yields 0 on an empty stack). */
<<EOF>> {
    if (top() > 0) {
        pop();
        return symbol(ChocoPyTokens.DEDENT);
    }
    return symbol(ChocoPyTokens.EOF);
}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save