/* zweic -- a compiler for zwei
*
* Stephane Micheloud & LAMP
*
* $Id$
*/
package zweic;
import java.io.{InputStream, IOException};
/**
* This class implements the scanner of the zwei compiler.
*/
class Scanner(in: InputStream) {

  import Tokens._

  /**
   * When true, an empty line is printed on the console for every
   * newline character skipped between tokens.
   */
  val debug: Boolean = false

  /**
   * This character represents the end of the input.
   */
  private val EOF_CH = java.lang.Character.MAX_VALUE

  /**
   * A table that associates a keyword with its corresponding
   * token class.
   */
  private val keywords: Map[String, Token] = Map(
    "true"      -> TRUE,
    "false"     -> FALSE,
    "this"      -> THIS,
    "Null"      -> NULLTYPE,
    "null"      -> NULLFACTOR,
    "readInt"   -> READINT,
    "readChar"  -> READCHAR,
    "printInt"  -> PRINTINT,
    "printChar" -> PRINTCHAR,
    "Int"       -> INT,
    "class"     -> CLASS,
    "new"       -> NEW,
    "extends"   -> EXTENDS,
    "return"    -> RETURN,
    "and"       -> AND,
    "or"        -> OR,
    "if"        -> IF,
    "else"      -> ELSE,
    "while"     -> WHILE
  )

  /** The current token class.
   */
  var token: Token = BAD

  /** The position of the first character of the current token,
   *  as produced by Position.encode.
   */
  var start: Int = 0

  /**
   * The string representation of the current token. This variable
   * is only set if the current token class has several possible
   * textual representations (identifiers, keywords, numbers, strings).
   */
  var chars: String = _

  /**
   * A buffer for constructing string representations of tokens.
   */
  private val buf = new StringBuffer()

  /**
   * The current (line-break normalized) character.
   */
  private var ch = ' '

  /**
   * The last character taken from the input before normalization;
   * used by nextCh to collapse CR LF into a single '\n'.
   *
   * This field must be declared before the priming calls below: class
   * body statements run in textual order, so an initializer placed
   * after them would overwrite the value nextCh has already stored.
   */
  private var oldch: Char = ' '

  /**
   * The line and the column of the current character.
   */
  private var line = 1
  private var column = 0

  // Prime the scanner: load the first character, then the first token.
  nextCh
  nextToken

  /**
   * This method reads the next token and stores the token class
   * in variable 'token'. If the token representation is not unique
   * it will also leave a textual representation in variable 'chars'.
   * Whitespaces and '//' line comments are skipped by this method.
   */
  def nextToken: Unit = {
    buf.setLength(0)
    // skip comments and whitespace characters
    while (Character.isWhitespace(ch) || ch == '/') {
      if (debug && ch == '\n') Console.println("")
      if (ch == '/') {
        // Record the position while 'ch' is still the slash, so that a
        // division token is positioned like every other token.
        val slashStart = Position.encode(line, column - 1)
        nextCh
        if (ch == '/') {
          // line comment: discard everything up to the end of the line
          while (ch != '\n' && ch != EOF_CH) nextCh
        } else {
          // a lone slash is the division operator
          start = slashStart
          token = DIV
          return
        }
      } else {
        nextCh
      }
    }
    start = Position.encode(line, column - 1)
    token = readToken
  }

  /**
   * Read the next token, store its representation (if it is not
   * unique) in variable 'chars' and return the token class.
   *
   * '/' never reaches this method: nextToken handles both comments
   * and the division operator itself.
   */
  private def readToken: Token = ch match {
    case '(' => single(LPAREN)
    case ')' => single(RPAREN)
    case '{' => single(LACCOLADE)
    case '}' => single(RACCOLADE)
    case '-' => single(SUB)
    case '+' => single(ADD)
    case '*' => single(MUL)
    case '%' => single(MOD)
    case ';' => single(SEMICOLON)
    case ',' => single(PERIOD)
    case '.' => single(DOT)
    case '!' => withOptionalEquals(NE, NOT)
    case '=' => withOptionalEquals(EQ, EQUALS)
    case '<' => withOptionalEquals(LE, LT)
    case '>' => withOptionalEquals(GE, GT)
    case '&' => doubled('&', AND)
    case '|' => doubled('|', OR)
    case '"' => readString
    case _ if Character.isLetter(ch) =>
      while (Character.isLetter(ch) || Character.isDigit(ch) || ch == '_') {
        buf.append(ch)
        nextCh
      }
      chars = buf.toString()
      // keywords take precedence over plain identifiers
      keywords.get(chars) match {
        case Some(keyword) => keyword
        case None          => IDENT
      }
    case _ if Character.isDigit(ch) =>
      if (ch == '0') {
        // a leading zero is a complete number on its own
        buf.append(ch)
        nextCh
      } else {
        while (Character.isDigit(ch)) {
          buf.append(ch)
          nextCh
        }
      }
      chars = buf.toString()
      NUMBER
    case EOF_CH =>
      EOF
    case _ =>
      // Remember the offending character before moving on, so the
      // message names it rather than its successor.
      val offending = ch
      nextCh
      Report.fail(start, "error: parse error on '" + offending + "' token")
      BAD
  }

  /** Consumes the current character and yields the given token class. */
  private def single(result: Token): Token = {
    nextCh
    result
  }

  /**
   * Consumes the current character; if it is followed by '=' that is
   * consumed too and 'withEq' is returned, otherwise 'plain'.
   */
  private def withOptionalEquals(withEq: Token, plain: Token): Token = {
    nextCh
    if (ch == '=') {
      nextCh
      withEq
    } else plain
  }

  /**
   * Scans an operator made of the character 'op' written twice.
   * A single occurrence is reported as an error and yields BAD.
   */
  private def doubled(op: Char, result: Token): Token = {
    nextCh
    if (ch == op) {
      nextCh
      result
    } else {
      Report.fail(start, "error: parse error on '" + op + "' token")
      BAD
    }
  }

  /**
   * Scans a string literal whose opening quote is the current
   * character. The contents (without quotes) are left in 'chars'.
   * A literal must be closed on the line it starts on.
   */
  private def readString: Token = {
    nextCh
    while (ch != '"' && ch != '\n' && ch != '\r' && ch != EOF_CH) {
      buf.append(ch)
      nextCh
    }
    if (ch == '"') {
      chars = buf.toString()
      nextCh
      STRING
    } else {
      if (ch == EOF_CH)
        Report.fail(start, "error: unexpected EOF")
      else {
        // the only remaining way out of the loop is a line break
        nextCh
        Report.fail(start, "error: parse error on end of line. expected \"")
      }
      BAD
    }
  }

  /**
   * Returns a textual representation of the current token.
   */
  def representation: String = {
    val name = token.toString()
    if (token == NUMBER || token == IDENT || token == STRING)
      name + "(\"" + chars + "\")"
    else
      name
  }

  /**
   * Puts the next character into 'ch' and updates the current position.
   * CR and CR LF are both delivered as a single '\n'. Once the end of
   * the input has been reached this method does nothing.
   */
  private def nextCh: Unit = if (ch != EOF_CH) {
    if (ch == '\n') {
      column = 1
      line += 1
    } else {
      column += 1
    }
    try {
      val raw = readCh
      // drop the LF of a CR LF pair: the CR was already delivered as '\n'
      oldch = if (oldch == '\r' && raw == '\n') readCh else raw
      ch = if (oldch == '\r') '\n' else oldch
    } catch {
      case e: IOException =>
        Report.fail(start, e.getMessage())
        // nothing more can be read; make sure scanning loops terminate
        ch = EOF_CH
    }
  }

  /**
   * Reads one raw character from the input stream, mapping the
   * end of the stream to EOF_CH.
   */
  private def readCh: Char = {
    val c = in.read()
    if (c < 0) EOF_CH else c.toChar
  }
}