/*  zweic -- a compiler for zwei
 *
 *  Stephane Micheloud & LAMP
 *
 *  $Id$
 */

package zweic;

import java.io.{InputStream, IOException};

/**
 * This class implements the scanner of the zwei compiler.
 *
 * Constructing a scanner immediately reads the first character and the
 * first token from `in`, so `token`, `start` and (where applicable)
 * `chars` are already set once the constructor returns.
 *
 * @param in the stream the source text is read from, one `read()` call
 *           per character
 */
class Scanner(in: InputStream) {

  /** When true, an empty line is printed for every newline skipped by `nextToken`. */
  val debug: Boolean = false;

  import scala.collection.mutable.HashMap;
  import Tokens._;

  /**
   * This character represents the end of the input.
   */
  private val EOF_CH = java.lang.Character.MAX_VALUE;

  /**
   * A table that associates a keyword with its corresponding
   * token class.
   */
  private val keywords = new HashMap[String, Token]();
  keywords("true") = TRUE;
  keywords("false") = FALSE;
  keywords("this") = THIS;
  keywords("Null") = NULLTYPE;
  keywords("null") = NULLFACTOR;
  keywords("readInt") = READINT;
  keywords("readChar") = READCHAR;
  keywords("printInt") = PRINTINT;
  keywords("printChar") = PRINTCHAR;
  keywords("Int") = INT;
  keywords("class") = CLASS;
  keywords("new") = NEW;
  keywords("extends") = EXTENDS;
  keywords("return") = RETURN;
  keywords("and") = AND;
  keywords("or") = OR;
  keywords("if") = IF;
  keywords("else") = ELSE;
  keywords("while") = WHILE;

  /** The current token class. */
  var token: Token = BAD;

  /** The position of the first character of the current token. */
  var start: Int = 0;

  /**
   * The string representation of the current token. This variable
   * is only set if the current token class has several possible
   * textual representations.
   */
  var chars: String = _;

  /**
   * A buffer for constructing string representations of tokens.
   */
  private val buf = new StringBuffer();

  /**
   * The current character.
   */
  private var ch = ' ';

  /**
   * The line and the column of the current character.
   */
  private var line = 1;
  private var column = 0;

  /**
   * The last raw character consumed by `nextCh`, before line-end
   * normalisation. It is used to recognise a "\r\n" pair.
   *
   * This field must be declared before the first call to `nextCh`
   * below: class members are initialised in textual order, so declaring
   * it later would reset it after the first token has been scanned and
   * lose a pending '\r'.
   */
  private var oldch: Char = ' ';

  // Prime the scanner: load the first character, then the first token.
  nextCh;
  nextToken;

  /**
   * This method reads the next token and stores the token class
   * in variable 'token'. If the token representation is not unique
   * it will also leave a textual representation in variable 'chars'.
   * Whitespace and "//" line comments are skipped by this method.
   * The position of the token is stored in variable 'start'.
   */
  def nextToken: Unit = {
    // reset the buffer used to build the token text
    buf.setLength(0);

    // skip comments and whitespace characters
    while (Character.isWhitespace(ch) || ch == '/') {
      if (debug && ch == '\n') {
        Console.println("")
      }
      if (ch == '/') {
        // Remember where the slash is before consuming it, so that a
        // division token gets the position of the slash itself, using
        // the same (line, column - 1) convention as every other token.
        val slashLine = line;
        val slashColumn = column;
        nextCh;
        if (ch == '/') {
          // line comment: skip up to (not including) the line end or EOF
          while (ch != '\n' && ch != EOF_CH) {
            nextCh;
          }
        } else {
          // a single slash is the division operator
          start = Position.encode(slashLine, slashColumn - 1);
          token = DIV;
          return
        }
      } else {
        // whitespace
        nextCh;
      }
    }

    // 'ch' is now the first character of the token
    start = Position.encode(line, column - 1);

    // read the current token
    token = readToken;
  }

  /**
   * Read the next token, store its representation (if it is not
   * unique) in variable 'chars' and return the token class.
   *
   * On entry 'ch' is the first character of the token; on exit 'ch' is
   * the first character after it (except at end of input, where 'ch'
   * stays EOF_CH).
   */
  private def readToken: Token = ch match {
    case '(' => nextCh; LPAREN;
    case ')' => nextCh; RPAREN;
    case '{' => nextCh; LACCOLADE;
    case '}' => nextCh; RACCOLADE;
    case '-' => nextCh; SUB;
    case '+' => nextCh; ADD;
    case '*' => nextCh; MUL;
    case '%' => nextCh; MOD;
    // NOTE(review): nextToken consumes every '/' itself, so this arm is
    // not reached from there; it is kept defensively.
    case '/' => nextCh; DIV;
    case ';' => nextCh; SEMICOLON;
    case ',' => nextCh; PERIOD;
    case '.' => nextCh; DOT;

    // one- or two-character operators
    case '!' =>
      nextCh;
      if (ch == '=') { nextCh; NE } else NOT;
    case '=' =>
      nextCh;
      if (ch == '=') { nextCh; EQ } else EQUALS;
    case '<' =>
      nextCh;
      if (ch == '=') { nextCh; LE } else LT;
    case '>' =>
      nextCh;
      if (ch == '=') { nextCh; GE } else GT;

    // '&' and '|' are only valid when doubled
    case '&' =>
      nextCh;
      if (ch == '&') {
        nextCh;
        AND
      } else {
        Report.fail(start, "error: parse error on '&' token");
        BAD
      }
    case '|' =>
      nextCh;
      if (ch == '|') {
        nextCh;
        OR
      } else {
        Report.fail(start, "error: parse error on '|' token");
        BAD
      }

    // string literal: everything up to the closing quote on the same line
    case '"' =>
      nextCh;
      while (ch != '"' && ch != '\n' && ch != '\r' && ch != EOF_CH) {
        buf.append(ch);
        nextCh;
      }
      if (ch == '"') {
        chars = buf.toString();
        nextCh;
        STRING
      } else {
        if (ch == EOF_CH) {
          Report.fail(start, "error: unexpected EOF");
        } else {
          // NOTE(review): the line end is consumed before the message is
          // built, so the character shown is the one following the line
          // end, not the line end itself. Kept as is because printing
          // the raw line terminator would break the message; confirm the
          // intended wording.
          nextCh;
          Report.fail(start, "error: parse error on '"+ch+"' token. expected \"");
        }
        BAD
      }

    // identifier or keyword: a letter followed by letters, digits or '_'
    case _ if (Character.isLetter(ch)) =>
      while (Character.isLetter(ch) || Character.isDigit(ch) || ch == '_') {
        buf.append(ch);
        nextCh;
      }
      chars = buf.toString();
      keywords.get(chars) match {
        case Some(keyword) => keyword;
        case _ => IDENT;
      }

    // number: either a single '0' or a run of digits not starting with '0'
    case _ if (Character.isDigit(ch)) =>
      if (ch == '0') {
        buf.append(ch);
        nextCh;
      } else {
        while (Character.isDigit(ch)) {
          buf.append(ch);
          nextCh;
        }
      }
      chars = buf.toString();
      NUMBER;

    case EOF_CH => EOF;

    case _ =>
      // Capture the offending character before advancing, so that the
      // message names it rather than the character that follows it.
      val unexpected = ch;
      nextCh;
      Report.fail(start, "error: parse error on '" + unexpected + "' token");
      BAD;
  }

  /**
   * Returns a textual representation of the current token: the token
   * class, followed by the token text in parentheses and quotes for
   * numbers, identifiers and strings.
   */
  def representation: String = {
    val tokenName = token.toString();
    if (token == NUMBER || token == IDENT || token == STRING)
      tokenName + "(\"" + chars + "\")";
    else
      tokenName
  }

  /**
   * Puts the next character into 'ch' and updates the current position.
   *
   * The position is advanced according to the character being left:
   * leaving a '\n' starts a new line at column 1, leaving anything else
   * moves one column to the right. Once 'ch' is EOF_CH this method does
   * nothing. Line ends are normalised: "\r\n" and a lone "\r" are both
   * delivered as a single '\n'. An IOException raised while reading is
   * reported through Report.fail.
   */
  private def nextCh: Unit = {
    ch match {
      case EOF_CH => return
      case '\n' =>
        column = 1;
        line = line + 1
      case _ =>
        column = column + 1
    }
    try {
      ch = readCh;
      // If the previous raw character was '\r' and this one is '\n', the
      // pair was already delivered as one '\n': skip to the next character.
      oldch = if ((oldch == '\r') && (ch == '\n')) readCh else ch;
      // A '\r' is always handed out as '\n'.
      ch = if (oldch == '\r') '\n' else oldch
    } catch {
      case e: IOException => Report.fail(start, e.getMessage());
    }
  }

  /**
   * Reads one raw character from the input stream and returns it, or
   * EOF_CH when the stream reports a negative value (end of input).
   */
  private def readCh: Char = {
    val c = in.read();
    if (c < 0) EOF_CH else c.asInstanceOf[Char]
  }

}