// Newer
// Older
// zweic / sources / zweic / Scanner.scala
/*  zweic -- a compiler for zwei
 *
 *  Stephane Micheloud & LAMP
 *
 *  $Id$
 */

package zweic;

import java.io.{InputStream, IOException};


/**
 * This class implements the scanner (lexical analyser) of the zwei compiler.
 *
 * The input is consumed one character at a time; every value returned by
 * `in.read()` is treated as one character. A line break written as CR, LF
 * or CR LF is always presented to the scanner as a single '\n'.
 *
 * Constructing a scanner already reads the first token. After construction
 * and after every call to `nextToken`, the fields `token`, `start` and (for
 * tokens with several textual forms) `chars` describe the current token.
 *
 * @param in the stream the program text is read from
 */
class Scanner(in: InputStream) {
  /**
   * When true, an empty line is printed on the console for every line
   * break that is skipped between two tokens.
   */
  val debug: Boolean = false;

  import scala.collection.mutable.HashMap;
  import Tokens._;

  /**
   * This character represents the end of the input.
   */
  private val EOF_CH = java.lang.Character.MAX_VALUE;

  /**
   * A table that associates a keyword with its corresponding
   * token class.
   */
  private val keywords = new HashMap[String, Token]();

  // literals and special values
  keywords("true")      = TRUE;
  keywords("false")     = FALSE;
  keywords("this")      = THIS;
  keywords("Null")      = NULLTYPE;
  keywords("null")      = NULLFACTOR;

  // built-in input/output operations
  keywords("readInt")   = READINT;
  keywords("readChar")  = READCHAR;
  keywords("printInt")  = PRINTINT;
  keywords("printChar") = PRINTCHAR;

  // types, classes and objects
  keywords("Int")       = INT;
  keywords("class")     = CLASS;
  keywords("new")       = NEW;
  keywords("extends")   = EXTENDS;
  keywords("return")    = RETURN;

  // word operators and control structures
  keywords("and")       = AND;
  keywords("or")        = OR;
  keywords("if")        = IF;
  keywords("else")      = ELSE;
  keywords("while")     = WHILE;

  /** The current token class.
   */
  var token: Token = BAD;

  /** The position of the first character of the current token,
   *  as produced by `Position.encode(line, column - 1)`.
   */
  var start: Int = 0;

  /**
   * The string representation of the current token. This variable
   * is only set if the current token class has several possible
   * textual representations.
   */
  var chars: String = _;

  /**
   * A buffer for constructing string representations of tokens.
   */
  private val buf = new StringBuffer();

  /**
   * The current character.
   */
  private var ch = ' ';

  /**
   * The last raw character obtained from the input, before CR is
   * translated to '\n'. It lets 'nextCh' recognise the LF of a CR LF pair.
   *
   * This field must be declared before the 'nextCh; nextToken;' statements
   * below: initialisers run in textual order, so a later declaration would
   * reset the value after the first token has already been scanned and the
   * LF of a CR LF pair following that token would be counted as a second
   * line break.
   */
  private var oldch: Char = ' ';

  /**
   * The line and the column of the current character 'ch'.
   */
  private var line = 1;
  private var column = 0;

  // read the first character and the first token
  nextCh;
  nextToken;

  /**
   * This method reads the next token and stores the token class
   * in variable 'token'. If the token representation is not unique
   * it will also leave a textual representation in variable 'chars'.
   * Whitespaces and comments are skipped by this method.
   */
  def nextToken: Unit = {
    // every token starts with an empty text buffer
    buf.setLength(0);

    // Skip whitespace and '//' comments. A '/' that does not start a
    // comment is the division operator; it ends the loop immediately.
    var division = false;
    while (!division && (Character.isWhitespace(ch) || ch == '/')) {
      if (debug && ch == '\n') {
        Console.println("")
      }
      if (ch == '/') {
        // Remember the column of the '/' before it is consumed, so that a
        // division token gets the same kind of position as any other token.
        val slashColumn = column;
        nextCh;
        if (ch == '/') {
          // comment: runs up to (not including) the end of the line
          while (ch != '\n' && ch != EOF_CH) {
            nextCh;
          }
        } else {
          // division ('line' cannot have changed while consuming the '/')
          start = Position.encode(line, slashColumn - 1);
          token = DIV;
          division = true
        }
      } else {
        // whitespace
        nextCh;
      }
    }

    if (!division) {
      start = Position.encode(line, column - 1);
      // read the current token
      token = readToken;
    }
  }

  /**
   * Reads an operator that is either a single character or that character
   * followed by '='. The first character is the current one and is consumed
   * unconditionally.
   *
   * @param withEq the token class to return if a '=' follows
   * @param plain  the token class to return otherwise
   */
  private def orWithEquals(withEq: Token, plain: Token): Token = {
    nextCh;
    if (ch == '=') {
      nextCh;
      withEq
    } else plain
  }

  /**
   * Read the next token, store its representation (if it is not
   * unique) in variable 'chars' and return the token class.
   * Errors are reported through 'Report.fail' at position 'start'.
   */
  private def readToken: Token = ch match {

    case '(' => nextCh; LPAREN;
    case ')' => nextCh; RPAREN;
    case '{' => nextCh; LACCOLADE;
    case '}' => nextCh; RACCOLADE;

    case '-' => nextCh; SUB;
    case '+' => nextCh; ADD;
    case '*' => nextCh; MUL;
    case '%' => nextCh; MOD;
    // 'nextToken' already deals with every '/'; kept as a safety net
    case '/' => nextCh; DIV;

    case ';' => nextCh; SEMICOLON;
    case ',' => nextCh; PERIOD;
    case '.' => nextCh; DOT;

    case '!' => orWithEquals(NE, NOT);
    case '=' => orWithEquals(EQ, EQUALS);
    case '<' => orWithEquals(LE, LT);
    case '>' => orWithEquals(GE, GT);

    case '&' =>
      nextCh;
      if (ch == '&') {
        nextCh;
        AND
      } else {
        Report.fail(start, "error: parse error on '&' token");
        BAD
      }

    case '|' =>
      nextCh;
      if (ch == '|') {
        nextCh;
        OR
      } else {
        Report.fail(start, "error: parse error on '|' token");
        BAD
      }

    case '"' =>
      // string literal: everything up to the closing quote on the same line
      nextCh;
      while (ch != '"' && ch != '\n' && ch != '\r' && ch != EOF_CH) {
        buf.append(ch);
        nextCh;
      }
      if (ch == '"') {
        chars = buf.toString();
        nextCh;
        STRING
      } else {
        if (ch == EOF_CH) {
          Report.fail(start, "error: unexpected EOF")
        } else {
          // Name the line break that ended the literal (captured before it
          // is consumed) instead of whatever character follows it.
          val offending = if (ch == '\r') "\\r" else "\\n";
          nextCh;
          Report.fail(start, "error: parse error on '"+offending+"' token. expected \"")
        }
        BAD
      }

    case _ if (Character.isLetter(ch)) =>
      // identifier or keyword: a letter followed by letters, digits or '_'
      while (Character.isLetter(ch) || Character.isDigit(ch) || ch == '_') {
        buf.append(ch);
        nextCh;
      }
      chars = buf.toString();
      keywords.get(chars) match {
        case Some(t) => t
        case _ => IDENT
      }

    case _ if (Character.isDigit(ch)) =>
      // number: either a single '0' or a run of digits not starting with '0'
      if (ch == '0') {
        buf.append(ch);
        nextCh;
      } else {
        while (Character.isDigit(ch)) {
          buf.append(ch);
          nextCh;
        }
      }
      chars = buf.toString();
      NUMBER

    case EOF_CH =>
      EOF

    case _ =>
      // Capture the unexpected character before skipping it, so that the
      // message names it rather than the character that follows.
      val offending = ch;
      nextCh;
      Report.fail(start, "error: parse error on '"+offending+"' token");
      BAD
  }

  /**
   * Returns a textual representation of the current token. For numbers,
   * identifiers and strings the text of the token is appended in the
   * form NAME("text").
   */
  def representation: String = {
    val name = token.toString();
    if (token == NUMBER || token == IDENT || token == STRING)
      name + "(\"" + chars + "\")"
    else
      name
  }

  /**
   * Puts the next character into 'ch' and updates the current position.
   * Once the end of the input has been reached this method does nothing.
   * An IOException raised while reading is reported through 'Report.fail'.
   */
  private def nextCh: Unit = {
    if (ch != EOF_CH) {
      // account for the character that is being left behind
      if (ch == '\n') {
        column = 1;
        line = line + 1
      } else {
        column = column + 1
      }
      try {
        ch = readCh;
        // the LF of a CR LF pair was already delivered as '\n': skip it
        oldch = if ((oldch == '\r') && (ch == '\n')) readCh else ch;
        // a CR is always presented as '\n'
        ch = if (oldch == '\r') '\n' else oldch
      }
      catch {
        case e: IOException => Report.fail(start, e.getMessage());
      }
    }
  }

  /**
   * Reads one raw character from the input stream.
   *
   * @return the character read, or 'EOF_CH' if the stream is exhausted
   */
  private def readCh: Char = {
    val c = in.read();
    if (c < 0) EOF_CH else c.toChar
  }
}