# Source: zweic / sources / zweic / Scanner.py
# Last change: glproj03 on 20 Nov 2005 (6 KB), empty log message
# scanner for the zwei compiler
#
# (c) 2005 Andreas Jaggi, Michael Karlen
#
# GNU Public License http://opensource.org/licenses/gpl-license.php

from Tokens import Tokens
import sys
import string
import os

class Scanner:
    """Hand-written lexical scanner for the zwei compiler.

    Characters are pulled one at a time from an OS-level file descriptor
    and grouped into tokens.  After construction, and after every call to
    nextToken(), the state of the current token is exposed through:

      token -- the Tokens value of the current token
      chars -- text of the token; only updated for identifiers/keywords,
               numbers and string literals
      start -- (line, column) of the first character of the token

    The code is written so that it parses under both Python 2 and
    Python 3 (no print statements, spaces-only indentation).
    """

    # Reserved words; any other identifier-shaped lexeme is Tokens.IDENT.
    keywords = {
        'true'      : Tokens.TRUE,
        'false'     : Tokens.FALSE,
        'this'      : Tokens.THIS,
        'Null'      : Tokens.NULLTYPE,
        'null'      : Tokens.NULLFACTOR,

        'readInt'   : Tokens.READINT,
        'readChar'  : Tokens.READCHAR,
        'printInt'  : Tokens.PRINTINT,
        'printChar' : Tokens.PRINTCHAR,

        'val'       : Tokens.VAL,
        'Int'       : Tokens.INT,
        'def'       : Tokens.DEF,
        'set'       : Tokens.SET,
        'var'       : Tokens.VAR,
        'class'     : Tokens.CLASS,
        'new'       : Tokens.NEW,
        'extends'   : Tokens.EXTENDS,

        'and'       : Tokens.AND,
        'or'        : Tokens.OR,
        'if'        : Tokens.IF,
        'else'      : Tokens.ELSE,
        'do'        : Tokens.DO,
        'while'     : Tokens.WHILE,
    }

    # Character classes as sets: membership is safe for the EOF marker
    # (None), whereas `None in "abc"` raises TypeError.  ascii_letters is
    # used instead of the locale-dependent string.letters.
    _LETTERS = frozenset(string.ascii_letters)
    _DIGITS = frozenset(string.digits)

    # Tokens that consist of exactly one character.  Built once for the
    # class instead of on every readToken() call.
    # NOTE(review): ',' maps to Tokens.PERIOD, as in the original table;
    # confirm against Tokens that this is the intended comma token.
    # '/' is normally consumed by nextToken() before readToken() runs.
    _SINGLE_CHAR_TOKENS = {
        '(' : Tokens.LPAREN,
        ')' : Tokens.RPAREN,
        '{' : Tokens.LACCOLADE,
        '}' : Tokens.RACCOLADE,

        '-' : Tokens.SUB,
        '+' : Tokens.ADD,
        '*' : Tokens.MUL,
        '%' : Tokens.MOD,
        '/' : Tokens.DIV,

        ';' : Tokens.SEMICOLON,
        ',' : Tokens.PERIOD,

        '.' : Tokens.DOT,
        ':' : Tokens.COLON,
    }

    # first char -> (expected second char, token if the second char
    # follows, token if it does not).  '&' and '|' are only valid doubled.
    _TWO_CHAR_TOKENS = {
        '!' : ('=', Tokens.NE, Tokens.NOT),
        '=' : ('=', Tokens.EQ, Tokens.EQUALS),
        '<' : ('=', Tokens.LE, Tokens.LT),
        '>' : ('=', Tokens.GE, Tokens.GT),
        '&' : ('&', Tokens.AND, Tokens.BAD),
        '|' : ('|', Tokens.OR, Tokens.BAD),
    }

    def __init__(self, in_stream):
        """Create a scanner on in_stream and load the first token.

        in_stream -- file descriptor (as accepted by os.read) to scan
        """
        self.token = Tokens.BAD
        self.start = (0, 0)   # (line, column) of the current token
        self.chars = ""
        self.buf = []         # characters collected for the current lexeme
        self.ch = ' '         # look-ahead; the blank is skipped as whitespace

        self.oldch = ""       # last raw character read, before '\r' handling

        self.line = 1
        self.column = 0

        self.in_stream = in_stream
        self.EOF_CH = None    # value of self.ch once the input is exhausted

        self.nextCh()
        self.nextToken()

    def nextToken(self):
        """Advance to the next token, skipping whitespace and // comments.

        Updates self.token and self.start (and self.chars where
        applicable).  On a lexical error a message is printed to standard
        output and the process exits with status 1.
        """
        self.buf = []

        # '/' is part of the skip set because it may open a // comment;
        # a lone '/' is the division operator and is returned from here.
        while self.ch in (' ', '\n', '\r', '\t', '/'):
            if self.ch != '/':
                self.nextCh()
                continue

            slash_pos = (self.line, self.column)
            self.nextCh()
            if self.ch != '/':
                self.start = slash_pos
                self.token = Tokens.DIV
                return

            # Comment: discard everything up to the end of the line.
            while (self.ch != '\n' and self.ch != '\r'
                   and self.ch != self.EOF_CH):
                self.nextCh()

        # Record the position only now, so that it refers to the token's
        # first character rather than to the whitespace preceding it.
        self.start = (self.line, self.column)

        self.token = self.readToken()
        if self.token == Tokens.BAD:
            # readToken() has already consumed the offending input, so
            # self.ch is the character that follows it (or EOF).
            if self.ch:
                print("error: parse error before '" + self.ch + "' token")
            else:
                print("error: parse error")
            sys.exit(1)

    def readToken(self):
        """Consume one token starting at self.ch and return its Tokens value.

        Returns Tokens.EOF at end of input and Tokens.BAD for input that
        does not form a token.  For identifiers, keywords, numbers and
        string literals the lexeme text is stored in self.chars.
        """
        if self.ch == self.EOF_CH:
            return Tokens.EOF

        # Identifier or keyword: letter, then letters / digits / '_'.
        if self.ch in self._LETTERS:
            while (self.ch in self._LETTERS or self.ch in self._DIGITS
                   or self.ch == '_'):
                self.buf.append(self.ch)
                self.nextCh()

            self.chars = ''.join(self.buf)
            return self.keywords.get(self.chars, Tokens.IDENT)

        # Number: a single '0', or a run of digits not starting with '0'.
        if self.ch in self._DIGITS:
            if self.ch == '0':
                self.buf.append(self.ch)
                self.nextCh()
            else:
                while self.ch in self._DIGITS:
                    self.buf.append(self.ch)
                    self.nextCh()

            self.chars = ''.join(self.buf)
            return Tokens.NUMBER

        # Compare against None explicitly so that a token value that
        # happens to be falsy is still recognised.
        single = self._SINGLE_CHAR_TOKENS.get(self.ch)
        if single is not None:
            self.nextCh()
            return single

        double = self._TWO_CHAR_TOKENS.get(self.ch)
        if double is not None:
            second, with_second, without_second = double
            self.nextCh()
            if self.ch == second:
                self.nextCh()
                return with_second
            return without_second

        # String literal: must be closed by '"' on the same line.
        if self.ch == '"':
            self.nextCh()

            while (self.ch != '"' and self.ch != "\n" and self.ch != "\r"
                   and self.ch != self.EOF_CH):
                self.buf.append(self.ch)
                self.nextCh()

            if self.ch == '"':
                self.chars = ''.join(self.buf)
                self.nextCh()
                return Tokens.STRING

            self.nextCh()
            return Tokens.BAD

        # Unknown character: consume it and report it as bad.
        self.nextCh()
        return Tokens.BAD

    def nextCh(self):
        """Advance self.ch by one character and update line/column.

        Line endings are normalised: a '\\r' is delivered as '\\n', and a
        '\\n' that directly follows a '\\r' is skipped.  Once EOF has been
        reached further calls do nothing.
        """
        if self.ch == self.EOF_CH:
            return

        # The counters are updated for the character about to be read.
        if self.ch == '\n':
            self.line += 1
            self.column = 1
        else:
            self.column += 1

        raw = self.readCh()
        if self.oldch == '\r' and raw == '\n':
            # Second half of a "\r\n" pair; the '\r' was already
            # delivered as a newline, so skip this character.
            raw = self.readCh()
        self.oldch = raw

        if raw == '\r':
            self.ch = '\n'
        else:
            self.ch = raw

    def readCh(self):
        """Read one raw character from the descriptor.

        Returns self.EOF_CH when os.read() yields no data.
        """
        c = os.read(self.in_stream, 1)
        if not c:
            return self.EOF_CH
        if not isinstance(c, str):
            # Python 3: os.read() returns bytes.  latin-1 maps every byte
            # to exactly one character.  Never taken on Python 2.
            c = c.decode('latin-1')
        return c

    def representation(self):
        """Return a printable form of the current token.

        Identifiers, numbers and strings include their text, e.g.
        TOKEN("text"); every other token is shown by its token value only.
        """
        if self.token in [Tokens.IDENT, Tokens.NUMBER, Tokens.STRING]:
            return "%s(\"%s\")" % (self.token, self.chars)
        else:
            return "%s" % self.token

# Stand-alone driver: print every token of the file named on the command line.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("usage: ScannerTest.py <file.zwei>")
        sys.exit(1)

    fin = os.open(sys.argv[1], os.O_RDONLY)
    try:
        scanner = Scanner(fin)

        while scanner.token != Tokens.EOF:
            print(scanner.representation())
            scanner.nextToken()
    finally:
        # Also runs when the scanner aborts via sys.exit() on a parse
        # error, so the descriptor is always released.
        os.close(fin)
    # NOTE(review): nothing in this file uses Enum and the import runs only
    # after scanning has finished; kept as in the original -- confirm
    # whether it can be dropped.
    import Enum