# scanner for the zwei compiler
#
# (c) 2005 Andreas Jaggi, Michael Karlen
#
# GNU Public License http://opensource.org/licenses/gpl-license.php

from Tokens import Tokens
import sys
import string
import os


class Scanner:
    """Hand-written lexer that turns a byte stream into zwei tokens.

    The scanner reads one character at a time from an OS-level file
    descriptor and keeps exactly one character of lookahead in ``self.ch``.

    State visible to callers after each ``nextToken()``:
      token -- the current token (a member of ``Tokens``)
      chars -- the text of the last IDENT / NUMBER / STRING token
      start -- (row, col) of the first character of the current token
    """

    # Reserved words; any other identifier is reported as Tokens.IDENT.
    keywords = {
        'true'      : Tokens.TRUE,
        'false'     : Tokens.FALSE,
        'this'      : Tokens.THIS,
        'Null'      : Tokens.NULLTYPE,
        'null'      : Tokens.NULLFACTOR,
        'readInt'   : Tokens.READINT,
        'readChar'  : Tokens.READCHAR,
        'printInt'  : Tokens.PRINTINT,
        'printChar' : Tokens.PRINTCHAR,
        'val'       : Tokens.VAL,
        'Int'       : Tokens.INT,
        'def'       : Tokens.DEF,
        'set'       : Tokens.SET,
        'var'       : Tokens.VAR,
        'class'     : Tokens.CLASS,
        'new'       : Tokens.NEW,
        'extends'   : Tokens.EXTENDS,
        'and'       : Tokens.AND,
        'or'        : Tokens.OR,
        'if'        : Tokens.IF,
        'else'      : Tokens.ELSE,
        'do'        : Tokens.DO,
        'while'     : Tokens.WHILE,
    }

    # Character classes are sets rather than strings: ``None in "abc"``
    # raises TypeError, whereas ``None in frozenset(...)`` is simply False,
    # so the scanning loops terminate cleanly when the lookahead is EOF.
    # ascii_letters is used so identifiers do not depend on the locale.
    _LETTERS = frozenset(string.ascii_letters)
    _DIGITS = frozenset(string.digits)
    _IDENT_CHARS = _LETTERS | _DIGITS | frozenset('_')

    # Tokens that consist of exactly one character.
    # '/' is listed for completeness; nextToken() handles it first because
    # it has to tell a division apart from a '//' comment.
    # NOTE(review): ',' maps to Tokens.PERIOD and '.' to Tokens.DOT; the
    # naming comes from the Tokens module and is kept as is.
    _SINGLE_CHAR = {
        '(' : Tokens.LPAREN,
        ')' : Tokens.RPAREN,
        '{' : Tokens.LACCOLADE,
        '}' : Tokens.RACCOLADE,
        '-' : Tokens.SUB,
        '+' : Tokens.ADD,
        '*' : Tokens.MUL,
        '%' : Tokens.MOD,
        '/' : Tokens.DIV,
        ';' : Tokens.SEMICOLON,
        ',' : Tokens.PERIOD,
        '.' : Tokens.DOT,
        ':' : Tokens.COLON,
    }

    # Operators of one or two characters:
    #   first char -> (second char, token if it follows, token otherwise)
    _TWO_CHAR = {
        '!' : ('=', Tokens.NE,  Tokens.NOT),
        '=' : ('=', Tokens.EQ,  Tokens.EQUALS),
        '<' : ('=', Tokens.LE,  Tokens.LT),
        '>' : ('=', Tokens.GE,  Tokens.GT),
        '&' : ('&', Tokens.AND, Tokens.BAD),
        '|' : ('|', Tokens.OR,  Tokens.BAD),
    }

    def __init__(self, in_stream):
        """Create a scanner and position it on the first token.

        in_stream -- a file descriptor as accepted by os.read()
        """
        self.token = Tokens.BAD
        self.start = (0, 0)     # row, col
        self.chars = ""
        self.buf = []
        self.ch = ' '           # current (lookahead) character
        self.oldch = ""         # last raw character read, before CR folding
        self.line = 1
        self.column = 0
        self.in_stream = in_stream
        self.EOF_CH = None      # sentinel stored in self.ch at end of input
        self.nextCh()
        self.nextToken()

    def nextToken(self):
        """Skip whitespace and '//' comments, then scan the next token.

        The result is stored in self.token (and self.chars for tokens that
        carry text).  self.start is set to the (row, col) of the token's
        first character.  On a bad token an error is printed and the
        process exits with status 1.
        """
        self.buf = []
        while self.ch in (' ', '\n', '\r', '\t', '/'):
            if self.ch != '/':
                # plain whitespace
                self.nextCh()
                continue
            # Remember where the slash was in case it is a division.
            slash_pos = (self.line, self.column)
            self.nextCh()
            if self.ch == '/':
                # comment: runs up to (not including) the end of the line
                while self.ch not in ('\n', '\r', self.EOF_CH):
                    self.nextCh()
            else:
                # division
                self.start = slash_pos
                self.token = Tokens.DIV
                return

        # Recorded after skipping so it points at the token itself rather
        # than at the whitespace or comment preceding it.
        self.start = (self.line, self.column)
        self.token = self.readToken()
        if self.token == Tokens.BAD:
            if self.ch:
                print("error: parse error before '" + self.ch + "' token")
            else:
                print("error: parse error")
            sys.exit(1)

    def readToken(self):
        """Scan one token starting at self.ch and return its Tokens value.

        Consumes the token's characters, leaving self.ch on the first
        character after it.  Returns Tokens.BAD for anything that is not
        recognised (the offending character is consumed).
        """
        if self.ch == self.EOF_CH:
            return Tokens.EOF

        # identifier or keyword: letter (letter | digit | '_')*
        if self.ch in self._LETTERS:
            while self.ch in self._IDENT_CHARS:
                self.buf.append(self.ch)
                self.nextCh()
            self.chars = ''.join(self.buf)
            return self.keywords.get(self.chars, Tokens.IDENT)

        # number: a lone '0', or a run of digits not starting with '0'
        if self.ch in self._DIGITS:
            if self.ch == '0':
                self.buf.append(self.ch)
                self.nextCh()
            else:
                while self.ch in self._DIGITS:
                    self.buf.append(self.ch)
                    self.nextCh()
            self.chars = ''.join(self.buf)
            return Tokens.NUMBER

        single = self._SINGLE_CHAR.get(self.ch)
        if single is not None:
            self.nextCh()
            return single

        pair = self._TWO_CHAR.get(self.ch)
        if pair is not None:
            follower, matched, unmatched = pair
            self.nextCh()
            if self.ch == follower:
                self.nextCh()
                return matched
            return unmatched

        # string literal: everything up to the closing quote on this line
        if self.ch == '"':
            self.nextCh()
            while self.ch not in ('"', '\n', '\r', self.EOF_CH):
                self.buf.append(self.ch)
                self.nextCh()
            closed = (self.ch == '"')
            self.nextCh()
            if not closed:
                # unterminated string
                return Tokens.BAD
            self.chars = ''.join(self.buf)
            return Tokens.STRING

        # unknown character
        self.nextCh()
        return Tokens.BAD

    def nextCh(self):
        """Advance self.ch by one character and update line/column.

        A carriage return, and a carriage return followed by a line feed,
        are both delivered as a single '\\n'.  Once EOF has been reached
        further calls do nothing.
        """
        if self.ch == self.EOF_CH:
            return

        # Position bookkeeping is based on the character being left behind.
        if self.ch == '\n':
            self.column = 1
            self.line += 1
        else:
            self.column += 1

        raw = self.readCh()
        if self.oldch == '\r' and raw == '\n':
            # second half of a CR LF pair: already reported as '\n'
            raw = self.readCh()
        self.oldch = raw

        if raw == '\r':
            self.ch = '\n'
        else:
            self.ch = raw

    def readCh(self):
        """Read one raw character from the descriptor, or EOF_CH at end."""
        c = os.read(self.in_stream, 1)
        if not c:
            return self.EOF_CH
        if not isinstance(c, str):
            # os.read() returns bytes on Python 3; map each byte to one
            # character so comparisons against str literals keep working.
            c = c.decode('latin-1')
        return c

    def representation(self):
        """Return a printable form of the current token.

        Tokens that carry text are rendered as TOKEN("text").
        """
        if self.token in [Tokens.IDENT, Tokens.NUMBER, Tokens.STRING]:
            return "%s(\"%s\")" % (self.token, self.chars)
        return "%s" % self.token


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("usage: ScannerTest.py <file.zwei>")
        sys.exit(1)

    fin = os.open(sys.argv[1], os.O_RDONLY)
    try:
        scanner = Scanner(fin)
        while scanner.token != Tokens.EOF:
            print(scanner.representation())
            scanner.nextToken()
    finally:
        # also runs when nextToken() exits on a parse error
        os.close(fin)

# NOTE(review): trailing import kept from the original text; nothing in the
# visible code uses Enum -- confirm whether it belongs to this module.
import Enum