# scanner for the zwei compiler
#
# (c) 2005 Andreas Jaggi, Michael Karlen
#
# GNU Public License http://opensource.org/licenses/gpl-license.php
from Tokens import Tokens
import sys
import string
import os
class Scanner:
    """Lexical scanner for the zwei language.

    The scanner reads one character at a time from an OS-level file
    descriptor and exposes the current token through these attributes:

    token  -- code of the current token (a member of Tokens)
    start  -- (line, column) of the first character of the current token
    chars  -- text of the most recent IDENT, NUMBER, STRING or keyword
    ch     -- current look-ahead character, or EOF_CH at end of input
    line, column -- position of the look-ahead character

    Constructing a Scanner already reads the first token; call
    nextToken() to advance.
    """

    # Reserved words; any other identifier is reported as Tokens.IDENT.
    keywords = {
        'true': Tokens.TRUE,
        'false': Tokens.FALSE,
        'this': Tokens.THIS,
        'Null': Tokens.NULLTYPE,
        'null': Tokens.NULLFACTOR,
        'readInt': Tokens.READINT,
        'readChar': Tokens.READCHAR,
        'printInt': Tokens.PRINTINT,
        'printChar': Tokens.PRINTCHAR,
        'val': Tokens.VAL,
        'Int': Tokens.INT,
        'def': Tokens.DEF,
        'set': Tokens.SET,
        'var': Tokens.VAR,
        'class': Tokens.CLASS,
        'new': Tokens.NEW,
        'extends': Tokens.EXTENDS,
        'and': Tokens.AND,
        'or': Tokens.OR,
        'if': Tokens.IF,
        'else': Tokens.ELSE,
        'do': Tokens.DO,
        'while': Tokens.WHILE,
    }

    # Tokens made of exactly one character.  The comma is reported as
    # Tokens.PERIOD.  '/' is normally consumed by nextToken() before
    # readToken() runs; the entry is kept for direct readToken() callers.
    _SINGLE_CHAR_TOKENS = {
        '(': Tokens.LPAREN,
        ')': Tokens.RPAREN,
        '{': Tokens.LACCOLADE,
        '}': Tokens.RACCOLADE,
        '-': Tokens.SUB,
        '+': Tokens.ADD,
        '*': Tokens.MUL,
        '%': Tokens.MOD,
        '/': Tokens.DIV,
        ';': Tokens.SEMICOLON,
        ',': Tokens.PERIOD,
        '.': Tokens.DOT,
        ':': Tokens.COLON,
    }

    # first char -> (second char, token if the second char follows,
    #                token if it does not)
    _TWO_CHAR_TOKENS = {
        '!': ('=', Tokens.NE, Tokens.NOT),
        '=': ('=', Tokens.EQ, Tokens.EQUALS),
        '<': ('=', Tokens.LE, Tokens.LT),
        '>': ('=', Tokens.GE, Tokens.GT),
        '&': ('&', Tokens.AND, Tokens.BAD),
        '|': ('|', Tokens.OR, Tokens.BAD),
    }

    def __init__(self, in_stream):
        """Create a scanner and read the first token.

        in_stream -- file descriptor (as returned by os.open) to read from
        """
        self.token = Tokens.BAD
        self.start = (0, 0)  # row, col
        self.chars = ""
        self.buf = []
        self.ch = ' '
        self.oldch = ""
        self.line = 1
        self.column = 0
        self.in_stream = in_stream
        self.EOF_CH = None
        self.nextCh()
        self.nextToken()

    def _isLetter(self, ch):
        """Return True if ch is an ASCII letter; False for EOF_CH."""
        # A membership test of None against a string raises TypeError,
        # so end of input has to be excluded explicitly.
        return ch is not None and ch in string.ascii_letters

    def _isDigit(self, ch):
        """Return True if ch is a decimal digit; False for EOF_CH."""
        return ch is not None and ch in string.digits

    def nextToken(self):
        """Skip whitespace and '//' comments, then scan the next token.

        Sets self.token, self.start and, where applicable, self.chars.
        On a bad token an error message is printed and the process exits
        with status 1.
        """
        self.buf = []
        while self.ch in (' ', '\n', '\r', '\t', '/'):
            if self.ch != '/':  # whitespace
                self.nextCh()
                continue
            slash_pos = (self.line, self.column)
            self.nextCh()
            if self.ch != '/':  # division
                self.start = slash_pos
                self.token = Tokens.DIV
                return
            # comment: discard everything up to the end of the line
            while self.ch not in ('\n', '\r', self.EOF_CH):
                self.nextCh()
        # Recorded only now so that it points at the token itself rather
        # than at the whitespace or comment preceding it.
        self.start = (self.line, self.column)
        self.token = self.readToken()
        if self.token == Tokens.BAD:
            if self.ch:
                print("error: parse error before '" + self.ch + "' token")
            else:
                print("error: parse error")
            sys.exit(1)

    def readToken(self):
        """Scan one token starting at self.ch and return its code.

        Consumes the token's characters.  Returns Tokens.BAD for input
        that does not form a token.
        """
        if self.ch == self.EOF_CH:
            return Tokens.EOF

        # identifier or keyword: letter { letter | digit | '_' }
        if self._isLetter(self.ch):
            while (self._isLetter(self.ch) or self._isDigit(self.ch)
                   or self.ch == '_'):
                self.buf.append(self.ch)
                self.nextCh()
            self.chars = ''.join(self.buf)
            return self.keywords.get(self.chars, Tokens.IDENT)

        # number: a single '0', or a run of digits not starting with '0'
        if self._isDigit(self.ch):
            if self.ch == '0':
                self.buf.append(self.ch)
                self.nextCh()
            else:
                while self._isDigit(self.ch):
                    self.buf.append(self.ch)
                    self.nextCh()
            self.chars = ''.join(self.buf)
            return Tokens.NUMBER

        single = self._SINGLE_CHAR_TOKENS.get(self.ch)
        # "is not None" rather than truthiness, so a token whose code
        # happens to be falsy is still recognised.
        if single is not None:
            self.nextCh()
            return single

        pair = self._TWO_CHAR_TOKENS.get(self.ch)
        if pair is not None:
            second, matched, unmatched = pair
            self.nextCh()
            if self.ch == second:
                self.nextCh()
                return matched
            return unmatched

        # string literal: must be closed on the line it starts on
        if self.ch == '"':
            self.nextCh()
            while self.ch not in ('"', '\n', '\r', self.EOF_CH):
                self.buf.append(self.ch)
                self.nextCh()
            if self.ch == '"':
                self.chars = ''.join(self.buf)
                self.nextCh()
                return Tokens.STRING
            self.nextCh()
            return Tokens.BAD

        # anything else is not part of the language
        self.nextCh()
        return Tokens.BAD

    def nextCh(self):
        """Advance self.ch by one character, normalising line endings.

        A lone '\\r' and a '\\r\\n' pair are both delivered as a single
        '\\n'.  self.line and self.column are updated to the position of
        the new character.  Does nothing once end of input is reached.
        """
        if self.ch == self.EOF_CH:
            return
        if self.ch == '\n':
            self.column = 1
            self.line += 1
        else:
            self.column += 1

        # self.oldch holds the raw (un-normalised) previous character.
        self.ch = self.readCh()
        if self.oldch == '\r' and self.ch == '\n':
            # second half of a CR LF pair: already reported, skip it
            self.oldch = self.readCh()
        else:
            self.oldch = self.ch

        if self.oldch == '\r':
            self.ch = '\n'
        else:
            self.ch = self.oldch

    def readCh(self):
        """Read one raw character from the descriptor.

        Returns EOF_CH when no more input is available.
        """
        c = os.read(self.in_stream, 1)
        if not c:
            return self.EOF_CH
        if not isinstance(c, str):
            # os.read returns bytes on Python 3; latin-1 maps every byte
            # to exactly one character and cannot fail.
            c = c.decode('latin-1')
        return c

    def representation(self):
        """Return a printable form of the current token.

        IDENT, NUMBER and STRING tokens include their text.
        """
        if self.token in [Tokens.IDENT, Tokens.NUMBER, Tokens.STRING]:
            return "%s(\"%s\")" % (self.token, self.chars)
        return "%s" % self.token
if __name__ == '__main__':
    # Command-line driver: print every token of the given source file,
    # one per line, stopping at (and not printing) the EOF token.
    if len(sys.argv) != 2:
        print("usage: ScannerTest.py <file.zwei>")
        sys.exit(1)
    fin = os.open(sys.argv[1], os.O_RDONLY)
    try:
        scanner = Scanner(fin)
        while scanner.token != Tokens.EOF:
            print(scanner.representation())
            scanner.nextToken()
    finally:
        # Also runs when the scanner aborts via sys.exit() on a bad
        # token, so the descriptor is always released.
        os.close(fin)
import Enum