# scanner for the zwei compiler
#
# (c) 2005 Andreas Jaggi, Michael Karlen
#
# GNU Public License http://opensource.org/licenses/gpl-license.php

from Tokens import Tokens
import sys
import string
import os


class Scanner:
    """Hand-written lexical scanner for zwei source text.

    Input is read one byte at a time from an OS-level file descriptor.
    The scanner state is exposed through attributes:

      token         -- the current token (a member of Tokens)
      chars         -- text of the most recent identifier/keyword, number
                       or string token
      start         -- (row, col) of the first character of the current token
      ch            -- the lookahead character, or EOF_CH at end of input
      line, column  -- position of the lookahead character

    Constructing a Scanner immediately scans the first token; call
    nextToken() to advance to the following one.
    """

    # Character classes are sets on purpose: a set membership test simply
    # yields False for EOF_CH (None), whereas ``None in "abc"`` raises
    # TypeError.  That used to crash the scanner when the input ended in an
    # identifier or a number without a trailing newline.
    _LETTERS = frozenset(string.ascii_letters)
    _DIGITS = frozenset(string.digits)
    _IDENT_CHARS = _LETTERS | _DIGITS | frozenset('_')

    # Characters nextToken() consumes before a token can start.  '/' is in
    # here because it may introduce a '//' comment.
    _SKIPPABLE = frozenset(' \n\r\t/')

    keywords = {
        'true': Tokens.TRUE,
        'false': Tokens.FALSE,
        'this': Tokens.THIS,
        'Null': Tokens.NULLTYPE,
        'null': Tokens.NULLFACTOR,

        'readInt': Tokens.READINT,
        'readChar': Tokens.READCHAR,
        'printInt': Tokens.PRINTINT,
        'printChar': Tokens.PRINTCHAR,

        'val': Tokens.VAL,
        'Int': Tokens.INT,
        'def': Tokens.DEF,
        'set': Tokens.SET,
        'var': Tokens.VAR,
        'class': Tokens.CLASS,
        'new': Tokens.NEW,
        'extends': Tokens.EXTENDS,

        'and': Tokens.AND,
        'or': Tokens.OR,
        'if': Tokens.IF,
        'else': Tokens.ELSE,
        'do': Tokens.DO,
        'while': Tokens.WHILE,
    }

    # Tokens that consist of exactly one character.
    _SINGLE_CHAR_TOKENS = {
        '(': Tokens.LPAREN,
        ')': Tokens.RPAREN,
        '{': Tokens.LACCOLADE,
        '}': Tokens.RACCOLADE,

        '-': Tokens.SUB,
        '+': Tokens.ADD,
        '*': Tokens.MUL,
        '%': Tokens.MOD,
        '/': Tokens.DIV,

        ';': Tokens.SEMICOLON,
        ',': Tokens.PERIOD,

        '.': Tokens.DOT,
        ':': Tokens.COLON,
    }

    # Operators that may be followed by '=':
    # character -> (token when followed by '=', token when standing alone)
    _EQ_SUFFIX_OPERATORS = {
        '!': (Tokens.NE, Tokens.NOT),
        '=': (Tokens.EQ, Tokens.EQUALS),
        '<': (Tokens.LE, Tokens.LT),
        '>': (Tokens.GE, Tokens.GT),
    }

    # Operators that are only valid when the character is doubled.
    _DOUBLED_OPERATORS = {
        '&': Tokens.AND,
        '|': Tokens.OR,
    }

    def __init__(self, in_stream):
        """Create a scanner reading from *in_stream* and scan the first token.

        in_stream -- an OS-level file descriptor (it is passed to os.read).
        """
        self.token = Tokens.BAD
        self.start = (0, 0)  # row, col
        self.chars = ""
        self.buf = []
        self.ch = ' '

        # Last raw character read, kept so "\r\n" collapses to one newline.
        self.oldch = ""

        self.line = 1
        self.column = 0

        self.in_stream = in_stream
        self.EOF_CH = None

        # Prime the lookahead character, then the first token.
        self.nextCh()
        self.nextToken()

    def nextToken(self):
        """Skip whitespace and '//' comments, then scan the next token.

        The result is stored in self.token, and self.start is set to the
        position of the token's first character.  On a lexical error a
        message is printed and the process exits with status 1.
        """
        self.buf = []

        while self.ch in self._SKIPPABLE:
            if self.ch == '/':
                slash_pos = (self.line, self.column)
                self.nextCh()
                if self.ch == '/':
                    # comment: runs to the end of the line (or of the input)
                    while self.ch not in ('\n', '\r', self.EOF_CH):
                        self.nextCh()
                else:
                    # a lone '/' is the division operator
                    self.start = slash_pos
                    self.token = Tokens.DIV
                    return
            else:
                # whitespace
                self.nextCh()

        # Record the position only now, so that it refers to the token
        # itself and not to the whitespace/comments in front of it.
        self.start = (self.line, self.column)

        self.token = self.readToken()
        if self.token == Tokens.BAD:
            if self.ch:
                print("error: parse error before '" + self.ch + "' token")
            else:
                print("error: parse error")
            sys.exit(1)

    def readToken(self):
        """Scan one token starting at the lookahead character and return it.

        Expects self.buf to be empty (nextToken() resets it).  For
        identifiers, keywords, numbers and strings the token text is left in
        self.chars.  Returns Tokens.BAD for input that forms no token.
        """
        first = self.ch

        if first == self.EOF_CH:
            return Tokens.EOF

        # identifier or keyword: letter (letter | digit | '_')*
        if first in self._LETTERS:
            while self.ch in self._IDENT_CHARS:
                self.buf.append(self.ch)
                self.nextCh()
            self.chars = ''.join(self.buf)
            return self.keywords.get(self.chars, Tokens.IDENT)

        # number: either a single '0' or a run of digits not starting with '0'
        if first in self._DIGITS:
            if first == '0':
                self.buf.append(first)
                self.nextCh()
            else:
                while self.ch in self._DIGITS:
                    self.buf.append(self.ch)
                    self.nextCh()
            self.chars = ''.join(self.buf)
            return Tokens.NUMBER

        # Compare against None rather than relying on truthiness, so a
        # token whose value happens to be falsy is still recognised.
        single = self._SINGLE_CHAR_TOKENS.get(first)
        if single is not None:
            self.nextCh()
            return single

        if first in self._EQ_SUFFIX_OPERATORS:
            with_eq, alone = self._EQ_SUFFIX_OPERATORS[first]
            self.nextCh()
            if self.ch == '=':
                self.nextCh()
                return with_eq
            return alone

        if first in self._DOUBLED_OPERATORS:
            self.nextCh()
            if self.ch == first:
                self.nextCh()
                return self._DOUBLED_OPERATORS[first]
            return Tokens.BAD

        # string literal: must be closed on the same line, no escapes
        if first == '"':
            self.nextCh()
            while self.ch not in ('"', '\n', '\r', self.EOF_CH):
                self.buf.append(self.ch)
                self.nextCh()

            if self.ch == '"':
                self.chars = ''.join(self.buf)
                self.nextCh()
                return Tokens.STRING
            self.nextCh()
            return Tokens.BAD

        # anything else is not part of the language
        self.nextCh()
        return Tokens.BAD

    def nextCh(self):
        """Advance the lookahead character self.ch by one.

        Updates line/column for the character being left behind and
        normalises line endings: a lone "\\r" and the pair "\\r\\n" are both
        delivered as a single "\\n".  Does nothing once EOF has been reached.
        """
        if self.ch == self.EOF_CH:
            return

        if self.ch == '\n':
            self.column = 1
            self.line += 1
        else:
            self.column += 1

        raw = self.readCh()
        if self.oldch == '\r' and raw == '\n':
            # The '\r' was already reported as '\n'; drop the '\n' of "\r\n".
            raw = self.readCh()
        self.oldch = raw

        if raw == '\r':
            self.ch = '\n'
        else:
            self.ch = raw

    def readCh(self):
        """Read one raw character from the input; return EOF_CH at the end."""
        c = os.read(self.in_stream, 1)
        if not c:
            return self.EOF_CH
        if not isinstance(c, str):
            # os.read() yields bytes on Python 3; turn the single byte into
            # a one-character str so the comparisons elsewhere still work.
            # On Python 2 the value already is a str and is left untouched.
            c = c.decode('latin-1')
        return c

    def representation(self):
        """Return a printable form of the current token.

        Identifiers, numbers and strings are shown together with their text,
        every other token by its name alone.
        """
        if self.token in [Tokens.IDENT, Tokens.NUMBER, Tokens.STRING]:
            return "%s(\"%s\")" % (self.token, self.chars)
        else:
            return "%s" % self.token


if __name__ == '__main__':
    # Stand-alone driver: print every token of the file named on the
    # command line.
    if len(sys.argv) != 2:
        print("usage: ScannerTest.py <file>")
        sys.exit(1)

    fin = os.open(sys.argv[1], os.O_RDONLY)
    try:
        scanner = Scanner(fin)

        while scanner.token != Tokens.EOF:
            print(scanner.representation())
            scanner.nextToken()
    finally:
        # The scanner exits via sys.exit() on a lexical error; make sure the
        # descriptor is closed in that case too.
        os.close(fin)

    # NOTE(review): nothing in this file uses Enum; the import is kept from
    # the original, whose indentation was lost -- confirm whether it belongs
    # here, at module level, or can be dropped.
    import Enum