import sys
import os
EOF = ""
def get_num():
digits = []
t = getchar()
while t.isdigit():
digits.append(t)
t = getchar()
if t != EOF:
ungetc(t)
num = int("".join(digits))
return num
def get_word():
chars = []
t = getchar()
while t.isalnum():
chars.append(t)
t = getchar()
if t != EOF:
ungetc(t)
s = "".join(chars)
return s
class DragonException(Exception):
pass
class Tag(object):
NONE = -1
NUM = 256
DIV = 257
MOD = 258
ID = 259
DONE = 260
class Token(object):
def __init__(self, t, value=''):
self.tag = t
self.value = value
def __str__(self):
return "Token([%s] [%s])" % (str(self.tag), str(self.value))
class Num(Token):
def __init__(self, v):
Token.__init__(self, Tag.NUM, v)
class Word(Token):
def __init__(self, tag, lexeme):
Token.__init__(self, tag, lexeme)
class Lexer(object):
def __init__(self):
self.lineno = 0
self.lookahead = None
self.out = []
self.sym_table = {} # symbol table: lexeme -> token tag
self.insert("div", Tag.DIV)
self.insert("mod", Tag.MOD)
def lookup(self, key):
return self.sym_table.get(key, None)
def insert(self, key, tag):
self.sym_table[key] = tag
def parse(self):
self.lookahead = self.lexan()
while self.lookahead.tag != Tag.DONE:
self.expr()
self.match(';')
return self
def lexan(self):
token = None
while not token:
t = getchar()
if t == ' ' or t == '\t':
pass # strip out white space
elif t == '\n':
self.lineno += 1
elif t.isdigit():
ungetc(t)
num = get_num()
token = Num(num)
elif t.isalpha():
ungetc(t)
word = get_word()
tag = self.lookup(word)
if not tag:
tag = Tag.ID
self.insert(word, tag)
token = Word(tag, word)
elif t == EOF:
token = Token(Tag.DONE)
else:
token = Token(t)
return token
def expr(self):
self.term()
while True:
token = self.lookahead
if token.tag == '+' \
or token.tag == '-':
self.match(token.tag)
self.term()
self.emit(token)
else:
break
def term(self):
self.factor()
while True:
token = self.lookahead
if token.tag == '*' \
or token.tag == '/' \
or token.tag == Tag.DIV \
or token.tag == Tag.MOD:
self.match(token.tag)
self.factor()
self.emit(token)
else:
break
def factor(self):
token = self.lookahead
if token.tag == '(':
self.match('(')
self.expr()
self.match(')')
elif token.tag == Tag.NUM:
self.emit(token)
self.match(Tag.NUM)
elif token.tag == Tag.ID:
self.emit(token)
self.match(Tag.ID)
else:
raise DragonException("factor: bad token tag - %s" % token)
def match(self, tag):
if self.lookahead.tag == tag:
self.lookahead = self.lexan()
else:
raise DragonException("match: tag %s doesn't match %s" % (tag, self.lookahead))
def emit(self, token):
tag = token.tag
value = token.value
if tag == '+' or tag == '-' or tag == '*' or tag == '/':
out = "%s" % tag
elif tag == Tag.DIV:
out = "DIV"
elif tag == Tag.MOD:
out = "MOD"
elif tag == Tag.NUM:
out = "%d" % value
elif tag == Tag.ID:
out = "%s" % value
else:
out = "emit: %s" % (token)
self.out.append(out)
print out
def db(self, intro=""):
print "%s %s" % (intro, self.lookahead)
def main():
lex = Lexer().parse()
print lex.out
# python specific
import StringIO
FP = None
def getchar():
if FP:
return FP.read(1)
def ungetc(c=''):
if FP:
FP.seek(-1, os.SEEK_CUR)
SRC = """1 + (2 + 3);
4 * (5 + 6);
7 div (8 - 9);
10 mod (11 + 12);
"""
if __name__ == "__main__":
FP = StringIO.StringIO(SRC)
main()
Diff to Previous Revision
--- revision 1 2010-09-01 00:09:20
+++ revision 2 2010-09-01 14:49:37
@@ -1,5 +1,7 @@
import sys
import os
+
+EOF = ""
def get_num():
digits = []
@@ -7,7 +9,7 @@
while t.isdigit():
digits.append(t)
t = getchar()
- if t != "": # EOF
+ if t != EOF:
ungetc(t)
num = int("".join(digits))
return num
@@ -18,7 +20,7 @@
while t.isalnum():
chars.append(t)
t = getchar()
- if t != "": # EOF
+ if t != EOF:
ungetc(t)
s = "".join(chars)
return s
@@ -55,15 +57,15 @@
self.lineno = 0
self.lookahead = None
self.out = []
- self.sym_table = {} # symbol table: lexeme -> token type
+ self.sym_table = {} # symbol table: lexeme -> token tag
self.insert("div", Tag.DIV)
self.insert("mod", Tag.MOD)
def lookup(self, key):
return self.sym_table.get(key, None)
- def insert(self, key, token_type):
- self.sym_table[key] = token_type
+ def insert(self, key, tag):
+ self.sym_table[key] = tag
def parse(self):
self.lookahead = self.lexan()
@@ -86,12 +88,13 @@
token = Num(num)
elif t.isalpha():
ungetc(t)
- word = get_word()
- if not self.lookup(word):
- self.insert(word, Tag.ID)
- tag = self.lookup(word)
+ word = get_word()
+ tag = self.lookup(word)
+ if not tag:
+ tag = Tag.ID
+ self.insert(word, tag)
token = Word(tag, word)
- elif t == "":
+ elif t == EOF:
token = Token(Tag.DONE)
else:
token = Token(t)
@@ -100,66 +103,67 @@
def expr(self):
self.term()
while True:
- if self.lookahead.tag == '+' \
- or self.lookahead.tag == '-':
- t = self.lookahead
- self.match(self.lookahead.tag)
+ token = self.lookahead
+ if token.tag == '+' \
+ or token.tag == '-':
+ self.match(token.tag)
self.term()
- self.emit(t)
+ self.emit(token)
else:
break
def term(self):
self.factor()
while True:
- if self.lookahead.tag == '*' \
- or self.lookahead.tag == '/' \
- or self.lookahead.tag == Tag.DIV \
- or self.lookahead.tag == Tag.MOD:
- t = self.lookahead
- self.match(self.lookahead.tag)
+ token = self.lookahead
+ if token.tag == '*' \
+ or token.tag == '/' \
+ or token.tag == Tag.DIV \
+ or token.tag == Tag.MOD:
+ self.match(token.tag)
self.factor()
- self.emit(t)
+ self.emit(token)
else:
break
def factor(self):
- if self.lookahead.tag == '(':
+ token = self.lookahead
+ if token.tag == '(':
self.match('(')
self.expr()
self.match(')')
- elif self.lookahead.tag == Tag.NUM:
- self.emit(self.lookahead)
+ elif token.tag == Tag.NUM:
+ self.emit(token)
self.match(Tag.NUM)
- elif self.lookahead.tag == Tag.ID:
- self.emit(self.lookahead)
+ elif token.tag == Tag.ID:
+ self.emit(token)
self.match(Tag.ID)
else:
- raise DragonException("factor: bad token tag - %s" % self.lookahead)
+ raise DragonException("factor: bad token tag - %s" % token)
- def match(self, t):
- if self.lookahead.tag == t:
+ def match(self, tag):
+ if self.lookahead.tag == tag:
self.lookahead = self.lexan()
else:
- raise DragonException("match: tag %s doesn't match %s" % (t, self.lookahead))
+ raise DragonException("match: tag %s doesn't match %s" % (tag, self.lookahead))
def emit(self, token):
- t = token.tag
- tval = token.value
- if t == '+' or t == '-' or t == '*' or t == '/':
- out = "%s" % t
- elif t == Tag.DIV:
+ tag = token.tag
+ value = token.value
+ if tag == '+' or tag == '-' or tag == '*' or tag == '/':
+ out = "%s" % tag
+ elif tag == Tag.DIV:
out = "DIV"
- elif t == Tag.MOD:
+ elif tag == Tag.MOD:
out = "MOD"
- elif t == Tag.NUM:
- out = "%d" % tval
- elif t == Tag.ID:
- out = "%s" % tval
+ elif tag == Tag.NUM:
+ out = "%d" % value
+ elif tag == Tag.ID:
+ out = "%s" % value
else:
out = "emit: %s" % (token)
- self.out.append(out) # for tracing and testing
+ self.out.append(out)
print out
def db(self, intro=""):
@@ -186,6 +190,7 @@
7 div (8 - 9);
10 mod (11 + 12);
"""
+
if __name__ == "__main__":
FP = StringIO.StringIO(SRC)
main()