Welcome, guest | Sign In | My Account | Store | Cart
import sys
import os

EOF = ""

def get_num():
    digits = []
    t = getchar()
    while t.isdigit():
        digits.append(t)
        t = getchar()
    if t != EOF: 
        ungetc(t)
    num = int("".join(digits))
    return num

def get_word():
    chars = []
    t = getchar()
    while t.isalnum():
        chars.append(t)
        t = getchar()
    if t != EOF: 
        ungetc(t)
    s = "".join(chars)
    return s

class DragonException(Exception):
    pass

class Tag(object):
    NONE = -1
    NUM = 256
    DIV = 257
    MOD = 258
    ID = 259
    DONE = 260    
    
class Token(object):
    def __init__(self, t, value=''):
        self.tag = t
        self.value = value
        
    def __str__(self):
        return "Token([%s] [%s])" % (str(self.tag), str(self.value))
    
class Num(Token):
    def __init__(self, v):
        Token.__init__(self, Tag.NUM, v)
      
class Word(Token):
    def __init__(self, tag, lexeme):
        Token.__init__(self, tag, lexeme)

class Lexer(object):
    def __init__(self):
        self.lineno = 0
        self.lookahead = None
        self.out = []
        self.sym_table = {}  # symbol table: lexeme -> token tag
        self.insert("div", Tag.DIV)
        self.insert("mod", Tag.MOD)
            
    def lookup(self, key):
        return self.sym_table.get(key, None)
       
    def insert(self, key, tag):
        self.sym_table[key] = tag
       
    def parse(self):
        self.lookahead = self.lexan()
        while self.lookahead.tag != Tag.DONE:
            self.expr()
            self.match(';')
        return self

    def lexan(self):
        token = None
        while not token:
            t = getchar()
            if t == ' ' or  t == '\t':
                pass # strip out white space
            elif t == '\n':
                self.lineno += 1
            elif t.isdigit():
                ungetc(t)
                num = get_num()
                token = Num(num)       
            elif t.isalpha():
                ungetc(t)
                word = get_word()
                tag = self.lookup(word) 
                if not tag:
                    tag = Tag.ID
                    self.insert(word, tag)
                token = Word(tag, word)
            elif t == EOF:
                token = Token(Tag.DONE)
            else:
                token = Token(t)
        return token
            
    def expr(self):
        self.term()
        while True:
            token = self.lookahead
            if token.tag == '+' \
                    or token.tag == '-':
                self.match(token.tag)
                self.term()
                self.emit(token)
            else:
                break
            
    def term(self):
        self.factor()
        while True:
            token = self.lookahead
            if token.tag == '*' \
                    or token.tag == '/' \
                    or token.tag == Tag.DIV \
                    or token.tag == Tag.MOD:
                self.match(token.tag)
                self.factor()
                self.emit(token)
            else:
                break
            
    def factor(self):
        token = self.lookahead
        if token.tag == '(':
            self.match('(')
            self.expr()
            self.match(')')
        elif token.tag == Tag.NUM:
            self.emit(token)
            self.match(Tag.NUM)
        elif token.tag == Tag.ID:
            self.emit(token)
            self.match(Tag.ID)
        else:
            raise DragonException("factor: bad token tag - %s" % token)
        
    def match(self, tag):
        if self.lookahead.tag == tag:
            self.lookahead = self.lexan()
        else:
            raise DragonException("match: tag %s doesn't match %s" % (tag, self.lookahead))

    def emit(self, token):
        tag = token.tag
        value = token.value
        if tag == '+' or tag == '-' or tag == '*' or tag == '/':
            out = "%s" % tag
        elif tag == Tag.DIV:
            out = "DIV"
        elif tag == Tag.MOD:
            out = "MOD"
        elif tag == Tag.NUM:
            out = "%d" % value
        elif tag == Tag.ID:
            out = "%s" % value
        else:
            out = "emit: %s" % (token)
            
        self.out.append(out)
        print out
            
    def db(self, intro=""):
        print "%s %s" % (intro,  self.lookahead)
        
def main():
    lex = Lexer().parse()
    print lex.out

# python specific
import StringIO

FP = None
def getchar():
    if FP:
        return FP.read(1)
    
def ungetc(c=''):
    if FP:
        FP.seek(-1, os.SEEK_CUR)    
        
SRC = """1 + (2 + 3);
4 * (5 + 6);
7 div (8 - 9);
10 mod (11 + 12);
"""

if __name__ == "__main__":
    FP = StringIO.StringIO(SRC)
    main()

Diff to Previous Revision

--- revision 1 2010-09-01 00:09:20
+++ revision 2 2010-09-01 14:49:37
@@ -1,5 +1,7 @@
 import sys
 import os
+
+EOF = ""
 
 def get_num():
     digits = []
@@ -7,7 +9,7 @@
     while t.isdigit():
         digits.append(t)
         t = getchar()
-    if t != "": # EOF
+    if t != EOF: 
         ungetc(t)
     num = int("".join(digits))
     return num
@@ -18,7 +20,7 @@
     while t.isalnum():
         chars.append(t)
         t = getchar()
-    if t != "": # EOF
+    if t != EOF: 
         ungetc(t)
     s = "".join(chars)
     return s
@@ -55,15 +57,15 @@
         self.lineno = 0
         self.lookahead = None
         self.out = []
-        self.sym_table = {}  # symbol table: lexeme -> token type
+        self.sym_table = {}  # symbol table: lexeme -> token tag
         self.insert("div", Tag.DIV)
         self.insert("mod", Tag.MOD)
             
     def lookup(self, key):
         return self.sym_table.get(key, None)
        
-    def insert(self, key, token_type):
-        self.sym_table[key] = token_type
+    def insert(self, key, tag):
+        self.sym_table[key] = tag
        
     def parse(self):
         self.lookahead = self.lexan()
@@ -86,12 +88,13 @@
                 token = Num(num)       
             elif t.isalpha():
                 ungetc(t)
-                word = get_word()      
-                if not self.lookup(word):
-                    self.insert(word, Tag.ID)
-                tag = self.lookup(word)
+                word = get_word()
+                tag = self.lookup(word) 
+                if not tag:
+                    tag = Tag.ID
+                    self.insert(word, tag)
                 token = Word(tag, word)
-            elif t == "":
+            elif t == EOF:
                 token = Token(Tag.DONE)
             else:
                 token = Token(t)
@@ -100,66 +103,67 @@
     def expr(self):
         self.term()
         while True:
-            if self.lookahead.tag == '+' \
-                    or self.lookahead.tag == '-':
-                t = self.lookahead
-                self.match(self.lookahead.tag)
+            token = self.lookahead
+            if token.tag == '+' \
+                    or token.tag == '-':
+                self.match(token.tag)
                 self.term()
-                self.emit(t)
+                self.emit(token)
             else:
                 break
             
     def term(self):
         self.factor()
         while True:
-            if self.lookahead.tag == '*' \
-                    or self.lookahead.tag == '/' \
-                    or self.lookahead.tag == Tag.DIV \
-                    or self.lookahead.tag == Tag.MOD:
-                t = self.lookahead
-                self.match(self.lookahead.tag)
+            token = self.lookahead
+            if token.tag == '*' \
+                    or token.tag == '/' \
+                    or token.tag == Tag.DIV \
+                    or token.tag == Tag.MOD:
+                self.match(token.tag)
                 self.factor()
-                self.emit(t)
+                self.emit(token)
             else:
                 break
             
     def factor(self):
-        if self.lookahead.tag == '(':
+        token = self.lookahead
+        if token.tag == '(':
             self.match('(')
             self.expr()
             self.match(')')
-        elif self.lookahead.tag == Tag.NUM:
-            self.emit(self.lookahead)
+        elif token.tag == Tag.NUM:
+            self.emit(token)
             self.match(Tag.NUM)
-        elif self.lookahead.tag == Tag.ID:
-            self.emit(self.lookahead)
+        elif token.tag == Tag.ID:
+            self.emit(token)
             self.match(Tag.ID)
         else:
-            raise DragonException("factor: bad token tag - %s" % self.lookahead)
+            raise DragonException("factor: bad token tag - %s" % token)
         
-    def match(self, t):
-        if self.lookahead.tag == t:
+    def match(self, tag):
+        if self.lookahead.tag == tag:
             self.lookahead = self.lexan()
         else:
-            raise DragonException("match: tag %s doesn't match %s" % (t, self.lookahead))
+            raise DragonException("match: tag %s doesn't match %s" % (tag, self.lookahead))
 
     def emit(self, token):
-        t = token.tag
-        tval = token.value
-        if t == '+' or t == '-' or t == '*' or t == '/':
-            out = "%s" % t
-        elif t == Tag.DIV:
+        tag = token.tag
+        value = token.value
+        if tag == '+' or tag == '-' or tag == '*' or tag == '/':
+            out = "%s" % tag
+        elif tag == Tag.DIV:
             out = "DIV"
-        elif t == Tag.MOD:
+        elif tag == Tag.MOD:
             out = "MOD"
-        elif t == Tag.NUM:
-            out = "%d" % tval
-        elif t == Tag.ID:
-            out = "%s" % tval
+        elif tag == Tag.NUM:
+            out = "%d" % value
+        elif tag == Tag.ID:
+            out = "%s" % value
         else:
             out = "emit: %s" % (token)
             
-        self.out.append(out) # for tracing and testing
+        self.out.append(out)
         print out
             
     def db(self, intro=""):
@@ -186,6 +190,7 @@
 7 div (8 - 9);
 10 mod (11 + 12);
 """
+
 if __name__ == "__main__":
     FP = StringIO.StringIO(SRC)
     main()

History