Welcome, guest | Sign In | My Account | Store | Cart
# Follow Sets Construction using prioritydictionary and graphs.
# Narayana Chikkam, Dec, 22, 2015.

import collections

from lib.graph import  *

class Grammar:
    prodNum = 0

    def __init__(self, sg):
        self.g = {}
        self.np = {}

        self.processStringsToGrammar(sg)

        self.sigma = self.orderSymbols()
        self.v = list(map(str, self.g.keys()))  # chg#1
        self.t = self.orderTerminals()
        self.prepareNullablesMap()
        self.numberedProds = None

    def processStringsToGrammar(self, sg):

        prods = sg.splitlines()
        if len(prods) > 0:
            self.g = collections.OrderedDict()
            self.np = collections.OrderedDict()

            S = prods[0].split("->")[0].strip()

            self.np[0] = ["%s#"%(S), [S]]    # Adding the start production S'
            index = 1

            for prod in prods:
                (lhs, rhs) = prod.split("->")
                if index not in self.np:
                    self.np[index] = [lhs.strip(), list(rhs.strip().split())]
                    index += 1

            for k in self.np:
                lhs, rhs = self.np[k][0], self.np[k][1]
                if lhs not in self.g:
                    self.g[lhs] = []
                self.g[lhs].append(rhs)


    def orderSymbols(self):
        symbols = []
        for k in self.g:  # order really matters
            if k not in symbols:
                symbols.append(k)
            for prods in self.g[k]:
                for s in prods:
                    if s not in symbols:
                        symbols.append(s)
        return symbols
        """
        ss = [prods for x in self.g.values() for prods in x]
        for prod in ss:
            symbols |= set(prod)
        return symbols
        """

    def orderTerminals(self):
        terminals = []
        for s in self.sigma:
            if s not in self.v:
                terminals.append(str(s))
        return terminals

    # NULLABLES CALCULATION:
    # can be optimized, recursion hits for limit 1000 default in Python!!!
    ######################################################################
    def isNullableRecursively(self, v):
        if self.visited[v] == False:
            self.visited[v] = True
            for prod in self.g[v]:
                #epsilonProd = ['epsilon']
                if 'epsilon' in prod: # Epsilon production
                    self.nullables_map[v] = True
                    break
                else:
                    prodNullableCount = 0
                    for prefix in prod:
                        if prefix in self.t:
                            break
                        if self.isNullableRecursively(prefix):
                            self.nullables_map[prefix] = True
                            prodNullableCount += 1
                        else:
                            break

                    if prodNullableCount == len(prod):
                        self.nullables_map[v] = True
                        break

            if v not in self.nullables_map:
                self.nullables_map[v] = False

            self.visited[v] = False
            return self.nullables_map[v]

    def prepareNullablesMap(self):
        self.visited = {k: False for k in self.v}
        self.nullables_map = {k: False for k in self.v}
        for v in self.v:
            self.nullables_map[v] = self.isNullableRecursively(v)


    def getNullablesMap(self):
        return self.nullables_map

    # FIRST CALCULATION:
    ######################################################################
    """
        find direct contributions and
        contains-the-FIRSTs-of
    """
    def findDirectFirstSets(self, v):
        first = set()
        for prod in self.g[v]:
            count = 0 # variable to check if this v could yeild epsilon because other pre-prefixes
            for prefix in prod:
                if prefix in self.t:
                    first = first.union([prefix])
                    break
                elif self.nullables_map[prefix] == True: # nullable V, go to next
                    self.containsFIRSTsOf.append((v, prefix))
                    count += 1
                    continue
                else:
                    self.containsFIRSTsOf.append((v, prefix))
                    break
            if count == len(prod):
                # all vars could produce nulls so add this v can indirectly produces epsilon
                first = first.union(['epsilon'])
        return first

    def getDirectFirstSets(self):
        self.init_first = {}
        self.containsFIRSTsOf = []
        for v in self.v:
            self.init_first[v] = self.findDirectFirstSets(v)

        return self.init_first

    def FIRST(self):

        initFirst = self.getDirectFirstSets()

        # create Graph withe the edges from containsOF relation
        g = Graph()
        for v in self.v:
            g.addVertex(v)

        for a,b in self.containsFIRSTsOf:
            g.addEdge(a, b, 1)

        #print "whole Graph: ", ffg
        return g.computeFirstUsingSCC(initFirst)

    # asking for first(rhs) of a particular production
    def FIRSTOFRHS(self, rhs):
        first = self.FIRST()
        retFirst = set()

        count = 0 # variable to check if this v could yeild epsilon because other pre-prefixes
        for prefix in rhs:
            if prefix in self.t:
                retFirst = retFirst.union([prefix])
                break
            elif self.nullables_map[prefix] == True: # nullable V, go to next
                retFirst = retFirst.union(first[prefix] - set(['epsilon']))
                count += 1
                continue
            else:
                retFirst = retFirst.union(first[prefix])
                break
        if count == len(rhs):
            # all vars could produce nulls so add this v can indirectly produces epsilon
            retFirst = retFirst.union(['epsilon'])

        return retFirst


    # FOLLOW CALCULATION:
    ######################################################################
    def getDirectFollowSets(self, FIRST):
        """
            1. If $ is the input end-marker, and S is the start symbol, $ belongs to FOLLOW(S).
            2. If there is a production, A -> Alpha B Beta, then FOLLOW(B) contains (FIRST(Beta) - epsilon)
            3. If there is a production, A -> Alpha B, or a production A -> Alpha B Beta, where  epsilon is in FIRST(Beta) , then FOLLOW(B) contains FOLLOW(A).
        """
        self.init_follow = {v:set() for v in self.v }
        self.containsFOLLOWOf = set()
        for v in self.v:
            if v == self.np[0][0]:  # Starting Production
                self.init_follow[v] = set(['$']) # $ is in follow of 'S' applying rule 1
            for prod in self.g[v]:
                for i in range(len(prod)):
                    if prod[i] in self.v and i+1 < len(prod):
                        if prod[i+1] in self.t:
                            self.init_follow[prod[i]] |= set([prod[i+1]])
                        else:
                            t = i + 1
                            while t < len(prod) and prod[t] in self.nullables_map:
                                if self.nullables_map[prod[t]] == True:
                                    self.init_follow[prod[i]] |= FIRST[prod[t]]-set(['epsilon'])
                                else:
                                    self.init_follow[prod[i]] |= FIRST[prod[t]]
                                    break
                                t += 1
                            if t >= len(prod): # every thing on rhs of prod[i] could produce epsison, rule - 3
                                self.containsFOLLOWOf |= set([(prod[i], v)])
                            else: #prod[i+1] is a non nullable prod or prod[t] was a terminal
                                if prod[t] in self.t:
                                    self.init_follow[prod[i]] |= set([prod[t]])
                                else:
                                    self.init_follow[prod[i]] |= FIRST[prod[t]]-set(['epsilon'])

                    elif prod[i] in self.v:
                        self.containsFOLLOWOf |= set([(prod[i], v)])         # applying rule 2

        #self.containsFOLLOWOf = set([(a, b) for (a, b) in self.containsFOLLOWOf if a != b]) # remove the self loops
        return self.init_follow

    def FOLLOW(self):

        first = self.FIRST()

        init_follow = self.getDirectFollowSets(first)  # populates the containsFOLLOWOf set for SCC

        g = Graph()
        for v in self.v:
            g.addVertex(v)

        for a,b in self.containsFOLLOWOf:
            g.addEdge(a, b, 1)  # dummy edge to keep the edges

        return g.computeFollowUsingSCC(first, init_follow)

Diff to Previous Revision

--- revision 1 2015-12-22 10:37:16
+++ revision 2 2015-12-22 10:49:21
@@ -1,3 +1,6 @@
+# Follow Sets Construction using prioritydictionary and graphs.
+# Narayana Chikkam, Dec, 22, 2015.
+
 import collections
 
 from lib.graph import  *

History