This module executes the string matching between an input sequence T and a pattern P using a Finite State Machine. The complexity for building the transition function is O(m^3 x |A|) where A is the alphabet. Since the string matching function scan the input sequence only once, the total complexity is O(n + m^3 x |A|)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | """
This module executes the string matching between a input sequence T and an
pattern P using a Finite State Machine.
The complexity for building the transition function is O(m^3 x |A|) where A is the
alphabet. Since the string matching function scan the input sequence only once,
the total complexity is O(n + m^3 x |A|)
@author Filippo Squillace
@version 1.0.0
@date 07/06/2012
"""
def string_matching_FSM(T, trans, m):
"""
T: is the input sequence;
trans: is the transition function that define the pattern P we need to look
for;
m: lenght of the pattern
"""
s = 0
for i,c in enumerate(T):
s = trans[s][c]
if s == m:
return i-m+1
return -1
import string as st
def transition_function(P):
"""
The main principle on building the transition function is to think about
the fact that every time we scan a new character from the input sequence
the suffix should match with the prefix of the pattern. If that is not
possible for every length of the suffix, the next state need to be the
initial, otherwise the length of the suffix that matches properly will be
exactly the next state.
"""
alphabet = st.ascii_letters+st.punctuation+st.digits+st.whitespace
m = len(P)
trans = [{c:0 for c in alphabet} for i in range(m)]
for s in range(m):
for c in alphabet:
k = min(m, s+1)
while (P[:s]+c)[-k:] != P[:k]:
k-=1
trans[s][c]=k
return trans
if __name__=='__main__':
import unittest
class StringMatchTestCase(unittest.TestCase):
def setUp(self):
# Table of (sequence,pattern,expected_result)
self.pos_cases = [\
('abcbbaanmdiababcdrttf','ababcd',11),
('abcbbaanmdiabafweefabab','abab',19),
('abcbbaanmdiasfo pfj=pewpfiojafaXre8abbafw_ eefabab','aXre8ab',30)
]
self.neg_cases = [\
('abcbbaanmdiabcdrttf','ababcd',-1),
('abcbbaanmdiabafweefaba','abab',-1),
('abcbb_?aaFSRnmfew345sdhfhhuw.fad iabafweefaba','abab',-1)
]
def test_positive(self):
for (T,P,er) in self.pos_cases:
trans = transition_function(P)
res = string_matching_FSM(T, trans, len(P))
self.assertEqual(res, er)
def test_negative(self):
for (T,P,er) in self.neg_cases:
trans = transition_function(P)
res = string_matching_FSM(T, trans, len(P))
self.assertEqual(res, er)
unittest.main()
|