Extracts nested substrings from a line of text. The substrings are delimited by two different characters (an opening and a closing one).
from mx.TextTools import *

# Sample input: groups delimited by "(" and ")" nested to various depths.
text = "aa(AA)a((BB))aa((CC)DD)aa(EE(FF))aa(GG(HH(II)JJ)KK)aa"

# The tagging table has to refer to itself in order to recurse. It cannot
# name itself while it is still being built, so it is looked up indirectly
# through this list (see SubTableInList below) and appended once complete.
tables = []

# Each entry follows the mx.TextTools layout
#   (tagobj, command, argument, jump-if-no-match, jump-if-match)
# and bare strings such as 'start' are jump labels.
tab = ('start',
       # If next character is "(" then recurse (look-ahead only; nothing
       # is consumed here).
       (None, Is+LookAhead, '(', +1, 'nesting'),
       # If current character is ")" then stop or return from recursion.
       (None, Is, ')', +1, MatchOk),
       # Search all characters except "(" and ")".
       (None, AllNotIn, '()', +1, 'start'),
       # End-of-text check; only reached when none of the entries above
       # matched.
       (None, EOF, Here, MatchOk),
       'nesting',
       ('group', SubTable+AppendMatch,
        # Since we have looked ahead, collect the "(" sign ...
        ((None, Is, '(', 0, +1),
         # ... then recurse into tables[0], i.e. this very table.
         (None, SubTableInList, (tables, 0)))),
       # After recursion jump back to 'start'.
       (None, Jump, To, 'start'))

tables.append(tab)  # Add tab to tables so that (tables, 0) resolves to it

if __name__ == '__main__':
    result, taglist, nextindex = tag(text, tab)
    # Parenthesised form prints the same under Python 2 and also parses
    # under Python 3.
    print(taglist)
# ----- The version below returns the strings without their delimiting characters -----
from mx.TextTools import *
# Sample input: groups delimited by "(" and ")" nested to various depths.
text = "aa(AA)a((BB))aa((CC)DD)aa(EE(FF))aa(GG(HH(II)JJ)KK)aa"

# Each entry follows the mx.TextTools layout
#   (tagobj, command, argument, jump-if-no-match, jump-if-match)
# and bare strings such as 'start' are jump labels.
tab = ('start',
       # Peek for ")" without consuming it: on a match this table is done
       # (MatchOk), otherwise continue with the next entry.
       (None, Is+LookAhead, ')', +1, MatchOk),
       # Consume "(": on a match continue with the recursion below,
       # otherwise go and collect ordinary characters at 'letters'.
       (None, Is, '(', 'letters', +1),
       # Recurse into this same table; the "(" has already been consumed,
       # so the text appended for the group starts after it.
       ('group', SubTable+AppendMatch, ThisTable),
       # Skip one character after the recursion returns -- presumably the
       # ")" the sub-table only looked ahead at -- then start over.
       (None, Skip, 1, MatchFail, 'start'),
       'letters',
       # Consume a run of characters other than "(" and ")".
       (None, AllNotIn, '()', +1, 'start'),
       # End-of-text check; only reached when AllNotIn did not match.
       (None, EOF, Here, MatchOk))

# Named nextindex (not next) to avoid shadowing the builtin.
result, taglist, nextindex = tag(text, tab)
# Parenthesised form prints the same under Python 2 and also parses under
# Python 3.
print(taglist)
|
Modified 28/7/2002: The previous version worked only because of a bug in mx.TextTools 2.1 beta2! The code has been fixed to work with Python 2.2.1 + mx.TextTools 2.1 beta3 (Pekka)
Tags: text