1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import functools
import itertools
import re
from collections import defaultdict
class reop(object):
    """Wrapper that marks a raw regular-expression fragment.

    ``compile`` copies the ``value`` of a ``reop`` element into the pattern
    unchanged, whereas every other element of a generic regular expression
    is treated as a symbol and replaced by its code character.
    """

    def __init__(self, x):
        # x: regex source text (for example ".*?") to embed as-is.
        self.value = x

    def __repr__(self):
        return "reop(%r)" % (self.value,)
def buildmap(inx):
    """Assign a distinct integer code to every distinct symbol in *inx*.

    Codes are handed out from 0 upwards in order of first appearance.

    The result is a ``defaultdict``: indexing it with a symbol that has no
    code yet assigns that symbol the next free code instead of raising
    ``KeyError``, so the map keeps growing as new symbols are looked up.
    """
    # functools.partial(next, counter) works on Python 2.6+ and on Python 3;
    # the bound method counter.next exists on Python 2 only.
    codes = defaultdict(functools.partial(next, itertools.count()))
    for symbol in inx:
        codes[symbol]  # the lookup itself allocates the code
    return codes
def buildimap(inmap):
    """Return the inverse of *inmap* as a plain dict mapping code -> symbol.

    If several symbols share one code, only one of them survives in the
    result.
    """
    # .items() is available on both Python 2 and 3, unlike .iteritems().
    return dict((code, symbol) for symbol, code in inmap.items())
def buildseq(inmap, inseq, xdef=None):
    """Encode *inseq* as a string holding one character per symbol.

    Every symbol is replaced by the character whose code point is the
    symbol's integer code in *inmap*.

    inmap -- mapping from symbol to integer code.
    inseq -- the symbols to encode; must support len() when xdef is None.
    xdef  -- optional fallback symbol. When given, its code is fetched with
             inmap[xdef] and used for every symbol that inmap.get() does not
             find. When None, each symbol is fetched with inmap[symbol], so
             a plain dict raises KeyError for an unknown symbol.

    Returns a native ``str`` built with chr() when the size check below is
    under 254, otherwise a unicode string.
    """
    try:
        wide_chr = unichr  # Python 2
    except NameError:
        wide_chr = chr  # Python 3: chr() already covers all of Unicode

    if xdef is not None:
        fallback = inmap[xdef]
        narrow = len(inmap) < 254
        codes = [inmap.get(symbol, fallback) for symbol in inseq]
    else:
        # Decided before the lookups run. Presumably len(inseq) is added
        # because indexing a self-growing map can allocate one new code per
        # element -- confirm against the intended map type.
        narrow = len(inmap) + len(inseq) < 254
        codes = [inmap[symbol] for symbol in inseq]

    if narrow:
        return "".join([chr(code) for code in codes])
    return u"".join([wide_chr(code) for code in codes])
def compile(inmap, gregexp):
    """Compile a generic regular expression written over symbols.

    inmap   -- mapping from symbol to integer code.
    gregexp -- iterable whose elements are either ``reop`` instances, whose
               ``value`` is inserted into the pattern unchanged, or symbols,
               which are replaced by the character for their code in
               *inmap* and matched literally.

    Returns the pattern object produced by ``re.compile``.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    parts = []
    for item in gregexp:
        if isinstance(item, reop):
            # Explicit branch instead of "a and b or c": an operator whose
            # value is empty must not fall through to the symbol lookup.
            parts.append(item.value)
        else:
            # re.escape rather than a bare backslash prefix: a backslash
            # followed by an alphanumeric character is a regex escape
            # (\d, \b, \A, \1, ...), not a literal match of that character.
            parts.append(re.escape(to_char(inmap[item])))
    return re.compile(u"".join(parts))
def unmap(inmapr, encoded):
    """Decode *encoded* back into a list of symbols.

    Each character of *encoded* is converted to its code point with ord()
    and that code is used as the key into *inmapr*.
    """
    symbols = []
    for char in encoded:
        code = ord(char)
        symbols.append(inmapr[code])
    return symbols
if __name__ == "__main__":
    # Demo: encode a sentence word by word, then search the encoded string
    # for "look" ... "world" and decode every match back into words.
    x = ["hello", "world", "view", "around", "*"]
    map1 = buildmap(x)
    rex = compile(map1, ("look", reop(".*?"), "world"))
    es = buildseq(map1, "when I look the world what can I look at you in the world".split(" "))
    # Build the inverse only now: compile() and buildseq() index map1 with
    # words that are not in x, and each such lookup adds a new code.
    map1i = buildimap(map1)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("%s %s %s %s" % ("encoded is ", es, len(es), type(es)))
    # Decoding needs the code -> symbol map. Passing map1 here would look
    # up integer keys in the symbol -> code map, which does not decode and
    # adds bogus entries to it.
    print(unmap(map1i, es))
    print("go!")
    for m in rex.findall(es):
        print(unmap(map1i, m))
|
Sign in to comment