Sometimes one needs a quick and dirty solution for parsing and generating XML. This recipe uses only the Python parser itself to parse XML: the XML code is translated to valid Python code and then evaluated. The generated objects can then be manipulated within Python itself and treated as regular Python objects.
# a quick and dirty xml module for parsing and generating xml/html
#
# this is a very poor man's xml parser
# it uses the python syntax parser for parsing xml code
# and defines a tag class called T. xml code is first translated to
# valid python code and then evaluated. works also under jython.
#
# (c) f.jamitzky 2006
class T(object):
    """A minimal XML/HTML element: tag name, attributes and ordered children.

    The element is stored in three instance attributes:

    * ``_name`` -- the tag name (first space-separated token of ``name``)
    * ``_kw``   -- dict of attributes parsed from the remaining tokens
    * ``_args`` -- list of children (strings or other ``T`` instances)

    Integer indexing addresses children, any other key addresses
    attributes, and attribute access (``elem.tag``) looks up children by
    tag name.
    """

    # Names that must never be answered with a child-tag lookup; resolving
    # them through __getattr__ would recurse on partially built instances.
    _INTERNAL = ("_name", "_kw", "_args")

    def __init__(self, name, args=None):
        """Create an element.

        name -- tag name optionally followed by space-separated
                ``key=value`` tokens, e.g. ``"a href='x'"``.  Values are
                stored exactly as written (including their quotes).  A
                token without ``=`` is stored with the value ``None``.
        args -- list of children; it is stored by reference, not copied.
                When omitted, a fresh empty list is created per instance.
        """
        tokens = name.split(" ")
        self._name = tokens[0]
        self._kw = {}
        # A new list per instance: a shared default list would make every
        # childless element share the same children.
        self._args = [] if args is None else args
        for token in tokens[1:]:
            if not token:
                # Consecutive spaces produce empty tokens; ignore them.
                continue
            # partition splits on the first "=" only, so values may
            # themselves contain "=".
            key, sep, val = token.partition("=")
            self._kw[key] = val if sep else None

    def __len__(self):
        """Return the number of children."""
        return len(self._args)

    @staticmethod
    def _format_attr(key, value):
        """Render one attribute as ``key=value`` (or a bare ``key``).

        Values already wrapped in matching quotes are emitted unchanged;
        other values are quoted so the output stays well formed.  A value
        of ``None`` renders as the bare key.
        """
        if value is None:
            return str(key)
        text = str(value)
        if len(text) >= 2 and text[0] == text[-1] and text[0] in "'\"":
            return str(key) + "=" + text
        quote = '"' if "'" in text else "'"
        return str(key) + "=" + quote + text + quote

    def __str__(self):
        """Serialize the element and all of its children to markup.

        An element without children is written as ``<name .../>``,
        otherwise as ``<name ...>children</name>``.  Attributes are
        separated by a single space.
        """
        parts = ["<" + self._name]
        for key in self._kw:
            parts.append(" " + self._format_attr(key, self._kw[key]))
        if len(self._args) == 0:
            parts.append("/>")
        else:
            parts.append(">")
            for child in self._args:
                parts.append(str(child))
            parts.append("</" + self._name + ">")
        return "".join(parts)

    def __repr__(self):
        """Return the same markup as ``str(self)``."""
        return str(self)

    def __getitem__(self, key):
        """Return child number ``key`` (int) or attribute ``key`` (other).

        Raises IndexError for a child index out of range and KeyError for
        an unknown attribute.
        """
        if isinstance(key, int):
            return self._args[key]
        return self._kw[key]

    def __setitem__(self, key, value):
        """Set child number ``key`` (int) or attribute ``key`` (other).

        An integer index at or beyond the current number of children
        adds the value at the end instead of raising.
        """
        if isinstance(key, int):
            if key < len(self._args):
                self._args[key] = value
            else:
                self._args.insert(key, value)
        else:
            self._kw[key] = value

    def keys(self):
        """Return the attribute names as a list."""
        return list(self._kw)

    def tags(self):
        """Return the tag names of all element children, in order.

        Children without a ``_name`` attribute (plain text) are skipped.
        """
        names = []
        for child in self._args:
            try:
                names.append(child._name)
            except AttributeError:
                # Plain-text children have no tag name.
                pass
        return names

    def get_tag_by_name(self, strg):
        """Return the children whose tag name equals ``strg``.

        Returns the single matching child when exactly one matches,
        otherwise a list (empty when nothing matches).
        """
        found = []
        for child in self._args:
            try:
                if child._name == strg:
                    found.append(child)
            except AttributeError:
                # Plain-text children have no tag name.
                pass
        if len(found) == 1:
            return found[0]
        return found

    def __getattr__(self, key):
        """Resolve ``elem.tag`` as ``elem.get_tag_by_name("tag")``.

        Only called when normal attribute lookup fails.  Double-underscore
        protocol names and the internal storage names raise AttributeError
        instead of being treated as tag names, so protocol probes such as
        those made by ``copy.deepcopy`` see a missing attribute rather
        than a list.
        """
        if key in self._INTERNAL or (key.startswith("__") and key.endswith("__")):
            raise AttributeError("Name does not exist '%s.'" % (key))
        return self.get_tag_by_name(key)

    def append(self, val):
        """Append ``val`` (text or element) as the last child."""
        self._args.append(val)
def xml2code(instr):
    """Translate an XML string into Python source that builds ``T`` objects.

    The returned string is an expression of the form
    ``T('root',[...])`` in which every element becomes a nested
    ``T("name attrs",[children])`` call and every text run becomes a
    double-quoted string literal.  Line breaks are removed, double quotes
    in the input are turned into single quotes, and literal ``[`` / ``]``
    become ``<lbracket/>`` / ``<rbracket/>`` elements because ``[`` is used
    internally as the token separator.

    instr -- the XML text.
    Returns the generated Python source as a string.
    """
    # Escape backslashes first: text and tag contents are embedded in
    # Python string literals, where a raw backslash would start an escape
    # sequence (or swallow the closing quote).
    data = instr.replace("\\", "\\\\")
    data = data.replace("[", "<lbracket/>").replace("]", "<rbracket/>")
    # Carriage returns are dropped together with newlines so that no raw
    # line break ends up inside a generated string literal.
    data = data.replace("\r", "").replace("\n", "").replace('"', "'")
    # Processing instructions and comments are treated as empty tags.
    data = data.replace("?>", "?/>").replace("-->", "--/>")
    # Turn every angle-bracket form into a "]marker" token delimited by "[".
    data = data.replace("</", "[]endtag[").replace("/>", "[]mptytag[")
    data = data.replace("<", "[]starttag[").replace(">", "[]closetag[")
    pieces = data.split("[")

    out = []
    count = len(pieces)
    i = 0
    while i < count:
        piece = pieces[i].strip()
        if not piece:
            i += 1
            continue
        if piece[0] == "]":
            kind = piece[1:2]
            if kind == "s":
                # "]starttag" is followed by the tag content and then by
                # its closing marker ("]closetag" or "]mptytag").
                content = pieces[i + 1] if i + 1 < count else ""
                out.append('T("' + content + '",[')
                i += 2
                # Bounds check: an unterminated "<" has no closing marker.
                if i < count and pieces[i][0:2] == "]m":
                    # Self-closing tag: close the child list right away.
                    out.append(']),')
            elif kind == "e":
                # "]endtag", tag name, "]closetag": close the open element
                # and skip over the name.
                out.append(']),')
                i += 2
            # Stray "]closetag" / "]mptytag" markers are ignored.
        else:
            out.append('"' + piece + '",')
        i += 1
    # Trailing commas inside the generated lists are valid Python, so no
    # textual clean-up is applied (it would also alter text content).
    return "T('root',[" + "".join(out) + "])"
def xml(strg):
    """Parse the XML string ``strg`` and return its first top-level node.

    The string is translated to Python source by ``xml2code`` and that
    source is evaluated; the first child of the generated root element is
    returned.
    """
    # WARNING: eval() executes the generated source. Only feed this
    # function XML from a trusted origin.
    generated = xml2code(strg)
    root = eval(generated)
    return root[0]
# Demo: runs only when the file is executed as a script, so importing the
# module has no side effects. Single-argument print(...) calls behave the
# same under Python 2 and Python 3.
if __name__ == "__main__":
    print("parsing xml:")
    data = """<a><a b='a'><b a='b'/></a><b>/a</b>b<a/><a/></a>"""
    print("xml string:")
    print(data)
    tt = xml(data)
    print("print:")
    print(tt)
    print("print tags:")
    print(tt.tags())
    print("get tag 'a':")
    print(tt.a)
    print("generating html:")
    html = xml("<html><head/><body/></html>")
    # Children are added with append(); attributes are set by string key.
    html.body.append("Hello World from jython")
    html.head['title'] = "Hello World"
    print(html)
    print("")
|
Python has a large choice of XML parsers, and they all serve their special purposes. Often enough, however, one only needs a few pieces of information from an XML file and ends up extracting them with string manipulation. This module is a very rough XML parser that translates the XML code into valid Python code, which is then evaluated. The generated object can then be manipulated further with built-in Python methods. The object behaves like a dictionary and supports some of the standard dictionary operations, e.g. len(x), x[0], x['attribute'] and x.keys(). In addition, x.tag returns the elements of x whose tag name is 'tag': a list of them, or the element itself when exactly one matches.
Another purpose of this module is the fast and easy generation of valid XML. An example is given at the end of the file, which generates an HTML string with a head and a body tag.