This is a way to 'unprettify' XML: the result is harder to read, but smaller.
#!/bin/python
# works w/Jython also
import xml.dom.minidom as dom
# Sample pretty-printed EPP <login> command. It is the input of the doctest
# below and is printed in compact form when the module is run as a script.
input_xml = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<epp xmlns="urn:ietf:params:xml:ns:epp-1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:ietf:params:xml:ns:epp-1.0 epp-1.0.xsd"
>
<command>
<login>
<clID>username</clID>
<pw>password</pw>
<options>
<version>1.0</version>
<lang>en</lang>
</options>
<svcs>
<objURI>urn:ietf:params:xml:ns:domain-1.0</objURI>
<objURI>urn:ietf:params:xml:ns:host-1.0</objURI>
</svcs>
</login>
<clTRID>ABC-12345-XYZ</clTRID>
</command>
</epp>"""
# Bound to __doc__ explicitly: a bare string that is not the first statement
# of the module never becomes the module docstring, so doctest.testmod()
# would not collect the example. print() is used because a bare call would
# echo the repr of the returned string (with quotes) instead of the text.
__doc__ = """
Simple doctest:
>>> print(fromprettyxml(input_xml))
<?xml version="1.0" ?><epp xmlns="urn:ietf:params:xml:ns:epp-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:ietf:params:xml:ns:epp-1.0 epp-1.0.xsd"><command><login><clID>username</clID><pw>password</pw><options><version>1.0</version><lang>en</lang></options><svcs><objURI>urn:ietf:params:xml:ns:domain-1.0</objURI><objURI>urn:ietf:params:xml:ns:host-1.0</objURI></svcs></login><clTRID>ABC-12345-XYZ</clTRID></command></epp>
"""
def _strip_line_breaks(text):
    """Remove every line break in *text* together with the whitespace around it.

    Whitespace that does not touch a line break is left alone, so inline
    spacing such as ``two words`` survives.
    """
    pieces = text.split('\n')
    if len(pieces) == 1:
        # No line break at all: nothing to compact.
        return text
    middle = [piece.strip() for piece in pieces[1:-1]]
    # Only the sides of the first/last piece that touch a line break are trimmed.
    return ''.join([pieces[0].rstrip()] + middle + [pieces[-1].lstrip()])


def fromprettyxml(input_xml):
    """Return *input_xml* re-serialised without pretty-printing whitespace.

    Cool name, but this is not the exact opposite of ``dom.toprettyxml()``.

    The document is parsed with ``xml.dom.minidom`` and every ordinary text
    node has its line breaks, and the whitespace around them, removed; text
    nodes left empty are dropped.  CDATA sections, comments and processing
    instructions are written back untouched, so multi-line CDATA content is
    preserved instead of being mangled by a line-based strip of the output.

    input_xml -- the XML document to compact, as a string.

    Returns the compact document as produced by ``toxml()``, starting with
    an ``<?xml version="1.0" ?>`` declaration.  Errors raised by
    ``dom.parseString`` for malformed input propagate to the caller.

    >>> print(fromprettyxml('<a>\\n  <b>some text</b>\\n</a>'))
    <?xml version="1.0" ?><a><b>some text</b></a>
    """
    document = dom.parseString(input_xml)
    try:
        # Explicit stack instead of recursion so deeply nested documents
        # cannot hit the interpreter's recursion limit.
        pending = [document]
        while pending:
            parent = pending.pop()
            # Iterate over a copy: children may be removed while walking.
            for child in list(parent.childNodes):
                if child.nodeType == child.TEXT_NODE:
                    # CDATA sections report CDATA_SECTION_NODE, so they
                    # never reach this branch.
                    compact = _strip_line_breaks(child.data)
                    if compact:
                        child.data = compact
                    else:
                        parent.removeChild(child)
                        child.unlink()
                elif child.nodeType == child.ELEMENT_NODE:
                    pending.append(child)
        return document.toxml()
    finally:
        # Release the DOM even when serialisation fails.
        document.unlink()
def _test():
    """Run the doctests collected from this module."""
    import doctest
    import sys
    # Look the module up under its runtime name rather than importing a
    # hard-coded "stripxml": this keeps working if the file is renamed and
    # avoids loading a second copy of the module when run as a script.
    doctest.testmod(sys.modules[__name__])
if __name__ == "__main__":
    # Run the doctests first, then show the compacted sample document.
    _test()
    # A parenthesised single-argument print is valid both as the Python 2
    # statement and as the Python 3 function.
    print(fromprettyxml(input_xml))
|
If you're dealing with a lot of pretty-printed XML (the kind that is broken up by newlines and indented with spaces between elements), you can shrink it so you don't waste bandwidth transmitting all those irrelevant bytes. Here's a way to strip them out without removing the relevant spaces inside the elements, such as those in namespace declarations.
This code fails horribly if the XML contains CDATA text. It strips the CDATA tags and all whitespace in the text as well.