#!/usr/bin/python
"""Flatten XML documents into one line of output per node.

Every element is printed as a path from the root ("//root/child/leaf"),
followed by "=<text>" when the element carries text.  Attributes are printed
as "<path>/@<name>=<value>".  The flat form is convenient for grep and diff.

Usage:
    script.py [FILE ...]

With no FILE the document is read from standard input.  With more than one
FILE every output line is prefixed with "<filename>:".

The code deliberately sticks to constructs that behave the same on Python 2
and Python 3 (every print call takes exactly one string argument).
"""

import sys
import xml.etree.ElementTree as ET


def removeNS(tag):
    """Return *tag* without its leading "{namespace-uri}" qualifier.

    ElementTree spells qualified names as "{uri}local".  Everything up to and
    including the first '}' is dropped; a tag without '}' is returned as is.
    """
    return tag.split('}', 1)[-1]


def linearize(el, path):
    """Print *el* and, recursively, all of its descendants.

    el   -- the ElementTree element to print.
    path -- the already-built path designating *el*; used as line prefix.

    Output, in order:
      * the element itself: "path" when it has no text, "path=text" for a
        single line of text, or one "path[line N]=text " line per text line;
      * one "path/@name=value" line per attribute;
      * every child element, recursively.
    """
    # Element.text is None (not "") when the element has no character data,
    # e.g. <a/> or <a><b/></a>; treat that the same as empty text.
    text = (el.text or "").strip()
    if not text:
        print(path)
    else:
        lines = text.splitlines()
        if len(lines) > 1:
            # Multi-line text: number the lines so each stays on its own row.
            for line_no, line in enumerate(lines, 1):
                print(path + "[line %d]=%s " % (line_no, line))
        else:
            print(path + "=" + text)

    # Attributes, with any namespace qualifier stripped from the name.
    for name, val in el.items():
        print(path + "/@" + removeNS(name) + "=" + val)

    # Number of siblings seen so far for each (namespace-less) tag name.
    counters = {}
    for child in el:
        tag = removeNS(child.tag)
        count = counters.get(tag, 0) + 1
        counters[tag] = count
        # The first sibling keeps the bare name; later ones become
        # "tag[2]", "tag[3]", ... so that every path stays unique.
        numbered_tag = tag if count == 1 else tag + "[" + str(count) + "]"
        linearize(child, path + '/' + numbered_tag)


def process(stream, prefix):
    """Parse the XML document in *stream* and print its linearized form.

    stream -- a file name or file object, as accepted by ElementTree.parse.
    prefix -- text prepended to every output line (may be empty).

    Parse errors raised by ElementTree propagate to the caller.
    """
    root = ET.parse(stream).getroot()
    linearize(root, prefix + "//" + removeNS(root.tag))


def main(argv=None):
    """Linearize each file named in *argv* (default: sys.argv[1:]).

    Falls back to standard input when no file name is given.
    """
    args = sys.argv[1:] if argv is None else argv

    if not args:
        # Prefer the underlying byte stream when there is one (Python 3) so
        # the XML parser can honour the encoding declared in the document.
        process(getattr(sys.stdin, "buffer", sys.stdin), "")
        return

    # Only label the output with the file name when it would be ambiguous.
    several = len(args) > 1
    for filename in args:
        prefix = filename + ":" if several else ""
        # Binary mode: decoding is left to the XML parser.  The with-block
        # guarantees the file is closed even if parsing fails.
        with open(filename, "rb") as stream:
            process(stream, prefix)


if __name__ == "__main__":
    main()