# Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/python

# Import
import xml.etree.ElementTree as ET
import sys


def removeNS(tag):
    """Return `tag` without its leading "{namespace}" qualifier.

    Everything up to and including the first '}' is dropped. A tag that
    contains no '}' is returned unchanged.
    """
    _, brace, local_name = tag.partition('}')
    if brace:
        return local_name
    return tag

def linearize(el, path):
    """Print `el` and all of its descendants as XPath-like lines.

    Output for one element, in order:
      * its stripped text: `path` alone when empty, `path=text` for a single
        line, or one `path[line N]=...` line per text line otherwise;
      * one `path/@name=value` line per attribute (namespace removed);
      * the same output, recursively, for each child element.

    Child paths are `path/tag`; the second and later siblings sharing a tag
    name are numbered `tag[2]`, `tag[3]`, ...

    el   -- the ElementTree element to print
    path -- the path string already built for `el`
    """
    # ElementTree leaves .text as None when an element has no character
    # data (e.g. <a/>), so fall back to "" before stripping.
    text = (el.text or "").strip()

    # NOTE: print(...) with a single argument behaves identically under
    # Python 2 and Python 3.
    if not text:
        print(path)
    else:
        # Several lines ?
        lines = text.splitlines()
        if len(lines) > 1:
            for lineNb, line in enumerate(lines, 1):
                print(path + "[line %d]=%s " % (lineNb, line))
        else:
            print(path + "=" + text)

    # Print attributes
    for name, val in el.items():
        print(path + "/@" + removeNS(name) + "=" + val)

    # Counter on the sibling element names
    counters = {}

    # Loop on child elements
    for childEl in el:

        # Remove namespace
        tag = removeNS(childEl.tag)

        # Count this occurrence; only repeated names get a numeric suffix
        counters[tag] = counters.get(tag, 0) + 1
        if counters[tag] > 1:
            numberedTag = tag + "[" + str(counters[tag]) + "]"
        else:
            numberedTag = tag

        # Print child node recursively
        linearize(childEl, path + '/' + numberedTag)

# Main 
def process(stream, prefix):
    """Parse the XML document in `stream` and print it in linearized form.

    stream -- source handed to ET.parse
    prefix -- string placed in front of every printed path, before the
              leading "//" and the root tag name
    """
    root_el = ET.parse(stream).getroot()

    # The root path is the prefix, "//", then the namespace-free root tag
    root_path = prefix + "//" + removeNS(root_el.tag)
    linearize(root_el, root_path)


# Each argument is a file
args = sys.argv[1:]

# Loop on files
for filename in args:

    # If we process several files, prefix each one with its path
    prefix = filename + ":" if len(args) > 1 else ""

    # Open in binary mode so the XML parser, not the locale, decides the
    # encoding; the with-block closes the file even if parsing fails.
    with open(filename, "rb") as xml_file:
        process(xml_file, prefix)

# No input file ? => Process std input
if not args:
    process(sys.stdin, "")

# History