Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python

# recordjar.py - Parse a Record-Jar into a list of dictionaries.
# Copyright 2005 Lutz Horn <lutz.horn@gmx.de>
# Licensed under the same terms as Python.

def parse_jar(flo):
    """Parse a Record-Jar from a file like object into a list of dictionaries.

    This function parses a file like object as described in "The Art of Unix
    Programming" <http://www.faqs.org/docs/artu/ch05s02.html#id2906931>.

    The records are divided by separator lines, i.e. lines that contain
    nothing but '%%' and optional surrounding whitespace. A '%%' that occurs
    inside a key or a value is ordinary text and does not end the record.
    Each record consists of one or more lines, each containing a key, a colon,
    and a value. Only the first colon on a line separates key from value, so
    values may themselves contain colons. Whitespace around both key and value
    is ignored.

    >>> import StringIO
    >>> flo = StringIO.StringIO("a:b\\nc:d\\n%%\\nx:y\\n")
    >>> out = parse_jar(flo)
    >>> print out
    [{'a': 'b', 'c': 'd'}, {'x': 'y'}]

    If a record contains a key more than once, the value for this key is a list
    containing the values in their order of occurrence.

    >>> flo = StringIO.StringIO("a:b\\nc:d\\n%%\\nx:y\\nx:z\\n")
    >>> out = parse_jar(flo)
    >>> print out
    [{'a': 'b', 'c': 'd'}, {'x': ['y', 'z']}]

    Leading or trailing separator lines ('%%') and lines containing only
    whitespace are ignored.

    >>> flo = StringIO.StringIO("%%\\na:b\\nc:d\\n%%\\n\\nx:y\\nx:z\\n")
    >>> out = parse_jar(flo)
    >>> print out
    [{'a': 'b', 'c': 'd'}, {'x': ['y', 'z']}]

    Arguments:
    flo -- an object whose read() method returns the complete text.

    Returns a list with one dictionary per non-empty record, in input order.

    Raises ValueError if a line that is neither blank nor a separator does
    not contain a colon.
    """
    records = []
    record = {}
    for line in flo.read().split("\n"):
        stripped = line.strip()
        if not stripped:
            # Blank and whitespace-only lines carry no information.
            continue
        if stripped == "%%":
            # Separator line: close the current record. Empty records (e.g.
            # from leading or repeated separators) are dropped.
            if record:
                records.append(record)
                record = {}
            continue
        if ":" not in stripped:
            raise ValueError(
                "record-jar line has no ':' separator: %r" % line)
        key, value = stripped.split(":", 1)
        key, value = key.strip(), value.strip()
        if key not in record:
            record[key] = value
        elif isinstance(record[key], list):
            # Third or later occurrence: extend the existing list.
            record[key].append(value)
        else:
            # Second occurrence: promote the single value to a list.
            record[key] = [record[key], value]
    # The last record is not followed by a separator line.
    if record:
        records.append(record)
    return records

def _test():
    """Run the doctests of the recordjar module and return doctest's result."""
    import doctest
    import recordjar
    outcome = doctest.testmod(recordjar)
    return outcome

if __name__ == "__main__":
    # Executed as a script: run the module's doctests via _test().
    _test()

History