Welcome, guest | Sign In | My Account | Store | Cart

Sometimes you want to pass an XML document around as a unicode object that will only later be encoded for output. Unfortunately, very often you don't know the output encoding in advance and therefore can't set the XML declaration properly. UnicodeXML adds the XML declaration at the moment the string is encoded.

Python, 43 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from sys import getdefaultencoding
from xml.sax.saxutils import quoteattr


class UnicodeXML(unicode):
    r"""Unicode string that prepends an XML declaration when it is encoded.

    The declaration always names the encoding actually used for the output:

    >>> xml = UnicodeXML("<root>Root</root>")
    >>> print xml.encode("windows-1251")
    <?xml version="1.0" encoding="windows-1251"?>
    <root>Root</root>
    >>> print xml.encode("utf-8")
    <?xml version="1.0" encoding="utf-8"?>
    <root>Root</root>

    If an XML declaration is already present it is replaced:

    >>> xml = UnicodeXML(
    ...     '<?xml version="1.0" encoding="utf-8"?>\n<root>Root</root>')
    >>> print xml.encode("windows-1251")
    <?xml version="1.0" encoding="windows-1251"?>
    <root>Root</root>

    Other processing instructions whose target merely begins with "xml"
    are left untouched:

    >>> xml = UnicodeXML('<?xml-stylesheet href="s.xsl"?>\n<root/>')
    >>> print xml.encode("utf-8")
    <?xml version="1.0" encoding="utf-8"?>
    <?xml-stylesheet href="s.xsl"?>
    <root/>
    """

    def encode(self, encoding=None, errors="strict"):
        """Encode the document, prefixed with a matching XML declaration.

        Args:
            encoding: Name of the target codec.  When omitted (or None) the
                interpreter default from sys.getdefaultencoding() is used.
            errors: Error handling scheme forwarded to the codec.

        Returns:
            The encoded document: a fresh XML declaration naming *encoding*,
            a newline, then the body with any pre-existing declaration (and
            the whitespace following it) removed.

        Raises:
            ValueError: If the text opens an XML declaration that is never
                closed with "?>".
        """
        if encoding is None:
            encoding = getdefaultencoding()

        # Work on a plain unicode copy so the final .encode() call below is
        # the base implementation and not this override.
        text = unicode(self)

        # "<?xml" only introduces a declaration when the target name ends
        # right there; "<?xml-stylesheet" and friends are ordinary
        # processing instructions and must be preserved.
        following = text[5:6]
        has_declaration = text.startswith(u"<?xml") and (
            not following or following == u"?" or following.isspace())
        if has_declaration:
            end = text.find(u"?>")
            if end == -1:
                raise ValueError("unproper XML declaration")
            text = text[end + 2:].lstrip()

        decl = u'<?xml version="1.0" encoding=%s?>\n' % quoteattr(encoding)
        # Encode declaration and body together: for encodings that are not
        # ASCII-compatible (UTF-16, UTF-32, ...) an ASCII declaration glued
        # in front of the encoded body would yield a malformed document.
        return (decl + text).encode(encoding, errors)