Welcome, guest | Sign In | My Account | Store | Cart
from sys import getdefaultencoding
from xml.sax.saxutils import quoteattr


class UnicodeXML(unicode):
    r"""Version of the unicode class which adds XML declaration on encoding.

    >>> xml = UnicodeXML("<root>Root</root>")
    >>> print xml.encode("windows-1251")
    <?xml version="1.0" encoding="windows-1251"?>
    <root>Root</root>
    >>> print xml.encode("utf-8")
    <?xml version="1.0" encoding="utf-8"?>
    <root>Root</root>

    If XML declaration already present it will be removed:

    >>> xml = UnicodeXML(
    ...     '<?xml version="1.0" encoding="utf-8"?>\n<root>Root</root>')
    >>> print xml.encode("windows-1251")
    <?xml version="1.0" encoding="windows-1251"?>
    <root>Root</root>
    """

    def encode(self, *args):
        if len(args) > 2:
            raise TypeError("too much arguments for encode()")
        elif not args:
            encoding = getdefaultencoding()
        else:
            encoding = args[0]

        if not self.startswith("<?xml"):
            body = self
        else:
            try:
                i = self.index("?>")
            except ValueError:
                raise ValueError("unproper XML declaration")
            body = self[i + 2:].lstrip()

        decl = '<?xml version="1.0" encoding=%s?>\n' % quoteattr(encoding)
        return decl + unicode(body).encode(*args)

History