def _textlist(self, _addtail=False):
    '''Return the text strings found in an element and all of its sub-elements.

    Helpful for extracting text from prose-oriented XML (such as XHTML or
    DocBook).  Strings are collected in document order: the element's own
    leading text first, then, for each child, that child's text list
    followed by the child's tail.

    self     -- the element to walk; it must expose .text and .tail and
                yield its children when iterated.
    _addtail -- internal flag, set on recursive calls so that a child's
                tail is included.  The starting element's own tail is
                left out unless the caller passes True.

    Returns a new list of strings.  Text or tail values that are None are
    skipped; empty strings are kept.
    '''
    result = []
    if self.text is not None:
        result.append(self.text)
    for elem in self:
        # Recurse through the function itself rather than through an
        # injected elem.textlist attribute, so the walk also works on
        # elements that have not been (or cannot be) monkey-patched.
        result.extend(_textlist(elem, True))
    if _addtail and self.tail is not None:
        result.append(self.tail)
    return result
# inject the new method into the ElementTree framework
try:
    from xml.etree.ElementTree import _Element
except ImportError:
    # _Element is a private compatibility alias that newer Python releases
    # no longer export; the public Element class is the type to patch there.
    from xml.etree.ElementTree import Element as _Element
# NOTE(review): when Element is the C-accelerated type this assignment
# raises TypeError (built-in types do not accept new attributes); in that
# case call _textlist(elem) directly instead of elem.textlist().
_Element.textlist = _textlist
## ---------- Sample calls -----------
from xml.etree.ElementTree import XML
from textwrap import fill

# NOTE(review): the element markup in both literals below is reconstructed.
# As found in this file the literals had lost their tags (list items showed
# up as "- " bullets and the escaped tag names as bare <html>), so XML()
# could not parse them.  The visible text is unchanged; confirm the exact
# element names against the original recipe.

# Prose-style XHTML: join every text run and re-wrap it as one paragraph.
xhmtl_fragment = XML('''
<ul>
<li>XHTML documents start with an &lt;html&gt; tag - there is no such thing as an &lt;xhtml&gt; tag.</li>
<li>It is required that you declare the XHTML namespace inside the opening &lt;html&gt; tag.</li>
<li>This XHTML example covered the use of XHTML transitional - for XHTML strict or frameset, use the appropriate
DOCTYPE Declaration.</li>
<li>Remember that declaring a DOCTYPE with a valid identifier at the top of an XHTML page puts most browers
in standards mode- so remember not to use old browser hacks, and non-standard tags. (Otherwise, use just use regular HTML)</li>
<li>For some browsers, including Microsoft Internet Explorer 6, if you start an XHTML page with the XML declaration,
the browser goes into quirks mode, an unfortunate bug. The workaround is to delete the optional
declaration and declare the the encoding using a meta tag.</li>
<li>The DOCTYPE declaration has to be in all uppercase characters, just like in the XHTML example code.</li>
</ul>
''')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(fill(''.join(xhmtl_fragment.textlist())))

# Structured DocBook: wrap each text run on its own so titles and
# paragraphs stay on separate lines.
docbook_fragment = XML('''
<book>
<title>History of Computer Programming</title>
<chapter>
<title>Chapter 1 -- Evolution</title>
<para>In the beginning, there was machine language. Then, arose assember.
From those humble beginnings, a thousand languages were born.</para>
</chapter>
<chapter>
<title>Chapter 2 -- Consolidation</title>
<para>Eventually, all designs converged on variants on LISP.</para>
</chapter>
</book>
''')
print('\n'.join(map(fill, docbook_fragment.textlist())))
## ---------- Sample output -----------
'''
XHTML documents start with an <html> tag - there is no such thing as
an <xhtml> tag. It is required that you declare the XHTML namespace
inside the opening <html> tag. This XHTML example covered the use of
XHTML transitional - for XHTML strict or frameset, use the appropriate
DOCTYPE Declaration. Remember that declaring a DOCTYPE with a valid
identifier at the top of an XHTML page puts most browers in standards
mode- so remember not to use old browser hacks, and non-standard tags.
(Otherwise, use just use regular HTML) For some browsers, including
Microsoft Internet Explorer 6, if you start an XHTML page with the XML
declaration, the browser goes into quirks mode, an unfortunate bug.
The workaround is to delete the optional declaration and declare the
the encoding using a meta tag. The DOCTYPE declaration has to be in
all uppercase characters, just like in the XHTML example code.
History of Computer Programming
Chapter 1 -- Evolution
In the beginning, there was machine language. Then, arose assember.
From those humble beginnings, a thousand languages were born.
Chapter 2 -- Consolidation
Eventually, all designs converged on variants on LISP.
'''