Yet another reinvention of a Python HTML generation mechanism « Python recipes

The other day I was complaining about writing html, forms, etc., for Python cgi and/or web programming. I had pointed out a selection of three examples, the first of which ended up being very much like Nevow.stan . Thinking a bit about it, I realized that stan had issues in that you couldn't really re-use pre-defined tags with attributes via map, and keyword arguments were just too darn convenient to swap the calling and getitem syntax.

Instead, I hacked together a mechanism that supports: T.tagname("content", T.tagname(...), ..., attr1='value', ...) T.tagname(attr1='value', ...)("content", T.tagname(...), ...) x = T.tagname(attr1='value', ...) y = T.tagname(*map(x, ['content', ...])) ... and many other options.

Essentially, you can mix and match calls as much as you want, with three memory and sanity saving semantics: 1. Creating a new tag object via T.tagname, or any call of such, will create a shallow copy of the object you are accessing. 2. smallred = T.font(size='-1', color='red');bigred = smallred(size='+1') Works exactly the way you expect it to. If it doesn't work the way you expect it to, then your expectations are confused. 3. If you are adding content that sits within the tag, the content is replaced, not updated, like attributes.

This simple version handles auto-indentation of content as necessary (or desirable), auto-escaping of text elements, and includes an (I believe) nearly complete listing of entities which don't require closing tags.

I don't know where this is going, whether it can or will expand into something more, or what, but I believe what I have managed to hack together is better than other similar packages available elsewhere (including this recipe over here http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/366000 , which I discovered after writing my own). Funny how these things work out. Astute observers will note that I borrow nevow.stan's meme of using T.tagname for generating tag objects.

      import sys
import cStringIO

#handle character escaping...
import re
from htmlentitydefs import codepoint2name

# Replacement table used when escaping text: ASCII code points map to
# their named entity ('&amp;', '&lt;', ...), every other code point maps
# to a numeric character reference ('&#233;', ...).
character2name = {}
for codepoint, entity in codepoint2name.iteritems():
    if codepoint > 127:
        character2name[unichr(codepoint)] = '&#%d;' % codepoint
        continue
    character2name[chr(codepoint)] = '&%s;' % entity
# keep the module namespace clean: drop the loop variables and the import
del codepoint, entity, codepoint2name
# one alternation matching any single character that has a replacement
escape = re.compile('(%s)' % '|'.join(character2name))

def repl(matchobj):
    """Return the entity text that replaces the character in *matchobj*.

    The matched text is looked up in ``character2name``; a match with no
    table entry is replaced by ``'?'``.
    """
    matched = matchobj.group(0)
    return character2name.get(matched, '?')

#handle special tags
# Elements that render() emits without a closing tag.  These are the
# HTML 4.01 elements whose end tag is forbidden, plus 'p' whose end tag is
# optional.  'iframe' is deliberately NOT listed: its end tag is required,
# and leaving it out makes browsers treat the rest of the page as the
# iframe's fallback content.
no_ends = dict.fromkeys(('br p input img area base basefont col '
                         'frame hr isindex link meta param').split())
# Elements whose text content is written verbatim instead of being
# entity-escaped.  'script' and 'style' hold CDATA in HTML 4.01 (entities
# are not decoded inside them); 'raw' is the recipe's pass-through
# pseudo-tag for pre-generated markup.
no_escape = dict.fromkeys('script style raw'.split())
# Elements whose closing tag is never moved onto its own indented line,
# because the added whitespace would be significant.
raw = dict.fromkeys('pre'.split())

#the base tag generator

class T(object):
    """Tag factory: reading any attribute yields a new ``tag`` of that name.

    ``T.div`` is the same as ``tag('div')``.
    """

    def __getattr__(self, tagname):
        element = tag(tagname)
        return element


# Rebind the name to a single instance so callers simply write T.tagname.
T = T()

class tag(object):
    """One HTML element: a lower-cased name, optional attributes, optional contents.

    Instances are normally obtained through ``T.tagname`` and refined by
    calling them: positional arguments replace the contents, keyword
    arguments are merged over the existing attributes.  Pass ``klass=`` to
    set the ``class`` attribute, since ``class`` is a Python keyword.
    """
    __slots__ = ['name', 'attrs', 'contents']

    def __init__(self, name, attrs=None, contents=None):
        """Store the lower-cased element name, the attribute dict and the contents."""
        self.name = name.lower()
        self.attrs = attrs
        self.contents = contents

    def __call__(self, *args, **kwargs):
        """Return a tag derived from this one.

        Keyword arguments are laid over a copy of the current attributes;
        positional arguments replace the current contents.  Called with no
        arguments (or only a false ``klass``) the tag itself is returned.
        """
        if kwargs and self.attrs:
            merged = dict(self.attrs)
            merged.update(kwargs)
            kwargs = merged
        # 'class' cannot be used as a keyword argument, so accept 'klass'.
        klass = kwargs.pop('klass', None)
        if klass:
            kwargs['class'] = klass
        if args and kwargs:
            return tag(self.name, kwargs, args)
        elif kwargs:
            return tag(self.name, kwargs, self.contents)
        elif args:
            # Copy the attribute dict: __setitem__ mutates it in place, so
            # sharing it would let a change to the new tag leak back into
            # this one.
            attrs = self.attrs
            if attrs is not None:
                attrs = dict(attrs)
            return tag(self.name, attrs, args)
        return self

    def __setitem__(self, key, value):
        """Set attribute *key* to *value* in place.

        Raises TypeError when *key* is not a string.
        """
        if isinstance(key, basestring):
            if self.attrs is None:
                self.attrs = {}
            self.attrs[key] = value
        else:
            raise TypeError('attribute assignments must only be to named attributes')

    def __getitem__(self, key):
        """Return the content item at integer index *key*.

        Raises IndexError when the tag has no contents (or the index is out
        of range) and TypeError when *key* is not an integer.
        """
        if isinstance(key, (int, long)):
            if not self.contents:
                raise IndexError('tuple index out of range')
            return self.contents[key]
        raise TypeError('content fetch must only be from indexed attributes')

    @staticmethod
    def _as_text(value):
        """Return *value* as a string, leaving str and unicode objects untouched."""
        # str() on a unicode object holding non-ASCII characters raises
        # UnicodeEncodeError, so strings are passed through as they are.
        if isinstance(value, basestring):
            return value
        return str(value)

    @staticmethod
    def _as_bytes(text):
        """Return *text* as a byte string, encoding unicode as UTF-8."""
        # Calling .encode() on a byte string makes Python 2 decode it as
        # ASCII first, which fails on any non-ASCII byte.
        if isinstance(text, unicode):
            return text.encode('utf-8')
        return text

    @staticmethod
    def _escaped(text):
        """Return *text* as unicode with entity replacements applied."""
        if not isinstance(text, unicode):
            # Byte strings are taken to be UTF-8 so that they can match the
            # unicode keys of the replacement table.
            text = text.decode('utf-8')
        return escape.sub(repl, text)

    def render(self, where=None, called=0):
        """Write this element and its contents as indented HTML.

        where  -- object to write to; it must provide write() and tell()
                  (tell() is used to measure how much content was written).
                  When None, a string buffer is used and the rendered text
                  is returned.
        called -- nesting depth, used for indentation; 0 for the outermost
                  element, which is also followed by a final newline.

        Attribute values and text content are entity-escaped, so pass plain
        text rather than pre-escaped markup.  Content of elements listed in
        no_escape is written verbatim.
        """
        if where is None:
            buf = cStringIO.StringIO()
            self.render(buf)
            return buf.getvalue()
        indent = '\n' + called * '  '
        if self.name != 'raw':
            if self.attrs:
                pairs = []
                for key, value in self.attrs.iteritems():
                    # Values sit inside single quotes, so the apostrophe
                    # must be escaped on top of the usual & < > ".
                    safe = self._escaped(self._as_text(value)).replace("'", '&#39;')
                    pairs.append("%s='%s'" % (key, safe))
                opening = '<%s %s>' % (self.name, ' '.join(pairs))
                where.write(indent + self._as_bytes(opening))
            else:
                where.write(indent + '<%s>' % self.name)
        start = where.tell()
        if self.contents:
            for item in self.contents:
                if hasattr(item, 'render'):
                    item.render(where, called + 1)
                elif self.name in no_escape:
                    where.write(self._as_bytes(self._as_text(item)))
                else:
                    where.write(self._as_bytes(self._escaped(self._as_text(item))))
        if self.name != 'raw' and self.name not in no_ends:
            # Long content gets its closing tag on a fresh indented line,
            # except inside elements listed in raw.
            if self.name not in raw and where.tell() - start > 25:
                where.write(indent + '</%s>' % self.name)
            else:
                where.write('</%s>' % self.name)
        if not called:
            where.write('\n')
            
'''
>>> print T.html(
...     T.body(
...         "hello world", T.br, "how are you?", T.br,
...         T.table(*[T.tr(*map(T.td, map(str, range(0+i, 3+i)))) for i in xrange(3)])
...                 )).render()

<html>
  <body>hello world
    <br>how are you?
    <br>
    <table>
      <tr>
        <td>0</td>
        <td>1</td>
        <td>2</td>
      </tr>
      <tr>
        <td>1</td>
        <td>2</td>
        <td>3</td>
      </tr>
      <tr>
        <td>2</td>
        <td>3</td>
        <td>4</td>
      </tr>
    </table>
  </body>
</html>

>>>
>>> x = T.html(
...     T.body(bgcolor='red')(
...         T.font(size='+1')('Welcome to this wonderful web page!'),
...         T.br, "How are you doing today?",
...         T.br, T.input(type='text', size='25', value='say something')
...         )).render()
>>> print x

<html>
  <body bgcolor='red'>
    <font size='+1'>Welcome to this wonderful web page!
    </font>
    <br>How are you doing today?
    <br>
    <input type='text' value='say something' size='25'>
  </body>
</html>

>>>
>>> print T.html(
...     T.body(
...         T.pre(x))).render()

<html>
  <body>
    <pre>
&lt;html&gt;
  &lt;body bgcolor='red'&gt;
    &lt;font size='+1'&gt;Welcome to this wonderful web page!
    &lt;/font&gt;
    &lt;br&gt;How are you doing today?
    &lt;br&gt;
    &lt;input type='text' value='say something' size='25'&gt;
  &lt;/body&gt;
&lt;/html&gt;
</pre>
  </body>
</html>

>>>
>>> def generate_something():
...     return T.b("I was generated from a function")
...
>>> print T.html(T.body(generate_something())).render()

<html>
  <body>
    <b>I was generated from a function
    </b>
  </body>
</html>

>>>
'''

      

After describing a similar syntax to the above, and seeing Nevow.stan, I took some time and hacked together the above. After finishing, I took a wander through the cookbook and found a few recipes, links, etc., many of which implement a very similar method, though none really manage to capture multiple calling semantics, and/or the very convenient re-use of pre-attributed tags as I do.

With the use of the non-XHTML tag of 'raw', one can pass through pre-generated html (perhaps embedded ReST -> html, etc.), sets of containers of objects, and various other interesting things. One could even signal to a form processor or somesuch that a particular input needs to be bounds checked on return, etc.

Tags: web

6 comments

Rob Walker 18 years, 5 months ago # | flag

Attributes that are Python keywords. I'd like to use the HTML generated from this recipe with CSS classes. The problem is that you can't do T.tag(class="whatevercssclass") because "class" is a Python keyword and using it this way generates an error. Is there a clean way to get around this?

Boh Heong Yap 18 years, 5 months ago # | flag

to add attribute 'class' add this chunk of code after line 46 of the code:

__klass = kwargs.pop('klass', None)
if __klass:
    kwargs['class'] = __klass

then just do this:

T.option(klass="aclass")( ..some content..)...

spelt with a 'k' so Python does not catch it as a reserved word, and it will output with the correct class="aclass" attribute

Josiah Carlson (author) 18 years, 5 months ago # | flag

I've added this modification to the code. Thank you.

Chris Heller 16 years, 2 months ago # | flag

This is a useful bit of code. I really like this recipe, and I even used it in an internal project of mine that never really went anywhere.

Before I mothball my project I wanted to share back with you my rendition of your code.

I made some small changes to the general code to add some extra name-spacing. I also dropped the need for using tell() to determine if the renderer should wrap, instead relying on counting the number of children in an element and whether those children are themselves tags.

With the dropped dependency on tell(), render() can now be used with sys.stdout passed in as the file object.

I also made some other slight changes to the HTML that is generated to make it more XHTML like (although I do no such verification of this).

Anyway, here is the code. I figure it better to post it in a comment, rather than a recipe (even though comment code formatting sucks), since this is really not a new recipe.

import htmlentitydefs
import re
import cStringIO as StringIO
import urllib

class HTMLEscape(object):
    """Entity-escapes text using a lookup table shared by all instances.

    The table and the compiled pattern are built once, by the first
    instance created, and stored on the class.
    """
    __lookuptable = None
    __escapefunc = None

    def __init__(self):
        """Build the shared lookup table and pattern if they do not exist yet."""
        if HTMLEscape.__lookuptable is None:
            # unichr only exists on Python 2; where it is missing, chr
            # already covers every code point.
            try:
                wide_chr = unichr
            except NameError:
                wide_chr = chr
            table = {}
            for codepoint, name in htmlentitydefs.codepoint2name.items():
                if codepoint <= 127:
                    # ASCII characters use their named entity
                    table[chr(codepoint)] = '&%s;' % name
                else:
                    # everything else uses a numeric character reference
                    table[wide_chr(codepoint)] = '&#%d;' % codepoint
            HTMLEscape.__lookuptable = table

        if HTMLEscape.__escapefunc is None:
            HTMLEscape.__escapefunc = re.compile('(%s)' % '|'.join(HTMLEscape.__lookuptable))

    def escape(self,_encodedhtml):
        """Return _encodedhtml with every character in the table replaced by its entity."""
        _replace = lambda matchobj: HTMLEscape.__lookuptable.get(matchobj.group(0), '?')
        return HTMLEscape.__escapefunc.sub(_replace,_encodedhtml)

    def canescape(self,_char):
        """Return True when _char has an entry in the lookup table."""
        return _char in HTMLEscape.__lookuptable

class Tag(object):
    """Factory whose attribute reads produce HTMLTag objects of that name."""

    def __getattr__(self, _type):
        element = HTMLTag(_type)
        return element

class HTMLTag(object):
    """An HTML element holding attributes plus child HTMLTag objects and text."""
    # Elements written as a single self-terminated tag.  'iframe' is not
    # listed: it requires a separate end tag, and a self-closed iframe is
    # read by HTML parsers as an unclosed element.
    __unpairedtags = frozenset('br input img hr link meta'.split())
    # Elements whose text children are written without entity escaping.
    __noescaping = frozenset('script cdata'.split())
    # Elements whose closing tag is never moved onto its own line.
    __noformatting = frozenset('pre'.split())
    # Shared escaper used for text children.
    __htmlents = HTMLEscape()
    # Minimum number of children before the closing tag may wrap.
    __wrapchildcount = 1

    __slots__ = ["_type","_attributes","_children"]
    def __init__(self,_type,_attributes=None,_children=None):
        """Store the lower-cased element type, its attribute dict and its children."""
        self._type = _type.lower()
        self._attributes = _attributes
        self._children = _children

    @staticmethod
    def _shallow_copy_update(_dict,_update):
        """Return a new dict with _dict's items overlaid by _update's items.

        Declared static because it takes no instance; without the decorator
        the instance would be bound as _dict and a two-argument call fails.
        """
        d = dict(_dict)
        d.update(_update)
        return d

(comment continued...)

Chris Heller 16 years, 2 months ago # | flag

(...continued from previous comment)

    def __call__(self,*_children,**_attributes):
        """Return a tag derived from this one.

        Keyword arguments are laid over a copy of the current attributes, so
        a newly passed value wins over the stored one.  Positional arguments
        replace the current children.  With no arguments the tag itself is
        returned.
        """
        if _attributes and self._attributes:
            # Merge inline, new values last.  A bare call to the class-level
            # helper is not resolvable from inside a method, and passing the
            # stored attributes as the update would let old values win.
            merged = dict(self._attributes)
            merged.update(_attributes)
            _attributes = merged
        # replace 'klass' with 'class', must be 'klass' in Python to prevent name conflict
        klass = _attributes.pop("klass",None)
        if klass: _attributes['class'] = klass
        # return a new HTMLTag
        if _children and _attributes:
            return HTMLTag(self._type,_attributes,_children)
        elif _attributes:
            return HTMLTag(self._type,_attributes,self._children)
        elif _children:
            # copy the dict so item assignment on the new tag cannot
            # modify this tag's attributes
            own = self._attributes
            if own is not None:
                own = dict(own)
            return HTMLTag(self._type,own,_children)
        else:
            return self

    def __setitem__(self,_attribute,value):
        """Set one HTML attribute with item-assignment syntax.

        Raises TypeError when the attribute name is not a string.
        """
        if isinstance(_attribute,basestring):
            if self._attributes is None:
                # first attribute on this tag: create the dict on demand
                self._attributes = {}
            self._attributes[_attribute] = value
            return
        raise TypeError("Invalid attribute name")

    def __getitem__(self,index):
        """Return the child stored at integer position *index*.

        Raises TypeError for a non-integer index and IndexError when the
        tag has no children or the position is out of range.
        """
        if isinstance(index,(int,long)):
            children = self._children
            if children:
                return children[index]
            raise IndexError("list index out of range")
        raise TypeError("Children must be indexed numerically")

    def render(self,fp=None,calldepth=0):
        """Pretty print the HTMLTag tree into a file object.

        fp        -- object with a write() method; when None the output is
                     collected in a string buffer and returned.
        calldepth -- nesting depth; depth 0 also emits the DOCTYPE line, and
                     each level indents by one space.
        """
        spaces = lambda: calldepth * ' '
        if fp is None:
            fp = StringIO.StringIO()
            self.render(fp)
            fp.seek(0)
            return fp.read()

        if calldepth == 0:
            fp.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">')

        if self._type != 'cdata':
            # unpaired elements are closed inside the opening tag: <br />
            unpaired_terminator = ""
            if self._type in HTMLTag.__unpairedtags:
                unpaired_terminator = " /"
            if self._attributes:
                pairs = ["%s='%s'" % (k,v) for k,v in self._attributes.iteritems()]
                fp.write('\n' + spaces() + '<%s %s%s>' % (self._type,' '.join(pairs).encode('utf-8'),unpaired_terminator))
            else:
                fp.write('\n' + spaces() + '<%s%s>' % (self._type,unpaired_terminator))
        else:
            # the pseudo-element 'cdata' opens a CDATA section instead of a tag
            fp.write('\n' + spaces() + '<![CDATA[')

(comment continued...)

Chris Heller 16 years, 2 months ago # | flag

(...continued from previous comment)

        # Write the children: nested tags render themselves one level
        # deeper, text is escaped unless this element is exempt.
        if self._children:
            for child in self._children:
                if hasattr(child,'render'):
                    child.render(fp,calldepth + 1)
                elif self._type in HTMLTag.__noescaping:
                    fp.write(str(child).encode('utf-8'))
                else:
                    child = str(child)
                    # only run the substitution when some character needs it
                    if any(HTMLTag.__htmlents.canescape(char) for char in child):
                        child = HTMLTag.__htmlents.escape(child)

                    fp.write(child.encode('utf-8'))
        if self._type != 'cdata' and self._type not in HTMLTag.__unpairedtags:
            # the closing tag goes on its own line only when the last child
            # is itself a tag and there are enough children
            child_wrap_p = self._children and (len(self._children) >= HTMLTag.__wrapchildcount and hasattr(self._children[-1],'render'))
            if self._type not in HTMLTag.__noformatting and child_wrap_p:
                fp.write('\n' + spaces() + '</%s>' % self._type)
            else:
                fp.write('</%s>' % self._type)
        elif self._type == 'cdata':
            # close the CDATA section opened for the 'cdata' pseudo-element
            fp.write(']]>')

        if calldepth == 0:
            fp.write('\n')

◄	Python recipes (4591)	►
◄	Josiah Carlson's recipes (9)	►

Yet another reinvention of a Python HTML generation mechanism (Python recipe) by Josiah Carlson
ActiveState Code (http://code.activestate.com/recipes/440563/)

6 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Yet another reinvention of a Python HTML generation mechanism (Python recipe) by Josiah Carlson ActiveState Code (http://code.activestate.com/recipes/440563/)

6 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Yet another reinvention of a Python HTML generation mechanism (Python recipe) by Josiah Carlson
ActiveState Code (http://code.activestate.com/recipes/440563/)