Welcome, guest | Sign In | My Account | Store | Cart

twvalidator is a simple markup validator gateway you can embed in your Twisted Web application for validating all your pages.

Python, 152 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from urlparse import urlparse, urlsplit
from cgi import parse_qs
import cStringIO as StringIO

from twisted.internet import defer
from twisted.web import server, resource, client



def encodeFormData(arg, value):
    """Encode a single form field as a multipart/form-data body.

    arg is the name of the form field, value is its content.

    Returns a (body, contentType) tuple: the encoded request body and
    the matching Content-Type header value, which carries the boundary
    used in the body.
    """

    # The boundary must never occur inside the payload, otherwise the
    # receiver would cut the field short at that point.  The usual
    # boundary is kept whenever possible; a numeric suffix is added only
    # when the value happens to contain it.
    boundary = '----------BOUNDARY'
    suffix = 0
    while boundary in value:
        suffix += 1
        boundary = '----------BOUNDARY%d' % suffix

    delimiter = '--' + boundary
    lines = [
        delimiter,
        'Content-Disposition: form-data; name="%s"' % arg,
        '',
        value,
        delimiter + '--',
        '',
    ]

    body = '\r\n'.join(lines)
    contentType = 'multipart/form-data; boundary=%s' % boundary

    return body, contentType


class FakeTransport(object):
    """In-memory stand-in for a transport.

    Everything written to it is accumulated in a string buffer and can
    be read back in one piece with getData().
    """

    def __init__(self):
        # Buffer collecting every chunk handed to the transport
        self.io = StringIO.StringIO()

    def write(self, data):
        """Append data to the buffer."""
        self.io.write(data)

    def writeSequence(self, sequence):
        """Append every chunk of sequence to the buffer, in order."""
        for chunk in sequence:
            self.io.write(chunk)

    def getData(self):
        """Return everything written so far as a single string."""
        return self.io.getvalue()


class MarkupValidator(resource.Resource):
    """A simple gateway to a validator service for Twisted Web.

    On a GET request the resource named by the Referer header is
    rendered again through the request's own process() method, with the
    output captured in memory.  The body of that output is then posted
    to the validator service at `uri` as the form field `arg`, and the
    validator's answer is written back to the client.
    """

    # Please, install a validator on your server!
    uri = 'http://validator.w3.org/check'
    arg = 'fragment'

    def __init__(self, site):
        """site is the site object of your twisted.web server
        """

        # Let the base class set up its own state before adding ours
        resource.Resource.__init__(self)
        self.site = site


    def render_GET(self, request):
        """Validate the page named by the Referer header.

        Returns server.NOT_DONE_YET while the validation is running;
        the answer is written to the client's transport once the
        validator service has replied.  Without a Referer header a
        400 response is returned immediately.
        """

        def finish(data):
            # Write the data back to our client
            request.clientproto = clientproto
            transport.write(data)
            transport.loseConnection()

        def fail(failure):
            # Report the problem and close the connection, so that the
            # client is not left waiting forever for an answer
            from twisted.python import log

            log.err(failure)
            request.clientproto = clientproto
            transport.write(
                'twvalidator: validation failed: %s\r\n'
                % failure.getErrorMessage())
            transport.loseConnection()


        # Get the referer and parse it
        referer = request.getHeader('referer')
        if not referer:
            # Nothing to validate: the page to check is only known
            # through the Referer header
            request.setResponseCode(400)
            request.setHeader('content-type', 'text/plain')
            return 'twvalidator: missing Referer header\r\n'

        scheme, netloc, path, parameters, query, fragment = urlparse(referer)
        # Keep blank values, so that arguments given without a value are
        # still present when the page is rendered again
        args = parse_qs(query, 1)

        # Remember the real transport and protocol version: the answer
        # of the validator is sent through them at the end
        transport = request.transport
        clientproto = request.clientproto

        # Modify the original request
        request.uri = referer
        request.path = path
        request.args = args
        request.clientproto = 'HTTP/1.0'  # we don't want chunk encoding
        request.transport = FakeTransport()

        # Reload the modified request on the server, without using HTTP
        deferred = request.notifyFinish()
        request.process()

        deferred.addCallback(lambda _: request.transport.getData())
        deferred.addCallback(self.validate, request)
        deferred.addCallback(finish)
        deferred.addErrback(fail)

        return server.NOT_DONE_YET

    def validate(self, data, request):
        """Post the body of a rendered response to the validator service.

        data is the raw captured response (headers and body), request is
        the request whose received headers are used as the base for the
        outgoing request.

        Returns the result of client.getPage for the POST to `uri`.
        """

        # We need only the body; when no header terminator is found the
        # data is passed on unchanged instead of being cut at random
        headerEnd = data.find('\r\n\r\n')
        if headerEnd != -1:
            data = data[headerEnd + 4:]

        # Build the request for the validator service, using the
        # original request as the base.  Work on a copy, so the headers
        # of the original request are left untouched.
        headers = dict(request.received_headers)
        data, contentType = encodeFormData(self.arg, data)

        headers['content-type'] = contentType
        for name in ('cookie', 'referer', 'host'):
            headers.pop(name, None)

        return client.getPage(
            self.uri, method='POST', headers=headers, postdata=data
            )


if __name__ == '__main__':
    # Usage example: serve a single XHTML page on port 8080, together
    # with a "validate" child handled by MarkupValidator.

    from twisted.internet import reactor
    from twisted.web import server


    class Simple(resource.Resource):
        """Root resource of the example, serving one static page."""

        def getChild(self, name, request):
            # The empty path segment maps back to this resource
            if name != '':
                return resource.Resource.getChild(
                    self, name, request)
            return self

        def render_GET(self, request):
            page = """<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" 
      lang="en" xml:lang="en">
  <head>
    <title>twvalidator example</title>
  </head>
  <body>
    <h1>twvalidator example</h1>

    <p><a href="validate">validate</a></p>
  </body>
</html>"""
            return page


    root = Simple()
    site = server.Site(root)

    validator = MarkupValidator(site)
    root.putChild('validate', validator)

    reactor.listenTCP(8080, site)
    reactor.run()

Public markup validator services, like http://validator.w3.org/, are very useful for validating web pages of your web application, but they are not usable when:

1) your application is on a private network; 2) your application is accessible only with HTTPS; 3) your application's rendering depends on the request (e.g. cookies).

twvalidator solves the problem by sitting between your application and the original validator service.

When a request comes to twvalidator, it looks at the referer header, requests the corresponding resource and posts the resulting page to the validator service, returning the result.

The important feature is that twvalidator uses the original request to obtain your page, assuming that the same headers that would be sent to the original resource (like cookies) are sent to the twvalidator resource.

Another feature of twvalidator is that the page is requested using the internal Twisted Web API, instead of using HTTP, allowing great flexibility.