twvalidator is a simple markup validator gateway you can embed in your Twisted Web application for validating all your pages.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | from urlparse import urlparse, urlsplit
from cgi import parse_qs
import cStringIO as StringIO
from twisted.internet import defer
from twisted.web import server, resource, client
def encodeFormData(arg, value):
    """Build a multipart/form-data payload holding one form field.

    arg is the field name and value is the field content.  The result is
    a (body, contentType) pair: the encoded body and the matching
    Content-Type header value, which carries the boundary string.
    """
    BOUNDARY = '----------BOUNDARY'
    delimiter = '--' + BOUNDARY
    parts = (
        delimiter,
        'Content-Disposition: form-data; name="%s"' % arg,
        '',                 # blank line between the part headers and its content
        value,
        delimiter + '--',   # closing delimiter
        '',                 # makes the body end with a trailing CRLF
    )
    return '\r\n'.join(parts), 'multipart/form-data; boundary=%s' % BOUNDARY
class FakeTransport(object):
    """In-memory stand-in for a transport.

    Everything passed to write() or writeSequence() is appended to an
    internal string buffer and can be read back with getData().
    """

    def __init__(self):
        self.io = StringIO.StringIO()

    def write(self, data):
        """Append data to the buffer."""
        self.io.write(data)

    def writeSequence(self, sequence):
        """Append every item of sequence to the buffer, in order."""
        append = self.io.write
        for chunk in sequence:
            append(chunk)

    def getData(self):
        """Return everything written so far as a single string."""
        return self.io.getvalue()
class MarkupValidator(resource.Resource):
    """A simple gateway to a validator service for Twisted Web.

    On GET, the page named by the request's Referer header is rendered
    again through the request's own process() call (no HTTP round trip),
    the captured body is POSTed to ``uri`` as a multipart form field
    named ``arg``, and the validator's answer is sent back to the client.
    """
    # Please, install a validator on your server!
    uri = 'http://validator.w3.org/check'
    arg = 'fragment'

    def __init__(self, site):
        """site is the site object of your twisted.web server
        """
        # Let the base class set up its own state; skipping this leaves
        # the resource only partially initialised.
        resource.Resource.__init__(self)
        self.site = site

    def render_GET(self, request):
        """Validate the page the client came from and return the result.

        Returns a short 400 text response when there is no Referer
        header; otherwise returns server.NOT_DONE_YET and answers later
        by writing directly to the client's transport.
        """
        # Get the referer and parse it
        referer = request.getHeader('referer')
        if not referer:
            # Without a referer there is no page to validate, and
            # urlparse(None) would raise instead of answering the client.
            request.setResponseCode(400)
            request.setHeader('content-type', 'text/plain')
            return ('twvalidator: missing Referer header; '
                    'follow a link from the page you want to validate.\n')

        scheme, netloc, path, parameters, query, fragment = urlparse(referer)
        args = parse_qs(query)

        transport = request.transport
        clientproto = request.clientproto
        capture = FakeTransport()

        def respond(status, extraHeaders, body):
            # The request object has already been finished by the inner
            # render, so the answer goes straight to the real transport.
            # A status line is written first so the client receives a
            # well-formed HTTP response rather than a bare body.
            request.clientproto = clientproto
            transport.write(
                'HTTP/1.0 %s\r\n%sConnection: close\r\n\r\n'
                % (status, extraHeaders))
            transport.write(body)
            transport.loseConnection()

        def finish(data):
            # Write the validator's answer back to our client.
            # NOTE(review): no Content-Type is sent because the validator
            # output type is not known here -- confirm this is acceptable.
            respond('200 OK', '', data)

        def fail(failure):
            # Report the problem instead of leaving the client waiting
            # forever on a connection nobody will ever write to.
            respond('502 Bad Gateway', 'Content-Type: text/plain\r\n',
                    'twvalidator: validation failed: %s\n'
                    % failure.getErrorMessage())

        # Modify the original request
        request.uri = referer
        request.path = path
        request.args = args
        request.clientproto = 'HTTP/1.0'  # we don't want chunk encoding
        request.transport = capture

        # Reload the modified request on the server, without using HTTP
        deferred = request.notifyFinish()
        request.process()

        deferred.addCallback(lambda _: capture.getData())
        deferred.addCallback(self.validate, request)
        # fail() covers errors from the render, the capture and the
        # validator call; it is not chained after finish() so a partial
        # answer is never followed by a second one.
        deferred.addCallbacks(finish, fail)
        return server.NOT_DONE_YET

    def validate(self, data, request):
        """Send the captured response body to the validator service.

        data is the raw response (head and body) captured from the inner
        render; request is the original request, whose headers are used
        as the base for the outgoing POST.  Returns whatever
        client.getPage returns for that POST.
        """
        # We need only the body.  When no blank line separates head from
        # body, keep the data as it is rather than slicing at a bogus
        # offset (find() returning -1 would otherwise drop 3 characters).
        headEnd = data.find('\r\n\r\n')
        if headEnd != -1:
            data = data[headEnd + 4:]

        # Build the request for the validator service, using a copy of
        # the original request headers so the request itself is not
        # modified.
        headers = dict(request.received_headers)
        data, contentType = encodeFormData(self.arg, data)
        headers['content-type'] = contentType
        for name in ('cookie', 'referer', 'host'):
            headers.pop(name, None)

        return client.getPage(
            self.uri, method='POST', headers=headers, postdata=data
        )
if __name__ == '__main__':
    # A simple usage example: serve one XHTML page on port 8080 with a
    # "validate" link that points at a MarkupValidator child resource.
    from twisted.web import server
    from twisted.internet import reactor

    class Simple(resource.Resource):
        """Root resource that serves a single static XHTML page."""

        def render_GET(self, request):
            page = """<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"
lang="en" xml:lang="en">
<head>
<title>twvalidator example</title>
</head>
<body>
<h1>twvalidator example</h1>
<p><a href="validate">validate</a></p>
</body>
</html>"""
            return page

        def getChild(self, name, request):
            # Any non-empty segment goes through the default lookup;
            # the empty segment is answered by this resource itself.
            if name != '':
                return resource.Resource.getChild(
                    self, name, request)
            return self

    root = Simple()
    site = server.Site(root)
    validator = MarkupValidator(site)
    root.putChild('validate', validator)
    reactor.listenTCP(8080, site)
    reactor.run()
|
Public markup validator services, like http://validator.w3.org/, are very useful for validating web pages of your web application, but they are not usable when:
1) your application is on a private network 2) your application is accessible only with HTTPS 3) your application's rendering depends on the request (cookies).
twvalidator solves the problem by sitting between your application and the original validator service.
When a request comes to twvalidator, it looks at the Referer header, requests the corresponding resource, and posts the resulting page to the validator service, returning the result.
The important feature is that twvalidator uses the original request to obtain your page, assuming that the same headers that would be sent to the original resource (like cookies) are sent to the twvalidator resource.
Another feature of twvalidator is that the page is requested using the internal Twisted Web API, instead of using HTTP, allowing great flexibility.