This code is useful if you are using an HTTP client and want to simulate the request a browser sends when it submits a form containing several input fields (including file upload fields). I have used it with Python 2.x.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
uploadform.py - A simple/minimalist yet flexible helper module to construct a POSTable mime
message that is similar to a message generated by a browser when you submit an html form.
Supports file uploads and even "incremental uploads" (by yielding) if you don't
want to read the whole uploadable file into memory before POSTing it to the HTTP server.
Provided "as is" - use it at your own risk and don't complain if it cuts down your limbs...
Run this module to execute the test/example code.
'''
import uuid, urllib
# Author credit for this module.
__author__ = 'István Pásztor'
# Names exported by a star-import of this module: only the UploadForm class is public.
__all__ = ['UploadForm']
class UploadForm:
    '''
    Builds the headers and the body of a multipart/form-data POST request.

    Fields are added with add_field(). Iterating over the form yields the request body
    chunk by chunk, str(form) returns the whole body as one string, dump() writes it to a
    file-like object and get_size() returns the length of the body without generating it.
    '''

    def __init__(self, boundary=None):
        '''
        @param boundary: Optional boundary string, see set_boundary(). A random one is generated when omitted.
        '''
        self.set_boundary(boundary)
        self.fields = []

    def set_boundary(self, boundary):
        '''
        @param boundary: The string used to delimit the fields inside the body. A specified boundary is
        stored after passing it through urllib.quote(). If it is empty or None then a random, uuid based
        boundary is generated.
        '''
        if boundary:
            self.boundary = urllib.quote(boundary)
        else:
            # RFC 2046 restricts the characters that may appear in a boundary and curly braces
            # are not among them, so the uuid is decorated only with '_' and '-' characters.
            self.boundary = '__--__%s__--__' % uuid.uuid4()

    def add_field(self, name, value, **params):
        '''
        Appends a new field to the form and returns it so that additional headers can be added to it.
        See _Field.__init__() for the description of the parameters.
        '''
        field = _Field(name, value, **params)
        self.fields.append(field)
        return field

    def get_request_headers(self, calculate_content_length=True):
        '''
        @param calculate_content_length: If true then the size of the body is calculated and a
        "Content-Length" header is added besides the "Content-Type" header.
        @return: A (headers, content_length) tuple if calculate_content_length is true, otherwise
        only the headers dictionary. Note that the shape of the return value depends on the parameter!
        '''
        assert self.fields, 'the form has no fields'
        headers = {'Content-Type': 'multipart/form-data; boundary=' + self.boundary}
        if not calculate_content_length:
            return headers
        content_length = self.get_size()
        headers['Content-Length'] = str(content_length)
        return (headers, content_length)

    def __iter__(self):
        '''Yields the request body as a series of string chunks.'''
        assert self.fields, 'the form has no fields'
        delimiter = '--' + self.boundary + '\r\n'
        for field in self.fields:
            yield delimiter
            for chunk in field:
                yield chunk
        # The closing delimiter has an extra '--' after the boundary.
        yield '--'
        yield self.boundary
        yield '--\r\n'

    def __str__(self):
        '''Returns the whole request body as a single string.'''
        return ''.join(self)

    def dump(self, f):
        '''Writes the request body chunk by chunk to f by calling f.write().'''
        for chunk in self:
            f.write(chunk)

    def get_size(self):
        '''
        @return: The length of the request body, calculated without generating the body.
        '''
        # Every field is preceded by '--' + boundary + '\r\n'. The closing delimiter has the
        # same structure plus two more characters because of its trailing '--'.
        delimiter_size = 2 + len(self.boundary) + 2
        fields_size = sum(field.get_size() for field in self.fields)
        return delimiter_size * (len(self.fields) + 1) + 2 + fields_size
class _Header:
    '''
    A single mime header of a field: "name: value" optionally followed by '; key="value"'
    parameters and closed by a CRLF.
    '''

    def __init__(self, _header_name, _header_value, **params):
        '''
        @param _header_name: The name of the header, for example 'Content-Type'.
        @param _header_value: The value of the header, for example 'text/plain'.
        @param params: The parameters of the header, for example charset='UTF-8'.
        '''
        self.name, self.value, self.params = _header_name, _header_value, params

    @staticmethod
    def _escape(text):
        '''Backslash-escapes the backslashes and the double quotes of a parameter value.'''
        # Backslashes must be handled first, otherwise the ones inserted for the quotes would be doubled.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def __iter__(self):
        '''Yields the header line as a series of string chunks.'''
        yield _stringify(self.name)
        yield ': '
        yield _stringify(self.value)
        for key, raw in self.params.iteritems():
            yield '; '
            yield _stringify(key)
            yield '="'
            yield self._escape(_stringify(raw))
            yield '"'
        yield '\r\n'

    def get_size(self):
        '''
        @return: The total length of the chunks yielded by __iter__().
        '''
        # name + ': ' + value + '\r\n'
        total = 4 + len(_stringify(self.name)) + len(_stringify(self.value))
        for key, raw in self.params.iteritems():
            text = _stringify(raw)
            # '; ' + key + '="' + escaped value + '"' - every backslash and double quote
            # in the value takes one extra character because of the escaping.
            total += 5 + len(_stringify(key)) + len(text) + text.count('\\') + text.count('"')
        return total
class _Field:
BUFSIZE = 16*1024 # BUFSIZE used when yielding data from file objects
def __init__(self, name, value, **params):
'''
@param params: Contains additional key-value pairs besides the "name" parameter for the Content-Disposition header. Example: filename='x.zip'
@param value: This parameter can be a string, a unicode object, or a (file-object, size) tuple. In case of unicode object the string
is converted to utf-8 before converting to mime. In case of a file-object we read the data starting from the current file pointer.
'''
params['name'] = name
self.headers = [_Header('Content-Disposition', 'form-data', **params)]
assert isinstance(value, (str, unicode, tuple))
self.value = value
def add_header(self, name, value, **params):
''' You can add additional mime headers, for example a "Content-Type" header with "text/plain" value with optional parameters like charset="UTF-8"'''
self.headers.append(_Header(name, value, **params))
def __iter__(self):
for header in self.headers:
for chunk in header:
yield chunk
yield '\r\n'
if isinstance(self.value, str):
yield self.value
elif isinstance(self.value, unicode):
yield self.value.encode('utf-8')
else:
bytes_left = self.value[1]
while bytes_left > 0:
bytes_to_read = min(self.BUFSIZE, bytes_left)
data = self.value[0].read(bytes_to_read)
if not data:
raise Exception('The specified file object doesn\'t contain enough data!')
yield data
bytes_left -= len(data)
yield '\r\n'
def __str__(self):
return ''.join(s for s in self)
def get_size(self):
size = sum(header.get_size() for header in self.headers) + 2
if isinstance(self.value, str):
size += len(self.value)
elif isinstance(self.value, unicode):
size += len(self.value.encode('utf-8'))
else:
size += self.value[1]
size += 2 # newline
return size
def _stringify(s):
return s.encode('utf-8') if isinstance(s, unicode) else s
if __name__ == '__main__':
'''Test/example code.'''
import os
filename = 'x.zip'
filepath = os.path.join('j:\\', filename)
with open(filepath, 'rb') as f:
form = UploadForm()
# Adding a field by setting its value by reading from file like object...
form.add_field('file', (f, os.path.getsize(filepath)), filename=filename)
field = form.add_field('file_from_mem', u'This is the content of Pistike\'s text file.\n', filename='memfile.txt')
# Adding headers is optional but we do it this time just to demonstrate it...
field.add_header('Content-Type', 'text/plain', charset='UTF-8')
field.add_header('MyHeader', 'my_header_value', quote_test_param='m\\y"param"2')
form.add_field('comment', u'Árvíztűrő tükörfúrógép') # simulating a type="text" input field on the form with name="comment"
form.add_field('my_checkbox', 'on') # simulating a type="checkbox" input field on the form with name="my_checkbox"
headers, content_length = form.get_request_headers()
content = str(form)
print 'Headers: %s' % (headers,)
print 'Content-Length: %s' % len(content)
#print content
assert len(content) == content_length
import httplib
conn = httplib.HTTPConnection('localhost')
try:
conn.request('POST', '/dir2/upload', content, headers=headers)
resp = conn.getresponse()
print resp.status
print resp.read()
finally:
conn.close()
|
Note that a file upload is nothing special. If you have used an HTML <form> before, you know that you can easily define named fields in a <form> and post their values by pressing the submit button declared in that <form>. For a file upload you have to specify the method="post" and enctype="multipart/form-data" attributes on your <form> so that the form data is sent with the "multipart/form-data" Content-Type instead of the default "application/x-www-form-urlencoded". This way the payload of your request contains multipart/form-data encoded MIME data instead of a URL-encoded query string. Here is an HTML example:
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Upload Test</title>
</head>
<body>
<h1>Upload Test</h1>
<form action="/dir2/upload" method="post" enctype="multipart/form-data">
File: <input type="file" name="file" /><br/>
Branch: <input type="text" name="branch"/><br/>
<input type="submit" value="Upload!"/>
</form>
</body>
</html>
The above HTML produces the following network traffic (an HTTP request/response pair) between the browser and the HTTP server when you select a file, enter "asdf" into the branch text field and then press the submit button:
POST /dir2/upload HTTP/1.1
Host: 192.168.56.1
Content-Type: multipart/form-data; boundary=---------------------------98215567712697562581705625561
Content-Length: 562
-----------------------------98215567712697562581705625561
Content-Disposition: form-data; name="file"; filename="p4v.desktop"
Content-Type: application/x-desktop
#!/usr/bin/env xdg-open
[Desktop Entry]
Version=1.0
Type=Application
Terminal=false
Icon[en_US]=gnome-panel-launcher
Name[en_US]=p4v
Exec=/usr/local/bin/p4v-2013.1.611503/bin/p4v
Name=p4v
Icon=gnome-panel-launcher
-----------------------------98215567712697562581705625561
Content-Disposition: form-data; name="branch"
asdf
-----------------------------98215567712697562581705625561--
HTTP/1.1 200 OK
Server: SimpleHTTP/0.6 Python/2.7.6
Date: Thu, 06 Mar 2014 19:04:27 GMT
Content-Type: text/html
Content-Length: 82
<html>
<head><title>200 OK</title></head>
<body>
<h1>200 OK</h1>
</body>
</html>
If you take a look at the HTTP request you will see that the file is sent in much the same way as the simple branch text field - it just has an additional "filename" parameter in its Content-Disposition MIME header, and it also has a Content-Type MIME header that is optional but was provided by the browser. You can specify anything else as the name parameter of the file input field; it doesn't have to be "file", but almost all examples on the internet use this naming convention, and of course if you use a different name for the file input field then you have to handle it accordingly on the server side. If we used a checkbox input field in the <form>, it would simply be one more block in this MIME-encoded HTTP request body: it would have a different "name" attribute and its value would be "on" if the checkbox is checked. With my 100-liner module you can generate the "Content-Type" and "Content-Length" headers and the body of your HTTP request to simulate any kind of HTML <form> post, and you can send it with whatever HTTP client you have.
If you run this module, it executes a simple example program that puts together an HTTP request in memory and sends it with the HTTP client of httplib. This example creates the whole body of the request as a single unit, which isn't desirable in case of large files, but the UploadForm class of this module is able to generate the HTTP request body incrementally if you iterate over it (it yields the body chunk by chunk) and use an HTTP client that supports streaming request content upload.
Good luck!