This recipe is a reimplementation of this recipe,
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/415503
using Python 2.4 decorator syntax.
It also has added support for boolean and unicode types, and a keyword argument (compress=False) for the dumps function, which will compress the string.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | from types import (
IntType, TupleType, StringType,
FloatType, LongType, ListType,
DictType, NoneType, BooleanType, UnicodeType
)
from struct import pack, unpack
from cStringIO import StringIO
import zlib
class EncodeError(Exception):
    """Raised by dumps() when it meets an object whose type has no encoder."""
class DecodeError(Exception):
    """Raised by loads() when the stream contains an unknown type prefix."""
# Magic prefix written at the start of every encoded stream and checked by loads().
HEADER = "SRW3"

# One-character wire tag for each supported Python type.
protocol = {
    # scalars
    NoneType: "N",
    BooleanType: "b",
    IntType: "I",
    LongType: "B",
    FloatType: "F",
    # text
    StringType: "S",
    UnicodeType: "U",
    # containers
    TupleType: "T",
    ListType: "L",
    DictType: "D",
}
# Maps each supported Python type to the function that encodes it.
encoder = {}


class register_encoder_for_type(object):
    """Decorator that files a function in the global ``encoder`` table.

    Write ``@register_encoder_for_type(SomeType)`` above an encoding function.
    The function itself is handed back unchanged, so several registrations
    can be stacked on one function.
    """

    def __init__(self, t):
        # The type that will be used as the key in ``encoder``.
        self.type = t

    def __call__(self, func):
        key = self.type
        encoder[key] = func
        return func
# Maps each one-character type prefix to the function that decodes it.
decoder = {}


class register_decoder_for_type(object):
    """Decorator that files a function in the global ``decoder`` table.

    The key is not the type itself but its wire prefix, looked up in
    ``protocol`` when the decorator is created. The decorated function is
    handed back unchanged.
    """

    def __init__(self, t):
        # Resolve the type to its prefix now; a type missing from ``protocol``
        # raises KeyError here.
        self.prefix = protocol[t]

    def __call__(self, func):
        key = self.prefix
        decoder[key] = func
        return func
## <encoding functions> ##
@register_encoder_for_type(DictType)
def enc_dict_type(obj):
    """Encode a dict as its tag, a 4-byte payload length, and the payload.

    Every (key, value) pair from ``obj.items()`` is encoded with the encoder
    registered for its type (a tuple), so the payload is a run of encoded
    two-element tuples.

    Raises KeyError if a key or value has a type with no registered encoder.
    """
    payload = "".join([encoder[type(pair)](pair) for pair in obj.items()])
    # Take the tag from the protocol table rather than a literal "D", so that
    # editing the table keeps this encoder and the registered decoder in step.
    return "%s%s%s" % (protocol[DictType], pack("!L", len(payload)), payload)
@register_encoder_for_type(TupleType)
@register_encoder_for_type(ListType)
def enc_list_type(obj):
    """Encode a list or tuple as its tag, a 4-byte payload length, and the payload.

    The payload is the concatenation of each element encoded with the encoder
    registered for that element's type; the tag is chosen by the type of
    ``obj`` itself.
    """
    chunks = []
    for element in obj:
        encode = encoder[type(element)]
        chunks.append(encode(element))
    body = "".join(chunks)
    tag = protocol[type(obj)]
    return tag + pack("!L", len(body)) + body
@register_encoder_for_type(IntType)
def enc_int_type(obj):
    """Encode a plain int as its tag followed by a 4-byte signed value.

    Where int is wider than 32 bits, a value outside the signed 32-bit range
    does not fit the "!i" field. Such a value is written in the long-integer
    wire format instead, so it round-trips by value and decodes as a long.
    """
    if -0x80000000 <= obj <= 0x7FFFFFFF:
        return "%s%s" % (protocol[IntType], pack("!i", obj))
    # Long-integer layout: tag, 4-byte digit count, hexadecimal digits
    # (with a leading "-" for negative values).
    digits = "%x" % obj
    return "%s%s%s" % (protocol[LongType], pack("!L", len(digits)), digits)
@register_encoder_for_type(FloatType)
def enc_float_type(obj):
    """Encode a float as its tag followed by an 8-byte "!d" double."""
    tag = protocol[FloatType]
    return tag + pack("!d", obj)
@register_encoder_for_type(LongType)
def enc_long_type(obj):
    """Encode a long as its tag, a 4-byte digit count, and hexadecimal digits.

    Negative values are written with a leading "-", which the base-16
    conversion on the decoding side accepts.
    """
    # "%x" yields bare hex digits with an optional sign. Slicing the result of
    # hex() assumed a fixed "0x" prefix and turned '-0xffL' into 'xff', which
    # cannot be decoded.
    digits = "%x" % obj
    return "%s%s%s" % (protocol[LongType], pack("!L", len(digits)), digits)
@register_encoder_for_type(UnicodeType)
def enc_unicode_type(obj):
    """Encode a unicode string as its tag, a 4-byte byte count, and UTF-8 bytes."""
    utf8 = obj.encode('utf-8')
    length_field = pack("!L", len(utf8))
    return protocol[UnicodeType] + length_field + utf8
@register_encoder_for_type(StringType)
def enc_string_type(obj):
    """Encode a byte string as its tag, a 4-byte length, and the raw bytes."""
    length_field = pack("!L", len(obj))
    return protocol[StringType] + length_field + obj
@register_encoder_for_type(NoneType)
def enc_none_type(obj):
    """Encode None as its tag alone; there is no payload."""
    tag = protocol[NoneType]
    return tag
@register_encoder_for_type(BooleanType)
def enc_bool_type(obj):
    """Encode a bool as its tag followed by the character "1" or "0"."""
    return "%s%d" % (protocol[BooleanType], obj)
def dumps(obj, compress=False):
"""Encode simple Python types into a binary string."""
option = "N"
if compress: option = "Z"
try:
data = encoder[type(obj)](obj)
if compress: data = zlib.compress(data)
return "%s%s%s" % (HEADER, option, data)
except KeyError, e:
raise EncodeError, "Type not supported. (%s)" % e
## </encoding functions> ##
## <decoding functions> ##
def build_sequence(data, cast=list):
    """Decode a length-prefixed run of items from ``data`` and return cast(items).

    ``data`` is a file-like object positioned just after a container tag. The
    first four bytes give the payload size; items are then decoded one after
    another (a one-character tag selects the decoder) until at least that
    many bytes have been consumed.
    """
    (size,) = unpack('!L', data.read(4))
    end = data.tell() + size
    collected = []
    while data.tell() < end:
        tag = data.read(1)
        collected.append(decoder[tag](data))
    return cast(collected)
@register_decoder_for_type(TupleType)
def dec_tuple_type(data):
    """Decode a length-prefixed run of items from ``data`` into a tuple."""
    return build_sequence(data, tuple)
@register_decoder_for_type(ListType)
def dec_list_type(data):
    """Decode a length-prefixed run of items from ``data`` into a list."""
    return build_sequence(data, list)
@register_decoder_for_type(DictType)
def dec_dict_type(data):
    """Decode a length-prefixed run of items from ``data`` and build a dict from them."""
    return build_sequence(data, dict)
@register_decoder_for_type(LongType)
def dec_long_type(data):
    """Decode a long: a 4-byte digit count followed by base-16 digits."""
    (length,) = unpack('!L', data.read(4))
    digits = data.read(length)
    return long(digits, 16)
@register_decoder_for_type(StringType)
def dec_string_type(data):
    """Decode a byte string: a 4-byte length followed by that many bytes."""
    (length,) = unpack('!L', data.read(4))
    return str(data.read(length))
@register_decoder_for_type(FloatType)
def dec_float_type(data):
    """Decode a float from the next 8 bytes, read as a "!d" double."""
    (number,) = unpack('!d', data.read(8))
    return number
@register_decoder_for_type(IntType)
def dec_int_type(data):
    """Decode an int from the next 4 bytes, read as a "!i" signed integer."""
    (number,) = unpack('!i', data.read(4))
    return number
@register_decoder_for_type(NoneType)
def dec_none_type(data):
    """Decode None; the tag carries no payload, so nothing is read from ``data``."""
    return None
@register_decoder_for_type(BooleanType)
def dec_bool_type(data):
    """Decode a bool from a single digit character (zero is False)."""
    digit = data.read(1)
    return bool(int(digit))
@register_decoder_for_type(UnicodeType)
def dec_unicode_type(data):
    """Decode a unicode string: a 4-byte byte count followed by UTF-8 bytes."""
    (length,) = unpack('!L', data.read(4))
    raw = data.read(length)
    return raw.decode('utf-8')
def loads(data):
"""
Decode a binary string into the original Python types.
"""
buffer = StringIO(data)
header = buffer.read(len(HEADER))
assert header == HEADER
option = buffer.read(1)
decompress = False
if option == "Z":
buffer = StringIO(zlib.decompress(buffer.read()))
try:
value = decoder[buffer.read(1)](buffer)
except KeyError, e:
raise DecodeError, "Type prefix not supported. (%s)" % e
return value
## </decoding functions> ##
# Optional speed-up: wrap the two entry points with psyco when it is installed.
try:
    import psyco
except ImportError:
    # psyco is not available; keep the plain Python functions.
    pass
else:
    # Kept outside the try body so that only a missing psyco is tolerated; an
    # ImportError raised while proxying is no longer silently discarded.
    dumps = psyco.proxy(dumps)
    loads = psyco.proxy(loads)
if __name__ == "__main__":
    # Demo: encode a nested sample value, print the raw stream, then decode
    # it again and print the result.
    sample = (
        u'\N{POUND SIGN} Testing unicode',
        {True: False},
        [1, 2, 3, 4],
        ["simon"],
        ("python is", "cool"),
        "pi equals",
        3.14,
        ("longs are ok",
         912398102398102938102398109238019283012983019238019283019283),
    )
    encoded = dumps(sample)
    print(encoded)
    decoded = loads(encoded)
    print(decoded)
|
Why is this recipe useful?
The marshal module is unsafe. The documentation clearly states this. Also, streams generated by marshal are not compatible across Python versions.
Pickle is also unsafe, and complex. It should not be used for passing objects across untrusted network connections.
XML is bloated, and requires complicated parsers and generators. This recipe provides a simple binary protocol, which can safely decode strings received from untrusted sources, without the bloat of an XML string.
This recipe will only serialize IntType, TupleType, StringType, FloatType, LongType, ListType, DictType, NoneType, UnicodeType and BooleanType. It will not serialize recursive data structures, and will not protect you if you decide to try it. :)
The protocol can also be easily changed, by modifying the protocol dictionary. It assumes plain ints fit in 4 bytes (long ints are written as hexadecimal text with a 4-byte length prefix), and decodes and encodes unicode strings using the utf-8 codec.
FloatType should be encoded as the eight-byte '!d', not '!f'. FloatType is using the encoding type for C/C++ floats, while Python floats use C/C++ doubles. If you make the appropriate changes, you can end your main test block with:
Ahhh. Yes, thanks for the tip. I wondered what was happening to my floats, now I know! :D