Welcome, guest | Sign In | My Account | Store | Cart

This recipe is a reimplemtation of this recipe,

http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/415503

using Python 2.4 decorator syntax.

It also has added support for boolean and unicode types, and a keyword argument (compress=False) for the dumps function, which will compress the string.

Python, 199 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from types import (
    IntType, TupleType, StringType,
    FloatType, LongType, ListType,
    DictType, NoneType, BooleanType, UnicodeType
)

from struct import pack, unpack
from cStringIO import StringIO
import zlib

class EncodeError(Exception): pass
class DecodeError(Exception): pass

HEADER = "SRW3"

protocol = {
    TupleType  :"T",
    ListType   :"L",
    DictType   :"D",
    LongType   :"B",
    IntType    :"I",
    FloatType  :"F",
    StringType :"S",
    NoneType   :"N",
    BooleanType:"b",
    UnicodeType:"U"
}

encoder = {}
class register_encoder_for_type(object):
    """Registers an encoder function, for a type, in the global encoder dictionary."""
    def __init__(self, t):
        self.type = t
    def __call__(self, func):
        encoder[self.type] = func
        return func

#contains dictionary of decoding functions, where the dictionary key is the type prefix used.
decoder = {}
class register_decoder_for_type(object):
    """Registers a decoder function, for a prefix, in the global decoder dictionary."""
    def __init__(self, t):
        self.prefix = protocol[t]
    def __call__(self, func):
        decoder[self.prefix] = func
        return func

## <encoding functions> ##
@register_encoder_for_type(DictType)
def enc_dict_type(obj):
    data = "".join([encoder[type(i)](i) for i in obj.items()])
    return "%s%s%s" % ("D", pack("!L", len(data)), data)

@register_encoder_for_type(TupleType)
@register_encoder_for_type(ListType)
def enc_list_type(obj):
    data = "".join([encoder[type(i)](i) for i in obj])
    return "%s%s%s" % (protocol[type(obj)], pack("!L", len(data)), data)

@register_encoder_for_type(IntType)
def enc_int_type(obj):
    return "%s%s" % (protocol[IntType], pack("!i", obj))

@register_encoder_for_type(FloatType)
def enc_float_type(obj):
    return "%s%s" % (protocol[FloatType], pack("!d", obj))

@register_encoder_for_type(LongType)
def enc_long_type(obj):
    obj = hex(obj)[2:-1]
    return "%s%s%s" % (protocol[LongType], pack("!L", len(obj)), obj)

@register_encoder_for_type(UnicodeType)
def enc_unicode_type(obj):
    obj = obj.encode('utf-8')
    return "%s%s%s" % (protocol[UnicodeType], pack("!L", len(obj)), obj)


@register_encoder_for_type(StringType)
def enc_string_type(obj):
    return "%s%s%s" % (protocol[StringType], pack("!L", len(obj)), obj)

@register_encoder_for_type(NoneType)
def enc_none_type(obj):
    return protocol[NoneType]

@register_encoder_for_type(BooleanType)
def enc_bool_type(obj):
    return protocol[BooleanType] + str(int(obj))

def dumps(obj, compress=False):
    """Encode simple Python types into a binary string."""
    option = "N"
    if compress: option = "Z"
    try:
        data = encoder[type(obj)](obj)
        if compress: data = zlib.compress(data)
        return "%s%s%s" % (HEADER, option, data)
    except KeyError, e:
        raise EncodeError, "Type not supported. (%s)" % e
## </encoding functions> ##

## <decoding functions> ##
def build_sequence(data, cast=list):
    size = unpack('!L', data.read(4))[0]
    items = []
    data_tell = data.tell
    data_read = data.read
    items_append = items.append
    start_position = data.tell()
    while (data_tell() - start_position) < size:
        T = data_read(1)
        value = decoder[T](data)
        items_append(value)
    return cast(items)

@register_decoder_for_type(TupleType)
def dec_tuple_type(data):
    return build_sequence(data, cast=tuple)

@register_decoder_for_type(ListType)
def dec_list_type(data):
    return build_sequence(data, cast=list)

@register_decoder_for_type(DictType)
def dec_dict_type(data):
    return build_sequence(data, cast=dict)

@register_decoder_for_type(LongType)
def dec_long_type(data):
    size = unpack('!L', data.read(4))[0]
    value = long(data.read(size),16)
    return value

@register_decoder_for_type(StringType)
def dec_string_type(data):
    size = unpack('!L', data.read(4))[0]
    value = str(data.read(size))
    return value

@register_decoder_for_type(FloatType)
def dec_float_type(data):
    value = unpack('!d', data.read(8))[0]
    return value

@register_decoder_for_type(IntType)
def dec_int_type(data):
    value = unpack('!i', data.read(4))[0]
    return value

@register_decoder_for_type(NoneType)
def dec_none_type(data):
    return None

@register_decoder_for_type(BooleanType)
def dec_bool_type(data):
    value = int(data.read(1))
    return bool(value)

@register_decoder_for_type(UnicodeType)
def dec_unicode_type(data):
    size = unpack('!L', data.read(4))[0]
    value = data.read(size).decode('utf-8')
    return value

def loads(data):
    """
    Decode a binary string into the original Python types.
    """
    buffer = StringIO(data)
    header = buffer.read(len(HEADER))
    assert header == HEADER
    option = buffer.read(1)
    decompress = False
    if option == "Z":
        buffer = StringIO(zlib.decompress(buffer.read()))
    try:
        value = decoder[buffer.read(1)](buffer)
    except KeyError, e:
        raise DecodeError, "Type prefix not supported. (%s)" % e

    return value
## </decoding functions> ##

try:
    import psyco
    dumps = psyco.proxy(dumps)
    loads = psyco.proxy(loads)
except ImportError:
    pass

if __name__ == "__main__":
    value = (u'\N{POUND SIGN} Testing unicode', {True:False},[1,2,3,4],["simon"],("python is","cool"),
"pi equals",3.14,("longs are ok",
912398102398102938102398109238019283012983019238019283019283))
    data = dumps(value)
    print data
    x = loads(data)
    print x

Why is this recipe useful?

The marshal module is unsafe. The documentation clearly states this. Also, streams generated by marshal are not compatible across Python versions.

Pickle is also unsafe, and complex. It should not be used for passing objects across untrusted network connections.

XML is bloated, and requires complicated parsers and generators. This reciple provides a simple binary protocol, which can safely decode strings recieved from untrusted sources, without the bloat of an XML string.

This recipe will only serialize IntType, TupleType, StringType, FloatType, LongType, ListType, DictType, NoneType, UnicodeType and BooleanType. It will not serialize recursive data structures, and will not protect you if you decide to try it. :)

The protocol can also be easily changed, by modifying the protocol dictionary. It assumes long ints are 4 bytes long, and decodes and encodes unicode strings using the utf-8 codec.

2 comments

Scott David Daniels 18 years, 10 months ago  # | flag

FloatType should be encoded as the eight-byte '!d', not '!f' FloatType is using the encoding type for C/C++ floats, while Python floats use C/C++ doubles. If you make the appropriate changes, you can end your main test block with:

assert x == value
S W (author) 18 years, 10 months ago  # | flag

Ahhh. Yes, thanks for the tip. I wondered what was happening to my floats, now I know! :D