Routines for converting large, arbitrarily sized unsigned Python integers to packed binary data strings and vice versa.
#!/usr/bin/env python
import struct
import unittest
#-----------------------------------------------------------------------------
#: When true, the test cases print the values they exercise.
DEBUG = True

#: struct format characters keyed by word width in bits:
#: 'B' = unsigned char (8), 'H' = unsigned short (16), 'I' = unsigned int (32).
STRUCT_FMT = {8: 'B', 16: 'H', 32: 'I'}
#-----------------------------------------------------------------------------
def int_to_words(int_val, num_words=4, word_size=32):
    """
    Split an unsigned integer into a fixed number of fixed-width words.

    @param int_val: the non-negative Python integer to split. It must fit
        in word_size * num_words bits.
    @param num_words: number of words in the returned list.
    @param word_size: size/width of each individual word (in bits).
    @return: a list of num_words integers, most significant word first
        (network byte order).
    @raise IndexError: if int_val is negative or wider than
        word_size * num_words bits.
    """
    word_mask = 2 ** word_size - 1
    upper_bound = 2 ** (word_size * num_words) - 1
    if not 0 <= int_val <= upper_bound:
        raise IndexError('integer %r is out of bounds!' % hex(int_val))
    # Visit the word positions from the most significant down to the least,
    # masking each word out of the right-shifted value.
    return [int((int_val >> (word_size * position)) & word_mask)
            for position in reversed(range(num_words))]
#-----------------------------------------------------------------------------
def int_to_packed(int_val, width=128, word_size=32):
    """
    Pack an unsigned integer into a fixed-width binary string.

    @param int_val: an arbitrary sized Python integer to be packed.
    @param width: expected maximum width of the integer (in bits). Should be
        a multiple of word_size; any remainder is discarded because only
        width // word_size whole words are packed.
    @param word_size: size/width of individual words (in bits).
        Valid sizes are 8, 16 and 32 bits.
    @return: a (network byte order) packed string equivalent to integer value.
    @raise ValueError: if word_size is not one of the supported sizes.
    @raise IndexError: if int_val is negative or does not fit in the packed
        words (raised by int_to_words).
    """
    # Validate the word size first so an unsupported size (including 0) is
    # reported as such instead of surfacing as a division or bounds error.
    try:
        word_code = STRUCT_FMT[word_size]
    except KeyError:
        raise ValueError('unsupported word size: %d!' % word_size)
    # Floor division keeps the word count an integer under both classic and
    # true division semantics.
    num_words = width // word_size
    words = int_to_words(int_val, num_words, word_size)
    # '>' selects big-endian byte order, so bytes inside each word follow the
    # same most-significant-first order as the words themselves.
    fmt = '>%d%s' % (num_words, word_code)
    return struct.pack(fmt, *words)
#-----------------------------------------------------------------------------
def packed_to_int(packed_int, width=128, word_size=32):
    """
    Convert a fixed-width packed binary string back into an integer.

    @param packed_int: a packed string to be converted to an arbitrary size
        Python integer. Network byte order is assumed.
    @param width: expected maximum width of return value integer (in bits).
        Should be a multiple of word_size; any remainder is discarded because
        only width // word_size whole words are unpacked.
    @param word_size: size/width of individual words (in bits).
        Valid sizes are 8, 16 and 32 bits.
    @return: an arbitrary sized Python integer.
    @raise ValueError: if word_size is not one of the supported sizes.
    @raise struct.error: if packed_int does not have exactly the length the
        format requires (propagated from struct.unpack).
    """
    # Validate the word size first so an unsupported size (including 0) is
    # reported as such instead of surfacing as a division error.
    try:
        word_code = STRUCT_FMT[word_size]
    except KeyError:
        raise ValueError('unsupported word size: %d!' % word_size)
    # Floor division keeps the word count an integer under both classic and
    # true division semantics.
    num_words = width // word_size
    fmt = '>%d%s' % (num_words, word_code)
    int_val = 0
    # Words are unpacked most significant first: shift what has been
    # accumulated up by one word and OR the next word into the low bits.
    for word in struct.unpack(fmt, packed_int):
        int_val = (int_val << word_size) | word
    return int_val
#-----------------------------------------------------------------------------
class NetworkAddressTests(unittest.TestCase):
    """Example test case using various network address types"""

    # NOTE: this helper is deliberately not called ``debug``; that name would
    # override unittest.TestCase.debug(), which runs a test without
    # collecting the result.
    def _print_debug(self, val, expect_val_packed, actual_val_packed, new_val):
        """Print the values involved in one pack/unpack round trip."""
        # Single-argument print calls behave identically whether ``print``
        # is a statement or a function.
        print('original int : %s' % hex(val))
        print('packed int (expected) : %r' % (expect_val_packed,))
        print('packed int (actual) : %r' % (actual_val_packed,))
        print('unpacked int : %s' % hex(new_val))
        print('')

    def _check_round_trip(self, label, val, expect_val_packed, width,
                          word_size):
        """
        Pack val, unpack the result and verify both directions.

        @param label: heading printed before the values when DEBUG is set.
        @param val: integer to be packed.
        @param expect_val_packed: packed bytes that val should produce.
        @param width: total width (in bits) handed to both conversions.
        @param word_size: word width (in bits) handed to both conversions.
        """
        actual_val_packed = int_to_packed(val, width=width,
                                          word_size=word_size)
        new_val = packed_to_int(actual_val_packed, width=width,
                                word_size=word_size)
        self.assertEqual(val, new_val)
        self.assertEqual(expect_val_packed, actual_val_packed)
        if DEBUG:
            print(label)
            self._print_debug(val, expect_val_packed, actual_val_packed,
                              new_val)

    def testIPv4(self):
        """IP version 4 address test"""
        self._check_round_trip(
            'IPv4', 0xfffefffe,
            b'\xff\xfe\xff\xfe',
            width=32, word_size=8)

    def testMAC(self):
        """MAC address test"""
        self._check_round_trip(
            'MAC', 0xfffefffefffe,
            b'\xff\xfe\xff\xfe\xff\xfe',
            width=48, word_size=8)

    def testIPv6(self):
        """IP version 6 address test"""
        self._check_round_trip(
            'IPv6', 0xfffefffefffefffefffefffefffefffe,
            b'\xff\xfe\xff\xfe\xff\xfe\xff\xfe'
            b'\xff\xfe\xff\xfe\xff\xfe\xff\xfe',
            width=128, word_size=32)
#-----------------------------------------------------------------------------
# Run the test cases above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Recently, I discovered a need to convert large (128-bit) unsigned integers in Python to structures accessible from C.
Reading through the documentation on Python's `struct` module, it became apparent that `pack()` and `unpack()` don't support the conversion of arbitrarily sized Python integers to binary packed data strings directly without first breaking them up into smaller pieces.
I wrote these wrappers around `struct.pack()` and `struct.unpack()` as a convenient way of working around this limitation by adding the binary splitting and joining code. I've chosen 32 bits (`'I'` in `struct` format strings) as the maximum word size for portability across multiple architectures.
`int_to_packed()` and `packed_to_int()` use network byte order. The packed binary data strings must be of a fixed size/width (in bits) and are padded with nulls.
Recently, I had the same need. I was advised to use bitarray (http://pypi.python.org/pypi/bitarray). YMMV.
The author uses big-endian order for the words, but the bytes within each word will be little-endian if you use an Intel CPU; that is inconsistent and, in some cases, will be a problem.