This recipe introduces a novel way of compressing text and is meant primarily as an exercise. The procedures work best on standard 7-bit ASCII and worst on binary-encoded data. Please note that the function "encode" returns a string and a key, both of which must be passed to the function "decode" in order to recover the original data.
'''Module for string compression.
This module provides two functions for
compressing and decompressing strings.'''
__version__ = '1.0'
import sys as _sys
################################################################################
def encode(string, divide=1024):
    '''Compress a string.

    The distinct characters of *string*, in sorted order, form the key.
    Each chunk of the input is read as one big integer written in base
    ``len(key) + 1`` using the digits 1..len(key), and that integer is then
    rewritten in base 255 using the digits 1..255.  Neither digit set
    contains zero, so leading characters are never lost and the NUL
    character never occurs inside an encoded chunk; NUL is therefore used
    to join the chunks.

    Arguments:
        string: the text to compress.
        divide: chunk-size factor; every chunk holds
            ``divide * 256 // len(key)`` input characters.

    Returns:
        A ``(data, key)`` tuple.  Both values must be passed to ``decode``
        to recover the original string.  An empty *string* gives
        ``('', '')``.

    Raises:
        ValueError: if *divide* is too small (zero or negative) to give a
            chunk of at least one character.
    '''
    if not string:
        # No characters means an empty key; without this guard the
        # chunk-size computation below would divide by zero.
        return '', ''

    key = ''.join(sorted(set(string)))
    base = len(key) + 1
    # Floor division keeps the chunk size an integer on every Python version.
    chunk = divide * 256 // len(key)
    if chunk < 1:
        # A zero step is rejected by range(); a negative one would silently
        # produce no chunks at all and lose the data.
        raise ValueError('divide is too small to form a chunk')

    # Map each key character to its digit (1-based, so no digit is zero).
    digit_of = dict(zip(key, range(1, base)))

    def pack(text):
        'Convert one chunk of text into its base-255 representation.'
        number = 0
        for char in text:
            number = number * base + digit_of[char]
        digits = []
        while number:
            number, digit = divmod(number, 255)
            # Shift by one so that chr(0) is never emitted.
            digits.append(chr(digit + 1))
        # Digits were produced least-significant first.
        digits.reverse()
        return ''.join(digits)

    data = '\0'.join(pack(string[start:start + chunk])
                     for start in range(0, len(string), chunk))
    return data, key
def decode(string, key):
    '''Decompress a string.

    Reverses ``encode``: the data is split on the NUL character, each
    chunk is read as a big integer written in base 255 with the digits
    1..255, and that integer is rewritten in base ``len(key) + 1`` where
    digit ``d`` stands for ``key[d - 1]``.

    Arguments:
        string: the compressed data returned by ``encode``.
        key: the key returned by ``encode`` together with the data.

    Returns:
        The original, uncompressed string.

    Raises:
        ValueError: if a chunk contains a zero digit, which ``encode``
            never produces (the data is corrupt or the key is wrong).
    '''
    base = len(key) + 1

    def unpack(chunk):
        'Convert one base-255 chunk back into the text it encodes.'
        number = 0
        for char in chunk:
            number = number * 255 + ord(char) - 1
        chars = []
        while number:
            # divmod keeps the quotient an integer on every Python version.
            number, digit = divmod(number, base)
            if not digit:
                # Indexing key[-1] here would silently return a wrong
                # character instead of reporting the problem.
                raise ValueError('corrupt data or wrong key')
            chars.append(key[digit - 1])
        # Characters were recovered last-to-first.
        chars.reverse()
        return ''.join(chars)

    return ''.join(unpack(chunk) for chunk in string.split('\0'))
################################################################################
if __name__ == '__main__':
    # When run as a script, print a plain-text content-type header followed
    # by this file's own source code.
    # NOTE(review): the header looks like a CGI response header, so this is
    # presumably meant to be served by a web server - confirm.
    _sys.stdout.write('Content-Type: text/plain\n\n')
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle once the source has been read.
    with open(_sys.argv[0]) as source:
        _sys.stdout.write(source.read())
|
Tags: algorithms