This recipe will split a file into several smaller files while at the same time hiding the original formatting of the file. The program has a primitive GUI design (my first one), but allows a small amount of interaction with the program. Testing has not been extensive, but all appears to be in order.
import os # give access to os.path & os.remove
import md5 # allows md5.md5(arg).digest() for file signature
import time # gives access to time.sleep
import random # gives access to random.randrange
import thread # to start new threads
import cPickle # to pickle and unpickle the index
import Tkinter # provides GUI tools
import tkFileDialog # provides some GUI dialogs
THREAD_FLAG = False # True while a split/join is running; the progress thread loops until it is cleared
class Application: # this is the main class / [function]
    '''Small Tk window that splits a file into disguised parts and joins them back.

    Splitting writes "<n>.<name>.part" files plus a pickled index "<name>.part"
    holding [original name, 256-byte substitution key, md5 digest of each part].
    Each part is encoded as: translate with the key, cut into PART_SIZE chunks,
    turn every chunk into a base-255 "code" that never contains byte 0, join the
    codes with byte 0, and translate with the key once more.  Joining verifies
    the digests and reverses those steps with the inverted key.

    This is Python 2 code (byte strings, long integers, Tkinter, cPickle).
    '''

    FILE_SIZE = 1024 * 1024 # source bytes stored per part file (approximate size of output files)
    PART_SIZE = 1024 # bytes per chunk handed to the string <-> number encoder

    # main class function
    def __init__(self):
        '''Build the two-button window and enter the Tk main loop (blocks until closed).'''
        self.__root = Tkinter.Tk() # open the main window
        self.__root.title('Split & Join') # give it a name
        self.__root.resizable(False, False) # disable resizing
        # starts the splitter engine
        split = Tkinter.Button(self.__root, text='Split', font='Courier 8', command=self.split)
        split.grid(row=0, column=0, padx=15, pady=5)
        # starts the joiner engine
        join = Tkinter.Button(self.__root, text='Join', font='Courier 8', command=self.join)
        join.grid(row=0, column=1, padx=15, pady=5)
        # used for picking the source file / the destination folder
        self.__open = tkFileDialog.Open()
        self.__save = tkFileDialog.Directory()
        # don't forget to execute!
        self.__root.mainloop()

    # wrap the splitter engine
    def split(self):
        '''Run the splitter; prints 'SPLIT - DONE' or 'SPLIT - FAIL' on the console.'''
        self.__run(self.__do_split, 'SPLIT')

    # wrap the joiner engine
    def join(self):
        '''Run the joiner; prints 'JOIN - DONE' or 'JOIN - FAIL' on the console.'''
        self.__run(self.__do_join, 'JOIN')

    # shared wrapper for both engines
    def __run(self, engine, label):
        '''Hide the window, run engine() under the progress spinner, report the outcome.

        Does nothing when an operation is already running (THREAD_FLAG set).
        Failures are reported as '<label> - FAIL' followed by the traceback, so
        the reason is no longer silently discarded.
        '''
        import traceback # local import: only needed to report failures
        if THREAD_FLAG:
            return
        self.__root.withdraw()
        self.__start()
        outcome, details = label + ' - FAIL', ''
        try:
            try:
                engine()
                outcome = label + ' - DONE'
            except Exception:
                details = traceback.format_exc()
        finally:
            # always stop the spinner and bring the window back, whatever happened
            self.__stop(outcome)
            if details:
                print(details)
            self.__root.deiconify()

    # wipe the console on any platform
    def __clear_console(self):
        '''Clear the terminal ('cls' on Windows, 'clear' elsewhere).'''
        if os.name == 'nt':
            os.system('cls')
        else:
            os.system('clear')

    # remind the user that the program is working
    def __working(self):
        '''Spinner loop run in a background thread until THREAD_FLAG is cleared.'''
        state, key = 0, ['|', '/', '-', '\\']
        while THREAD_FLAG:
            self.__clear_console()
            # one more dot every 8 ticks, followed by the rotating bar
            print('.' * (state // 8) + key[state % 4])
            state += 1
            time.sleep(0.125)

    # start the reminder thread
    def __start(self):
        '''Set THREAD_FLAG and launch the spinner thread.'''
        global THREAD_FLAG
        THREAD_FLAG = True
        thread.start_new_thread(self.__working, ())

    # stop the reminder thread
    def __stop(self, message):
        '''Clear THREAD_FLAG, wait for the spinner to finish, then print message.'''
        global THREAD_FLAG
        THREAD_FLAG = False
        time.sleep(0.25) # two spinner ticks, so the thread sees the cleared flag first
        self.__clear_console()
        print(message)

    # get the signature of the file specified by path
    def __signature(self, path):
        '''Return the raw md5 digest of the file at path (file is always closed).'''
        handle = open(path, 'rb')
        try:
            return md5.md5(handle.read()).digest()
        finally:
            handle.close()

    # split string so len(part) == size
    def __partition(self, string, size):
        '''Return consecutive slices of string, each size long (the last may be shorter).'''
        return [string[start:start + size] for start in range(0, len(string), size)]

    # get a source file and a destination folder
    def __get_source_and_destination(self):
        '''Ask for a source file, then a destination folder.

        Returns (source opened in 'rb' mode, destination path).  Raises
        ValueError when either dialog is cancelled; the source is opened only
        after both answers are known, so a cancel cannot leak an open file.
        '''
        source_path = self.__open.show()
        if not source_path:
            raise ValueError('no source file was selected')
        destination = self.__save.show()
        if not destination:
            raise ValueError('no destination folder was selected')
        return open(source_path, 'rb'), destination

    # create a random key
    def __new_key(self):
        '''Return a 256-character string holding every byte value once, in random order.'''
        return ''.join([chr(value) for value in random.sample(range(256), 256)])

    # encode a string
    def __s2c(self, string):
        '''s2c(str string)
        Convert from string to code.'''
        self.__assert_type((str, string))
        return self.__n2c(self.__s2n(string))

    # convert number to code
    def __n2c(self, number):
        '''Write number in base 255 with digits stored as bytes 1..255 (never byte 0).'''
        self.__assert_type((long, number))
        digits = []
        while number:
            number, digit = divmod(number, 255)
            digits.append(chr(digit + 1))
        digits.reverse() # digits were produced least-significant first
        return ''.join(digits)

    # convert string to number
    def __s2n(self, string):
        '''Pack string into a long, base 256, below a leading 1 that preserves leading zero bytes.'''
        self.__assert_type((str, string))
        number = long(1)
        for character in string:
            number = (number << 8) + ord(character)
        return number

    # make sure that type checking passes
    def __assert_type(self, *tuples):
        '''Raise TypeError unless every (type, object) pair satisfies isinstance.'''
        for types, objects in tuples:
            if not isinstance(objects, types):
                raise TypeError('expected %s, got %s' % (types.__name__, type(objects).__name__))

    # this is the splitter engine
    def __do_split(self):
        '''Split the chosen file into encoded part files plus a pickled index.'''
        # get file and folder
        source, destination = self.__get_source_and_destination()
        try:
            name = os.path.basename(source.name)
            content = source.read()
        finally:
            # all source data has been collected, so close it
            source.close()
        # index will be the master file to the many files, key will be for mangling
        key = self.__new_key()
        index = [name, key]
        # write the individual files
        for num, part in enumerate(self.__partition(content, self.FILE_SIZE)):
            # figure out what the filename will be
            dest_path = os.path.join(destination, '%s.%s.part' % (num, name))
            # mangle part, cut it into chunks and encode every chunk
            chunks = self.__partition(part.translate(key), self.PART_SIZE)
            codes = [self.__s2c(chunk) for chunk in chunks]
            # codes never contain byte 0, so it is a safe separator; mangle once more
            encoded = chr(0).join(codes).translate(key)
            dest_file = open(dest_path, 'wb')
            try:
                dest_file.write(encoded)
            finally:
                dest_file.close()
            # add the signature to index
            index.append(self.__signature(dest_path))
        # write the index
        index_file = open(os.path.join(destination, '%s.part' % name), 'wb')
        try:
            cPickle.dump(index, index_file)
        finally:
            index_file.close()

    # return an inverted key
    def __inverse(self, key):
        '''Return the translation table that undoes str.translate(key).'''
        table = list(range(256))
        for position, char in enumerate(key):
            table[ord(char)] = chr(position)
        # a key that is not a full permutation leaves ints behind and join() raises TypeError
        return ''.join(table)

    # verify unpacking
    def __check_index(self, index, dirname, source):
        '''Return the part-file paths after checking each md5 digest listed in index.

        index is the list of digests, dirname the folder holding the parts and
        source the index file's base name.  Raises ValueError on a mismatch
        (a missing part surfaces as the IOError raised by open).
        '''
        all_path = []
        for num, signature in enumerate(index):
            path = os.path.join(dirname, '%s.%s' % (num, source))
            if self.__signature(path) != signature:
                raise ValueError('signature mismatch: %s' % path)
            all_path.append(path)
        return all_path

    # convert from code to string
    def __c2s(self, code):
        '''c2s(str code)
        Convert from code to string.'''
        self.__assert_type((str, code))
        return self.__n2s(self.__c2n(code))

    # convert from code to number
    def __c2n(self, code):
        '''Read code as base-255 digits (each byte minus 1) and return the long value.'''
        self.__assert_type((str, code))
        number = long(0)
        for character in code:
            number = number * 255 + (ord(character) - 1)
        return number

    # convert from number to string
    def __n2s(self, number):
        '''Unpack base-256 bytes from number until only the leading 1 marker remains.'''
        self.__assert_type((long, number))
        characters = []
        while number > 1:
            characters.append(chr(number & 0xFF))
            number >>= 8
        characters.reverse() # bytes were produced last-first
        return ''.join(characters)

    # this is the joiner engine
    def __do_join(self):
        '''Rebuild the original file from an index file and its part files.

        The parts and the index are deleted only after the output file has
        been written and closed, so a failure never destroys the input.
        '''
        # get the source file and destination folder
        source, destination = self.__get_source_and_destination()
        try:
            # SECURITY: unpickling can execute arbitrary code; only join index
            # files that come from a sender you trust.
            index = cPickle.load(source)
        finally:
            source.close()
        index_name = os.path.basename(source.name)
        # make sure that the name of the source agrees with itself ('.part' stripped)
        if index[0] != index_name[:-5]:
            raise ValueError('index does not belong to %s' % index_name)
        # the key must be a permutation of all 256 byte values
        key = index[1]
        if len(key) != 256 or len(set(key)) != 256:
            raise ValueError('index holds an invalid key')
        # invert the key for decoding purposes
        key = self.__inverse(key)
        # verify that all files are present and valid
        dirname = os.path.dirname(source.name)
        all_path = self.__check_index(index[2:], dirname, index_name)
        # if all files were verified, they just need to be put together now
        dest_file = open(os.path.join(destination, index[0]), 'wb')
        try:
            for path in all_path:
                part_file = open(path, 'rb')
                try:
                    data = part_file.read()
                finally:
                    part_file.close()
                # undo the outer mangling, split into codes, decode, undo the inner mangling
                codes = data.translate(key).split(chr(0))
                decoded = ''.join([self.__c2s(code) for code in codes])
                dest_file.write(decoded.translate(key))
        finally:
            dest_file.close()
        # clean up the parts and the index only once the output is safely on disk
        for path in all_path:
            os.remove(path)
        os.remove(source.name)
# Launch the GUI only when run as a script; Application() blocks in the Tk main loop.
if __name__ == '__main__':
    Application()
|
__do_split(self) and __do_join(self) have extensive documentation that should allow the algorithms to be more easily analyzed. This recipe's main purpose is to allow sending files via e-mail. The encoding and decoding systems are built into the program so that an e-mail client will not be able to reject the file because of the file's format. File formatting is destroyed (or hidden) on splitting and reconstructed on joining.