""" FileSplitter - Simple Python file split/concat module.
What it does
-==========-
1. Split a text/binary file into equal sized chunks
and save them separately.
2. Concat existing chunks and recreate
original file.
Author: Anand Pillai
Copyright : None, (Public Domain)
"""
import os, sys
class FileSplitterException(Exception):
    """Error raised when a split or join operation cannot be completed.

    The payload passed to the constructor is kept on ``value`` and is also
    forwarded to ``Exception`` so that ``args``, ``repr()``, copying and
    pickling behave like any other exception.
    """

    def __init__(self, value):
        # Forward to the base class; without this ``args`` stays empty and
        # copy/pickle cannot rebuild the exception (they call cls(*args)).
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        """Return the payload rendered as a string."""
        return str(self.value)
def usage():
    """Return the command-line help text.

    ``-i`` and ``-n`` both take a value; ``-n`` is optional because the
    splitter falls back to its built-in default chunk count.
    """
    return (
        "\nUsage: FileSplitter.py -i <filename> [-n <numchunks>] [option]\n"
        "\n"
        "Options:\n"
        "\n"
        "-s, --split Split file into chunks\n"
        "-j, --join Join chunks back to file.\n"
    )
class FileSplitter:
    """Split a file into equal-sized chunks, or join chunks back together.

    Chunks live in the current working directory and are named
    ``<basename>-<N><postfix>`` with ``N`` starting at 1.

    All ``print`` calls take a single pre-formatted argument so the output
    is identical under Python 2 and Python 3.
    """

    def __init__(self):
        # cache filename
        self.__filename = ''
        # number of equal sized chunks
        self.__numchunks = 5
        # Size of each chunk
        self.__chunksize = 0
        # Optional postfix string for the chunk filename
        self.__postfix = ''
        # Program name
        self.__progname = "FileSplitter.py"
        # Action
        self.__action = 0  # split

    def parseOptions(self, args):
        """Parse command-line style arguments into the splitter's settings.

        Recognised options: ``-i <file>``, ``-n <chunks>``, ``-s``/``--split``
        and ``-j``/``--join``.

        Args:
            args: argument list without the program name.

        Returns:
            None. An unrecognised option is printed and parsing stops.

        Raises:
            ValueError: if the ``-n`` value is not an integer.
            SystemExit: if no input filename was given.
        """
        import getopt

        try:
            # --split / --join are plain flags: no trailing '=' so that
            # getopt does not demand an argument for them.
            optlist, _ = getopt.getopt(args, 'sji:n:', ["split", "join"])
        except getopt.GetoptError as e:
            print(e)
            return None

        for option, value in optlist:
            if option == '-i':
                self.__filename = value
            elif option == '-n':
                self.__numchunks = int(value)
            elif option in ('-s', '--split'):
                self.__action = 0  # split
            elif option in ('-j', '--join'):
                self.__action = 1  # combine

        if not self.__filename:
            sys.exit("Error: filename not given")

    def do_work(self):
        """Run the action selected by parseOptions (split by default)."""
        if self.__action == 0:
            self.split()
        elif self.__action == 1:
            self.combine()
        else:
            return None

    def split(self):
        """Split the file and save the chunks to separate files.

        Every chunk holds ``filesize // numchunks`` bytes except the last,
        which takes whatever remains. Chunks are written to the cwd.

        Raises:
            FileSplitterException: if the chunk count is below 1 or the
                input file cannot be opened.
        """
        print('Splitting file %s' % self.__filename)
        print('Number of chunks %s \n' % self.__numchunks)

        if self.__numchunks < 1:
            raise FileSplitterException(
                "number of chunks must be at least 1, got %s"
                % self.__numchunks)

        try:
            f = open(self.__filename, 'rb')
        except (OSError, IOError) as e:
            raise FileSplitterException(str(e))

        try:
            bname = os.path.basename(self.__filename)
            # Get the file size
            fsize = os.path.getsize(self.__filename)
            # Integer division: exact for any file size, unlike float math.
            self.__chunksize = fsize // self.__numchunks
            chunksz = self.__chunksize
            total_bytes = 0

            for x in range(self.__numchunks):
                chunkfilename = bname + '-' + str(x + 1) + self.__postfix
                # The last chunk absorbs the remainder of the file.
                if x == self.__numchunks - 1:
                    chunksz = fsize - total_bytes
                try:
                    print('Writing file %s' % chunkfilename)
                    data = f.read(chunksz)
                    total_bytes += len(data)
                    with open(chunkfilename, 'wb') as chunkf:
                        chunkf.write(data)
                except (OSError, IOError) as e:
                    # Best effort: report the failure, try the next chunk.
                    print(e)
                    continue
                except EOFError as e:
                    print(e)
                    break
        finally:
            # Always release the input handle, even on unexpected errors.
            f.close()

        print('Done.')

    def _chunk_number(self, name):
        """Return the integer chunk index encoded in *name*, or None."""
        postfix = self.__postfix
        if postfix and name.endswith(postfix):
            name = name[:-len(postfix)]
        dash = name.rfind('-')
        if dash == -1:
            return None
        try:
            return int(name[dash + 1:])
        except ValueError:
            return None

    def sort_index(self, f1, f2):
        """Compare two chunk filenames by their numeric suffix.

        Returns a negative, zero or positive number when ``f1`` sorts
        before, equal to or after ``f2`` (ascending chunk order). Names
        without a parsable chunk number compare as equal.
        """
        n1 = self._chunk_number(f1)
        n2 = self._chunk_number(f2)
        if n1 is None or n2 is None:
            return 0
        return n1 - n2

    def combine(self):
        """Combine existing chunks to recreate the file.

        The chunks must be present in the cwd. The new file
        will be written to cwd.

        Raises:
            FileSplitterException: if no chunk files are found (so an
                existing file is never replaced by an empty one) or the
                output file cannot be written.
        """
        import re

        print('Creating file %s' % self.__filename)

        bname = os.path.basename(self.__filename)
        # re.escape covers every metacharacter in the name, and \Z anchors
        # the match so look-alikes such as "name-1.bak" are ignored.
        chunkre = re.compile(
            re.escape(bname) + '-[0-9]+' + re.escape(self.__postfix) + r'\Z')

        chunkfiles = [f for f in os.listdir(".") if chunkre.match(f)]
        print('Number of chunks %s \n' % len(chunkfiles))

        if not chunkfiles:
            raise FileSplitterException(
                "no chunks found for '%s' in the current directory" % bname)

        # Numeric order, so that chunk 10 follows chunk 9 rather than 1.
        chunkfiles.sort(key=self._chunk_number)

        try:
            with open(bname, 'wb') as out:
                for f in chunkfiles:
                    print('Appending chunk %s' % os.path.join(".", f))
                    try:
                        with open(f, 'rb') as chunk:
                            data = chunk.read()
                    except (OSError, IOError, EOFError) as e:
                        # Best effort: report the unreadable chunk, go on.
                        print(e)
                        continue
                    # Written one chunk at a time instead of accumulating
                    # the whole file in memory.
                    out.write(data)
        except (OSError, IOError, EOFError) as e:
            raise FileSplitterException(str(e))

        print('Wrote file %s' % bname)
def main():
    """Command-line entry point.

    Exits with the usage text when no arguments follow the program name;
    otherwise parses them and runs the selected split/join action.
    """
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit(usage())

    splitter = FileSplitter()
    splitter.parseOptions(cli_args)
    splitter.do_work()
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()