Welcome, guest | Sign In | My Account | Store | Cart

This recipe shows how to create a simple binary file split utility in Python.

It takes two command-line arguments: 1) the name of the input file to split, and 2) the number of bytes to write to each output file.

Python, 89 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys
import os

# Prefix of every output filename; bsplit passes it to make_out_filename,
# which appends a zero-padded serial number.
OUTFIL_PREFIX = "out_"

def error_exit(message, code=1):
    '''Report message on stderr under an "Error:" heading, then exit.

    message is converted with str() before being written; no newline is
    appended after it. The process exits by raising SystemExit with the
    given code (1 by default).
    '''
    report = "Error:\n{}".format(str(message))
    stream = sys.stderr
    stream.write(report)
    raise SystemExit(code)

def err_write(message):
    '''Write message to stderr exactly as given (no newline is added).'''
    stream = sys.stderr
    stream.write(message)

def make_out_filename(prefix, idx):
    '''Build an output filename: prefix followed by idx as a serial number.

    The serial number is left-padded with zeros to at least four
    characters; longer numbers are kept in full.
    '''
    serial = str(idx).zfill(4)
    return prefix + serial

def bsplit(in_filename, bytes_per_file):
    '''Split the input file in_filename into output files of
    bytes_per_file bytes each. Last file may have less bytes.

    Output files are written to the current working directory. Each name
    is built by make_out_filename from OUTFIL_PREFIX and a serial number
    that starts at 1. An output file is only created once there is data
    to put into it, so an empty input produces no output files and no
    empty trailing file is ever left behind.

    Raises ValueError if bytes_per_file is not greater than 0. Errors
    from opening, reading or writing files propagate to the caller.
    '''
    if bytes_per_file <= 0:
        raise ValueError("bytes_per_file must be greater than 0.")

    # Upper bound on a single read, so that a very large bytes_per_file
    # does not pull a whole output file's worth of data into memory.
    max_read = 64 * 1024

    with open(in_filename, "rb") as in_fil:
        outfil_idx = 0
        while True:
            # Read the first chunk before opening the next output file;
            # an empty result means end of input. Truthiness is used
            # instead of comparing with '' because a binary-mode read
            # returns bytes, and b'' != '' is always true on Python 3.
            chunk = in_fil.read(min(bytes_per_file, max_read))
            if not chunk:
                break

            outfil_idx += 1
            out_filename = make_out_filename(OUTFIL_PREFIX, outfil_idx)
            with open(out_filename, "wb") as out_fil:
                remaining = bytes_per_file
                while chunk:
                    out_fil.write(chunk)
                    remaining -= len(chunk)
                    if remaining <= 0:
                        # This output file is full; move on to the next.
                        break
                    chunk = in_fil.read(min(remaining, max_read))
        
def usage():
    '''Print a two-line usage summary to stderr via err_write.

    The first line shows the invocation, using sys.argv[0] as the script
    name; the second line describes what the script does.
    '''
    script = sys.argv[0]
    lines = (
        "Usage: [ python ] {} in_filename bytes_per_file\n".format(script),
        "splits in_filename into files with bytes_per_file bytes\n",
    )
    for line in lines:
        err_write(line)

def main():
    '''Command-line entry point: validate the arguments, then split.

    Expects exactly two arguments in sys.argv: the input filename and the
    number of bytes per output file. With any other argument count the
    usage text is printed and the process exits with status 1. If a
    check fails, or an exception is raised while parsing or splitting,
    the message is reported through error_exit, which exits with its
    default status of 1.
    '''
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    try:
        # Do some checks on arguments.
        in_filename = sys.argv[1]
        if not os.path.exists(in_filename):
            error_exit(
                "Input file '{}' not found.\n".format(in_filename))
        if os.path.getsize(in_filename) == 0:
            error_exit(
                "Input file '{}' has no data.\n".format(in_filename))
        bytes_per_file = int(sys.argv[2])
        if bytes_per_file <= 0:
            error_exit(
                "bytes_per_file cannot be less than or equal to 0.\n")
        # If all checks pass, split the file.
        bsplit(in_filename, bytes_per_file)
    except Exception as exc:
        # Top-level boundary: report any failure (bad integer, I/O error,
        # anything else) the same way. The SystemExit raised by
        # error_exit above is not an Exception subclass, so it passes
        # through this handler untouched. A newline is appended so the
        # message ends the line like every other error message here.
        error_exit(str(exc) + "\n")

# Run the splitter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

It has no OS-specific code, so it should work on Windows, Linux and Mac. So far it has been tested only on Windows.

1 comment