"""Split a text file into numbered output files of a fixed number of lines."""

import sys
import os

# Prefix shared by every output file; a zero-padded serial number follows it.
OUTFIL_PREFIX = "out_"


def make_out_filename(prefix, idx):
    """Return prefix followed by idx zero-padded to at least four digits.

    Example: make_out_filename("out_", 7) returns "out_0007".
    """
    return prefix + str(idx).zfill(4)


def split(in_filename, lines_per_file):
    """Split in_filename into output files of lines_per_file lines each.

    Output files are created in the current working directory and are named
    OUTFIL_PREFIX plus a serial number starting at 1 (out_0001, out_0002,
    ...). The last file may hold fewer lines. The first output file is
    created even when the input has no lines.

    Args:
        in_filename: Path of the text file to read.
        lines_per_file: Maximum number of lines written to each output file.
            The caller is expected to pass a positive integer; main()
            validates this before calling.

    Raises:
        IOError: If the input or an output file cannot be opened, read,
            or written.
    """
    outfil_idx = 1
    line_count = 0
    # 'with' guarantees the input is closed even if a read or write fails.
    with open(in_filename, "r") as in_fil:
        out_fil = open(make_out_filename(OUTFIL_PREFIX, outfil_idx), "w")
        try:
            # A text file is iterable line by line.
            for lin in in_fil:
                # Current output file is full: rotate to the next one.
                if line_count >= lines_per_file:
                    line_count = 0
                    out_fil.close()
                    outfil_idx += 1
                    out_fil = open(
                        make_out_filename(OUTFIL_PREFIX, outfil_idx), "w")
                line_count += 1
                out_fil.write(lin)
        finally:
            # Closing an already-closed file is a no-op, so this is safe
            # even when opening the next output file failed mid-rotation.
            out_fil.close()
    sys.stderr.write(
        "Output is in file(s) with prefix {}\n".format(OUTFIL_PREFIX))


def usage():
    """Write the command-line usage message to standard error."""
    sys.stderr.write(
        "Usage: {} in_filename lines_per_file\n".format(sys.argv[0]))


def main():
    """Validate the command-line arguments and split the input file.

    Expects sys.argv to be [program, in_filename, lines_per_file].
    Exits with status 1 on a usage error, a missing or empty input file,
    a non-integer or non-positive lines_per_file, or an I/O failure.
    Any other exception is reported on standard error and re-raised.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    try:
        # Get and validate in_filename.
        in_filename = sys.argv[1]
        # If input file does not exist, exit.
        if not os.path.exists(in_filename):
            sys.stderr.write(
                "Error: Input file '{}' not found.\n".format(in_filename))
            sys.exit(1)
        # If input is empty, exit.
        if os.path.getsize(in_filename) == 0:
            sys.stderr.write(
                "Error: Input file '{}' has no data.\n".format(in_filename))
            sys.exit(1)

        # Get and validate lines_per_file; int() raises ValueError on
        # non-numeric input, which is handled below.
        lines_per_file = int(sys.argv[2])
        if lines_per_file <= 0:
            sys.stderr.write(
                "Error: lines_per_file cannot be less than or equal to 0.\n")
            sys.exit(1)

        # If all checks pass, split the file.
        split(in_filename, lines_per_file)
    except ValueError as ve:
        sys.stderr.write("Caught ValueError: {}\n".format(repr(ve)))
        # Report failure to the shell, like the other validation errors.
        sys.exit(1)
    except IOError as ioe:
        sys.stderr.write("Caught IOError: {}\n".format(repr(ioe)))
        sys.exit(1)
    except Exception as e:
        sys.stderr.write("Caught Exception: {}\n".format(repr(e)))
        raise


if __name__ == '__main__':
    main()