Welcome, guest | Sign In | My Account | Store | Cart

This is a recipe to compare any two files via a Python command-line program. It is like a basic version of the cmp command of Unix or the fc.exe (file compare) command of Windows.

Python, 99 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# file_compare.py
# A simple file comparison utility.
# Author: Vasudev Ram
# Copyright 2016 Vasudev Ram

import sys
import os
from os.path import exists, getsize

def out_write(msg):
    '''Write msg to standard output exactly as given.

    No newline is appended; callers supply their own line endings.'''
    stream = sys.stdout
    stream.write(msg)

def err_write(msg):
    '''Write msg to standard error exactly as given.

    No newline is appended; callers supply their own line endings.'''
    stream = sys.stderr
    stream.write(msg)

def usage():
    '''Print a one-line usage summary to standard error.

    The program name shown is taken from sys.argv[0].'''
    program = sys.argv[0]
    err_write("Usage: {} file_a file_b\n".format(program))

def file_object_compare(in_fil_a, in_fil_b):
    '''Compare the remaining contents of two open file objects.

    Both objects are read from their current positions in fixed-size
    chunks, and corresponding chunks are compared. Reading stops at the
    first pair of chunks that differ, or when both objects reach end of
    input together.

    Returns (True, None) if the contents are identical. Otherwise returns
    (False, "files differ at byte offset N"), where N is the 0-based
    offset (counted from the starting positions) of the first item that
    differs. If one input is a strict prefix of the other, N is the
    length of the shorter input.

    End of input is detected with a truthiness test, so the function
    works with objects whose read() returns either str or bytes.

    Assumes read(n) returns fewer than n items only at end of input, as
    buffered file objects returned by open() do.'''
    chunk_size = 64 * 1024
    pos = 0  # Number of items already confirmed equal.
    while True:
        chunk_a = in_fil_a.read(chunk_size)
        chunk_b = in_fil_b.read(chunk_size)
        if chunk_a != chunk_b:
            break
        if not chunk_a:
            # Both inputs ended together without any mismatch.
            return (True, None)
        pos += len(chunk_a)

    # The chunks differ: walk their shared prefix to find the exact
    # offset. If the whole shared prefix matches, one input simply ended
    # early, and the difference starts where the shorter chunk stops.
    shared = min(len(chunk_a), len(chunk_b))
    offset = 0
    while offset < shared and chunk_a[offset] == chunk_b[offset]:
        offset += 1
    return (False, "files differ at byte offset {}".format(pos + offset))

def file_compare(in_filename_a, in_filename_b):
    '''Compare the files named in_filename_a and in_filename_b.

    The sizes are checked first; the contents are only read when the
    sizes match.

    Returns (True, None) if the contents are the same, otherwise
    (False, "[reason]"), where [reason] is a string saying why they
    differ: either "file sizes differ" or the message produced by
    file_object_compare() for a content mismatch.

    Errors raised by getsize() or open() (for example, for a missing or
    unreadable file) are not caught here and propagate to the caller.'''
    if getsize(in_filename_a) != getsize(in_filename_b):
        return (False, "file sizes differ")

    # The with statement closes both files even if the second open() or
    # the comparison itself raises.
    with open(in_filename_a, "rb") as in_fil_a, \
            open(in_filename_b, "rb") as in_fil_b:
        return file_object_compare(in_fil_a, in_fil_b)
        
def main():
    '''Command-line entry point: compare the two files named in sys.argv.

    Expects exactly two arguments, the names of the files to compare.
    The verdict is written to standard output; errors go to standard
    error.

    Exit status:
      0 - a verdict (equal or unequal) was printed, or both arguments
          are the same name and no comparison was attempted.
      1 - wrong number of arguments, an input file was not found, or an
          error was raised while comparing.'''
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    # sys.exit() raises SystemExit, which is not a subclass of Exception,
    # so the exits inside the try block are not swallowed by the handlers.
    try:
        # Get the input filenames.
        in_filename_a, in_filename_b = sys.argv[1:3]
        # Check they exist.
        for in_filename in (in_filename_a, in_filename_b):
            if not exists(in_filename):
                err_write(
                    "Error: Input file '{}' not found.\n".format(in_filename))
                sys.exit(1)
        # Don't allow comparing a file with itself (same name as given).
        if in_filename_a == in_filename_b:
            out_write("No sense comparing {} against itself.".format(in_filename_a))
            sys.exit(0)
        # Compare the files.
        files_equal, reason = file_compare(in_filename_a, in_filename_b)
        if files_equal:
            out_write("Files compare equal.")
        else:
            out_write("Files compare unequal: {}".format(reason))
        sys.exit(0)
    except IOError as ioe:
        err_write("Caught IOError: {}\n".format(str(ioe)))
        # Report the failure to the calling process as well.
        sys.exit(1)
    except Exception as e:
        err_write("Caught Exception: {}\n".format(str(e)))
        sys.exit(1)

# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

The program should work on both Linux and Windows, since it does not have any OS-specific code.

More details and sample input and output at this URL:

http://jugad2.blogspot.in/2016/03/a-basic-file-compare-utility-in-python.html