# Welcome, guest | Sign In | My Account | Store | Cart
import re
import sys
import string
import os.path
import time

"""
Read the contents of the two md5 files into two lists.

Each list entry is a tuple (md5, path, filename) :
* md5 : the md5, untouched
* path : the directory part, normalized (only forward slashes, leading '/' removed); entries whose path matches the selected regular expression are left out
* filename : the filename, untouched
"""

# Entries whose path matches this pattern are left out of the comparison.
# To skip all .svn directories (subversion administrative dirs) use instead:
#     re_compiled = re.compile(r'^\.svn|/\.svn')
# The default has to be a pattern that can never match: the former default
# '^$' matched the empty path, so every file listed without a directory part
# was silently dropped from both lists.
re_compiled = re.compile(r'(?!)')     # re to exclude nothing


def _read_md5_list(md5_file):
    """Parse the md5 listing `md5_file` into a list of (md5, path, filename).

    A data line is sliced as: 32 characters of md5, a two-character
    separator, then the file path.  Lines starting with '#' or ';' and blank
    lines are skipped.  The directory part is normalized to forward slashes
    and loses a leading '/'; entries whose directory part matches
    `re_compiled` are left out.  The same rules apply to both input files.
    """
    handle = open(md5_file, mode='rU')    # 'rU': universal newlines, every terminator reads as '\n'
    try:
        raw_lines = handle.readlines()
    finally:
        handle.close()    # try/finally (not 'with') so old interpreters still run this

    entries = []
    for raw in raw_lines:
        # Strip only a real line terminator: a last line without one must
        # keep its final character (a blind [:-1] would truncate the name).
        line = raw.rstrip('\n')
        if line[0:1] in ('#', ';') or line.strip() == '':
            continue    # comment or empty line
        target = line[34:]    # everything after the md5 and its separator
        str_path = os.path.dirname(target).replace('\\', '/')
        if str_path[0:1] == '/':
            str_path = str_path[1:]    # remove the leading '/' from the path
        if re_compiled.search(str_path):
            continue    # excluded by the regular expression
        entries.append((line[:32], str_path, os.path.basename(target)))
    return entries


list_one = _read_md5_list(sys.argv[1])    # entries of the first md5 file
list_two = _read_md5_list(sys.argv[2])    # entries of the second md5 file

"""
Diff the two lists, obtaining two lists 'list_xxx_diff' whose entries are [md5, flag, path, filename] :
* md5 : the md5 taken from the list's own md5 file (first md5 file for 'list_one_diff', second md5 file for 'list_two_diff')
* flag ('sx' marks entries of the first file, 'dx' entries of the second file) :
** '==' equal (md5 =, path =, filename =)
** '<>' different (md5 <>, path =, filename =)
** '>>' new dx (md5 n/a, path <>, filename <>)
** '<<' new sx (md5 n/a, path <>, filename <>)
** 'm>' moved dx (md5 =, path <>, filename =)
** '<m' moved sx (md5 =, path <>, filename =)
** 'r>' renamed dx (md5 =, path =, filename <>)
** '<r' renamed sx (md5 =, path =, filename <>)
* path : path
* filename : filename
"""
# creating the two lists containing the result of the diff'ing
list_one_diff = []
list_two_diff = []


def _pair_up(entries_one, entries_two, fields):
    """Pair entries of the two lists that agree on the given tuple fields.

    Entries are (md5, path, filename) tuples; `fields` holds the indexes
    that must be equal.  Each entry of `entries_one`, taken in order, is
    paired with the earliest not-yet-paired entry of `entries_two` that has
    the same key, and a paired entry is never reused.

    Returns (pairs, rest_one, rest_two): the matched (one, two) pairs in the
    order they were found, and the unmatched entries of each list in their
    original order.
    """
    # Index the second list by key once instead of rescanning it for every
    # entry of the first list.  Positions are stored back to front so that
    # pop() hands out the earliest candidate first.
    positions = {}
    for pos in range(len(entries_two) - 1, -1, -1):
        key = tuple([entries_two[pos][i] for i in fields])
        positions.setdefault(key, []).append(pos)

    paired = [False] * len(entries_two)
    pairs = []
    rest_one = []
    for entry in entries_one:
        candidates = positions.get(tuple([entry[i] for i in fields]))
        if candidates:
            pos = candidates.pop()
            paired[pos] = True
            pairs.append((entry, entries_two[pos]))
        else:
            rest_one.append(entry)

    rest_two = [entry for pos, entry in enumerate(entries_two) if not paired[pos]]
    return pairs, rest_one, rest_two


# The passes run from the strictest comparison to the loosest one, and each
# pass only sees the entries the previous passes left unpaired.
# Fields: 0 = md5, 1 = path, 2 = filename.
for fields, flag_one, flag_two in (
        ((0, 1, 2), '==', '=='),    # equal (md5 =, path =, filename =)
        ((1, 2), '<>', '<>'),       # different (md5 <>, path =, filename =)
        ((0, 2), '<m', 'm>'),       # moved (md5 =, path <>, filename =)
        ((0, 1), '<r', 'r>'),       # renamed (md5 =, path =, filename <>)
):
    pairs, list_one, list_two = _pair_up(list_one, list_two, fields)
    for entry_one, entry_two in pairs:
        list_one_diff.append([entry_one[0], flag_one, entry_one[1], entry_one[2]])
        list_two_diff.append([entry_two[0], flag_two, entry_two[1], entry_two[2]])

# whatever is still unpaired exists in one file only: '<<' new sx, '>>' new dx
for entry in list_one:
    list_one_diff.append([entry[0], '<<', entry[1], entry[2]])

for entry in list_two:
    list_two_diff.append([entry[0], '>>', entry[1], entry[2]])


"""
Printing the diff'ing list and some stats
"""

def _emit(text):
    """Write `text` plus a line terminator to standard output."""
    sys.stdout.write(text + '\n')


def _timestamp():
    """Return the current local time formatted for the listing headers."""
    return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())


# listing of the first diff'ed md5: header, then one line per entry
_emit("#\n# diff'ed md5 '" + sys.argv[1] + "' (" + _timestamp() + ")\n#")
for md5, flag, path, filename in list_one_diff:
    _emit(md5 + flag + path + '/' + filename)


# listing of the second diff'ed md5: header, then one line per entry
_emit("\n\n#\n# diff'ed md5 '" + sys.argv[2] + "' (" + _timestamp() + ")\n#")
for md5, flag, path, filename in list_two_diff:
    _emit(md5 + flag + path + '/' + filename)


# stats of the first list: how many entries carry each flag
list_stats = [row[1] for row in list_one_diff]
_emit("\n\n\n#   *** stats of '" + sys.argv[1] + "' ***\n#")
for label, flag in (
        ('#   ==  equal      ', '=='),
        ('#   <>  different  ', '<>'),
        ('#   <<  new sx     ', '<<'),
        ('#   <r  renamed sx ', '<r'),
        ('#   <m  moved sx   ', '<m'),
):
    _emit(label + ' ' + str(list_stats.count(flag)))
_emit('#   --  total      ' + ' ' + str(len(list_stats)))


# stats of the second list: how many entries carry each flag
list_stats = [row[1] for row in list_two_diff]
_emit("\n\n#   *** stats of '" + sys.argv[2] + "' ***\n#")
for label, flag in (
        ('#   ==  equal      ', '=='),
        ('#   <>  different  ', '<>'),
        ('#   >>  new dx     ', '>>'),
        ('#   r>  renamed dx ', 'r>'),
        ('#   m>  moved dx   ', 'm>'),
):
    _emit(label + ' ' + str(list_stats.count(flag)))
_emit('#   --  total      ' + ' ' + str(len(list_stats)))

# History
#
#   * revision 5 (19 years ago)
#   * previous revisions are not available