A little script to remove duplicate files. It uses md5sum and a dictionary. There may be a shorter way to do it, but this was simple. It only works on Cygwin/Linux/Unix systems, since it shells out to md5sum.
import os, sys

# get checksums; this may take a while
print "Collecting checksums..."
stdin, stdout = os.popen2("md5sum *.txt")
sums = stdout.readlines()

# group files by checksum (md5 -> list of filenames)
print "Sorting files..."
ls = {}
for s in sums:
    md5, file = s.split()
    # md5sum prefixes the name with '*' in binary mode; strip the stupid asterisk if present
    if file.startswith("*"):
        file = file[1:]
    if md5 in ls:
        ls[md5].append(file)
    else:
        ls[md5] = [file]

# keep the first file in each group, delete the rest
print "Deleting dupes..."
n = 0
for md5 in ls:
    for file in ls[md5][1:]:
        os.remove(file)
        n += 1

print "Operation complete. %d files removed." % n
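For the record, here is a rough, portable sketch of the same idea that skips md5sum and hashes the files directly with Python's hashlib, so it doesn't need Cygwin on Windows. The remove_dupes name and the *.txt pattern are just placeholders I picked, not part of the script above.

import glob
import hashlib
import os

def remove_dupes(pattern="*.txt"):
    seen = {}      # checksum -> first file seen with that checksum
    removed = 0
    for name in glob.glob(pattern):
        # hash the whole file in one go; fine for small text files
        with open(name, "rb") as f:
            digest = hashlib.md5(f.read()).hexdigest()
        if digest in seen:
            # same checksum already seen: keep the first copy, drop this one
            os.remove(name)
            removed += 1
        else:
            seen[digest] = name
    return removed

print("Removed %d files." % remove_dupes())

Reading each file with f.read() keeps the sketch short; for large files you would hash in chunks with md5.update() instead.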
Tags: files