A little script to remove duplicate files. Uses md5sum and a dictionary. There may be a shorter way to do it, but this was simple. Works only on Cygwin/Linux/Unix systems.
import glob
import hashlib
import os
import sys
# Duplicate detection is done entirely in Python (hashlib + glob) rather than
# by parsing the output of an external ``md5sum`` process.  Parsing that output
# was fragile: names containing whitespace broke the split, and stripping a
# leading "*" corrupted names whenever md5sum printed its text-mode format.


def _file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and read *chunk_size* bytes at a time.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _group_by_checksum(paths):
    """Map each MD5 digest to the list of *paths* having that digest.

    Paths keep the order in which they were supplied, so the first entry of
    every list is the one that will be preserved.
    """
    groups = {}
    for path in paths:
        groups.setdefault(_file_md5(path), []).append(path)
    return groups


def remove_duplicates(pattern="*.txt", directory=os.curdir):
    """Delete files with identical content, keeping one copy of each.

    Files in *directory* whose names match the glob *pattern* are grouped by
    MD5 checksum.  Within each group the first path in sorted order is kept
    and every other one is removed.  Entries that are not regular files
    (for example a directory named ``x.txt``) are ignored.

    Progress messages are printed to standard output.

    Returns the number of files removed.  Errors from opening or removing a
    file (``IOError`` / ``OSError``) are not caught.
    """
    # get checksums this may take a while
    print("Collecting checksums...")
    # sorted() makes the choice of which duplicate survives deterministic;
    # glob.glob() itself returns matches in arbitrary order.
    candidates = [
        path
        for path in sorted(glob.glob(os.path.join(directory, pattern)))
        if os.path.isfile(path)
    ]

    # sorting files
    print("Sorting files...")
    groups = _group_by_checksum(candidates)

    print("Deleting dupes...")
    removed = 0
    for paths in groups.values():
        # Everything after the first path in a group is a duplicate.
        for duplicate in paths[1:]:
            os.remove(duplicate)
            removed += 1
    print("Operation complete. %d files removed." % removed)
    return removed


if __name__ == "__main__":
    remove_duplicates()
 | 
    Tags: files
  
  
      
 Download
Download Copy to clipboard
Copy to clipboard