#!/usr/bin/env python
"""Find duplicate file names.

Command line options:
  h - show help on usage
  s - compare file sizes
  n <text> - restrict to names containing text
  r <regex> - restrict to names containing regex match (overrides -n)

The non-option parameters, if specified, are used as search path.
Otherwise, current directory is used.
"""

import getopt
import os
import os.path
import re
import sys


def _println(text="", stream=None):
    """Write *text* followed by a newline to *stream*.

    The stream defaults to whatever ``sys.stdout`` is at call time.  Writing
    through the stream (instead of the ``print`` statement) keeps the script
    runnable on both Python 2 and Python 3 without ``__future__`` imports.
    """
    if stream is None:
        stream = sys.stdout
    stream.write("%s\n" % (text,))


def addIfFile(allfiles, dirname, file):
    """Record *dirname* under *file* in *allfiles* if the entry is a file.

    *allfiles* maps a bare file name to the list of directories in which a
    regular file of that name was seen.  Directories and other non-file
    entries are ignored.
    """
    if os.path.isfile(os.path.join(dirname, file)):
        allfiles.setdefault(file, []).append(dirname)


def checkdup(allfiles, dirname, files):
    """Visitor that records every regular file among *files*."""
    for n in files:
        addIfFile(allfiles, dirname, n)


class CheckdupName:
    """Visitor that records only files whose name contains a substring."""

    def __init__(self, name):
        self._name = name

    def __call__(self, allfiles, dirname, files):
        for n in files:
            if self._name in n:
                addIfFile(allfiles, dirname, n)


class CheckdupRegex:
    """Visitor that records only files whose name matches a regex.

    The pattern is compiled in the constructor, so a malformed pattern
    raises ``re.error`` there.
    """

    def __init__(self, pattern):
        self._re = re.compile(pattern)

    def __call__(self, allfiles, dirname, files):
        for n in files:
            if self._re.search(n):
                addIfFile(allfiles, dirname, n)


class HelpException(Exception):
    """Raised by main() when the user asks for help with -h."""
    pass


def printDupNames(duplist):
    """Print every path of every duplicated name, one group per name.

    *duplist* is a sequence of ``(name, directories)`` pairs.  Each group is
    followed by an empty line.
    """
    for n, d in duplist:
        for dd in d:
            _println(os.path.normpath(os.path.join(dd, n)))
        _println()


def printDupNameSizes(duplist):
    """Print duplicated names whose files also have the same size.

    For each ``(name, directories)`` pair the files are grouped by size
    (``os.stat(...).st_size``); only size groups with more than one member
    are printed, each followed by an empty line.
    """
    for n, d in duplist:
        szgroups = {}
        for dd in d:
            pj = os.path.normpath(os.path.join(dd, n))
            szgroups.setdefault(os.stat(pj).st_size, []).append(pj)
        # Sorted by size so the output does not depend on dict ordering.
        for sz, group in sorted(szgroups.items()):
            if len(group) > 1:
                for member in group:
                    _println(member)
                _println()


def _walk(path, visit, allfiles):
    """Call ``visit(allfiles, dirname, names)`` for each directory below *path*.

    Stands in for ``os.path.walk``, which no longer exists in Python 3.
    Sub-directories and names are visited in sorted order so that the
    resulting report is deterministic.
    """
    for dirname, subdirs, files in os.walk(path):
        subdirs.sort()  # in-place: steers the order os.walk descends in
        visit(allfiles, dirname, sorted(files))


def main(argv):
    """Parse *argv* (without the program name) and print the duplicates.

    Raises:
        getopt.GetoptError: on an unknown option or a missing argument.
        re.error: when the -r pattern is malformed.
        HelpException: when -h is given.
    """
    optlist, args = getopt.getopt(argv, "hsn:r:")
    prndup = printDupNames
    name_text = None
    regex_text = None
    for o, a in optlist:
        if o == "-h":
            raise HelpException()
        elif o == "-s":
            prndup = printDupNameSizes
        elif o == "-n":
            name_text = a
        elif o == "-r":
            regex_text = a

    # -r is documented to override -n, so it must win regardless of the
    # order in which the two options appear on the command line.
    if regex_text is not None:
        visit = CheckdupRegex(regex_text)
    elif name_text is not None:
        visit = CheckdupName(name_text)
    else:
        visit = checkdup

    paths = args or ["."]
    allfiles = {}
    for path in paths:
        _walk(path, visit, allfiles)

    # Keep only names seen in more than one directory, ordered by name.
    duplist = sorted(x for x in allfiles.items() if len(x[1]) > 1)
    prndup(duplist)


if __name__ == "__main__":
    try:
        main(sys.argv[1:])
    except getopt.GetoptError as e:
        _println(e, sys.stderr)
        _println("Try '%s -h' for help." % sys.argv[0], sys.stderr)
        raise SystemExit(2)
    except re.error as e:
        _println("Malformed regex pattern:", sys.stderr)
        _println(e, sys.stderr)
        raise SystemExit(2)
    except HelpException:
        _println("Usage: %s [options] [path [path ...]]" % sys.argv[0])
        _println()
        _println(__doc__)