Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python
# mixnmatch.py - find combination of files/dirs that sum below a given threshold
# -- Jose Fonseca

import os
import os.path
import optparse
import sys

from sets import ImmutableSet as set


def get_size(path, block_size):
    if os.path.isdir(path):
        result = 0
        for name in os.listdir(path):
            size = get_size(os.path.join(path, name), block_size)
            size = (size + block_size - 1)//block_size*block_size
            result += size
        return result
    else:
        return os.path.getsize(path)


def mix_and_match(limit, items, verbose = False):

    # filter items
    items = [(size, name) for size, name in items if size <= limit]
    # sort them by size
    items.sort(lambda (xsize, xname), (ysize, yname): cmp(xsize, ysize))
    
    # initialize variables
    added_collections = dict([(set([name]), size) for size, name in items])
    collections = added_collections

    while True:
        if verbose:
            sys.stderr.write("%d\n" % len(collections))
        
        # find unique combinations of the recent collections 
        new_collections = {}
        for names1, size1 in added_collections.iteritems():
            for size2, name2 in items:
                size3 = size1 + size2
                if size3 > limit:
                    # we can break here as all collections that follow are
                    #  bigger in size due to the sorting above
                    break
                if name2 in names1:
                    continue
                names3 = names1.union(set([name2]))
                if names3 in new_collections:
                    continue
                new_collections[names3] = size3
    
        if len(new_collections) == 0:
            break
        
        collections.update(new_collections)
        added_collections = new_collections

    return [(size, names) for names, size in collections.iteritems()]


def main():
    parser = optparse.OptionParser(usage="\n\t%prog [options] path ...")
    parser.add_option(
        '-l', '--limit', 
        type="int", dest="limit", default=4700000000, 
        help="total size limit")
    parser.add_option(
        '-B', '--block-size', 
        type="int", dest="size", default=2048, 
        help="use this block size")
    parser.add_option(
        '-s', '--show', 
        type="int", dest="show", default=10, 
        help="number of combinations to show")
    parser.add_option(
        '-v', '--verbose', 
        action="store_true", dest="verbose", default=False, 
        help="verbose output")
    (options, args) = parser.parse_args(sys.argv[1:])

    limit = options.limit
    block_size = options.size
    
    items = [(get_size(arg, block_size), arg) for arg in args]
    
    collections = mix_and_match(limit, items, options.verbose)
    collections.sort(lambda (xsize, xnames), (ysize, ynames): cmp(xsize, ysize))
    if options.show != 0:
        collections = collections[-options.show:]
    
    for size, names in collections:
        percentage = 100.0*float(size)/float(limit)
        try:
            sys.stdout.write("%10d\t%02.2f%%\t%s\n" % (size, percentage, " ".join(names)))
        except IOError:
            # ignore broken pipe
            pass 


if __name__ == '__main__':
    main()

History

  • revision 2 (18 years ago)
  • previous revisions are not available