Welcome, guest | Sign In | My Account | Store | Cart
"""
This is a bit of an experiment with a technique of applying filters to lists of
files.

The base functions are IterFiles which is a simple wrapper around os.walk and
then Filter which is used to create filter functions.

Whitelist and Blacklist are examples of filters created by using the Filter
function as a decorator.

As a demonstration of creating filters on the fly, HasMode is created in
the __main__ section.
"""

import os
import fnmatch
import time

__author__ = 'Eysteinn Kristinsson <eysispeisi@gmail.com>'

def IterFiles(folder, **extraWalkArgs):
    '''
    Generate the path of every file that os.walk finds under folder.

    Each yielded path is the walked directory joined with the file name.
    Any **extraWalkArgs are handed to os.walk unchanged, so its defaults
    can be overridden from here.
    '''
    walker = os.walk(folder, **extraWalkArgs)
    for dirPath, _subDirs, names in walker:
        # only the file names are of interest; sub-directories are reached
        # by os.walk itself
        for path in (os.path.join(dirPath, name) for name in names):
            yield path

def Filter(func):
    '''
    This is the filter creator function, you can also use it as a decorator.

    usage: someFilter = Filter(function)
           someFilter(files, *args, **keywordArgs)
    args and keywordArgs given to the created filter are automatically passed
    on to the function, after the file, for every item during iteration.

    The function passed to Filter must take a file path as its first argument
    and return a true value for the files that should be kept.

    The created filter is a generator function: nothing is evaluated until
    the result is iterated over. It carries over the name and docstring of
    the function it was created from.

    example:
        @Filter
        def MinSize(file, minsize):
            return os.path.getsize(file) >= minsize
        # Now you have created a filter that can take a list of files and a
        # minsize argument and apply the minsize condition to the files list
        # Print files in '.' that are 1MB or larger
        for file in MinSize(os.listdir('.'), 1024*1024):
            print(file)
    '''
    # imported here because the module does not import functools at the top
    import functools

    def wrapper(files, *a, **kw):
        for file in files:
            if func(file, *a, **kw):
                yield file

    # Keep the identity (__name__, __doc__, ...) of the wrapped function so
    # that filters are not all called 'wrapper'.
    try:
        functools.update_wrapper(wrapper, func)
    except AttributeError:
        # Older Pythons raise for callables that lack e.g. __name__ (such as
        # functools.partial objects); the filter still works without it.
        pass
    return wrapper

@Filter
def Whitelist(file, patterns):
    '''
    Keep a file only when it matches at least one of the patterns.

    patterns is an iterable of fnmatch-style patterns; each one is tested
    against the file path with fnmatch.fnmatch.
    '''
    return any(fnmatch.fnmatch(file, pattern) for pattern in patterns)

@Filter
def Blacklist(file, patterns):
    '''
    Drop a file as soon as it matches any of the patterns.

    patterns is an iterable of fnmatch-style patterns; a file is kept only
    when fnmatch.fnmatch rejects it for every one of them.
    '''
    return not any(fnmatch.fnmatch(file, pattern) for pattern in patterns)

if __name__ == '__main__':
    # Demo: run the filters against the current directory and print the
    # paths that pass each of them.
    folder = '.' # folder to process

    # get an iterator of all files under <folder>
    files = IterFiles(folder)
    # apply a whitelist to the files iterator
    wfiles = Whitelist(files, ('*.py', '*.txt'))
    # apply a blacklist to the whitelisted-files iterator
    bfiles = Blacklist(wfiles, ('*/__init__.py','*/*test*.py'))
    print 'whitelist/blacklist test'
    print bfiles # prints a generator object as we haven't iterated over it yet
    for file in bfiles: # iterate and print results
        print '  ', file

    # Filters can also be constructed on the fly.
    # The HasMode filter constructed here checks the file mode: it filters
    # out all files that don't have every bit of the mode you pass into it.
    import stat # for permission constants
    HasMode = Filter(lambda file, mode: os.stat(file).st_mode & mode == mode)
    print 'files others have read and write access to'
    mode = stat.S_IROTH|stat.S_IWOTH
    for file in HasMode(IterFiles(folder), mode):
        print '  ', file
    print

    # just to state the obvious, you don't have to use IterFiles to get
    # a list of files to feed a filter, just something that is iterable but
    # contains actual valid file paths
    files = os.listdir('.')
    print 'python files in current dir'
    for file in Whitelist(files, ('*.py',)):
        print '  ', file
    print

Diff to Previous Revision

--- revision 1 2012-03-20 19:27:24
+++ revision 2 2012-03-20 21:17:26
@@ -40,8 +40,7 @@
     example:
         @Filter
         def MinSize(file, minsize):
-            if os.path.getsize(file) >= minsize:
-                yield file
+            return os.path.getsize(file) >= minsize:
         # Now you have created a filter that can take a list of files and a
         # minsize argument and apply the minsize condition to the files list
         # Print files in '.' that are 1MB or larger

History